From 20f2258f3481e708fc954034ee36e2c72bce1782 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 17 Apr 2026 20:39:25 -0700
Subject: [PATCH 001/143] fix(interrupt): propagate to concurrent-tool workers
 + opt-in debug trace (#11907)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(interrupt): propagate to concurrent-tool workers + opt-in debug trace

interrupt() previously only flagged the agent's _execution_thread_id.
Tools running inside _execute_tool_calls_concurrent execute on
ThreadPoolExecutor worker threads whose tids are distinct from the
agent's, so is_interrupted() inside those tools returned False no matter
how many times the gateway called .interrupt() — hung ssh / curl / long
make-builds ran to their own timeout.

Changes:
- run_agent.py: track concurrent-tool worker tids in a per-agent set,
  fan interrupt()/clear_interrupt() out to them, and handle the
  register-after-interrupt race at _run_tool entry.  getattr fallback
  for the tracker so test stubs built via object.__new__ keep working.
- tools/environments/base.py: opt-in _wait_for_process trace (ENTER,
  per-30s HEARTBEAT with interrupt+activity-cb state, INTERRUPT
  DETECTED, TIMEOUT, EXIT) behind HERMES_DEBUG_INTERRUPT=1.
- tools/interrupt.py: opt-in set_interrupt() trace (caller tid, target
  tid, set snapshot) behind the same env flag.
- tests: new regression test runs a polling tool on a concurrent worker
  and asserts is_interrupted() flips to True within ~1s of interrupt().
  Second new test guards clear_interrupt() clearing tracked worker bits.

Validation: tests/run_agent/ all 762 pass; tests/tools/ interrupt+env
subset 216 pass.

* fix(interrupt-debug): bypass quiet_mode logger filter so trace reaches agent.log

AIAgent.__init__ sets logging.getLogger('tools').setLevel(ERROR) when
quiet_mode=True (the CLI default). This would silently swallow every
INFO-level trace line from the HERMES_DEBUG_INTERRUPT=1 instrumentation
added in the parent commit — confirmed by running hermes chat -q with
the flag and finding zero trace lines in agent.log even though
_wait_for_process was clearly executing (subprocess pid existed).

Fix: when HERMES_DEBUG_INTERRUPT=1, each traced module explicitly sets
its own logger level to INFO at import time, overriding the 'tools'
parent-level filter. Scoped to the opt-in case only, so production
(quiet_mode default) logs stay quiet as designed.

Validation: hermes chat -q with HERMES_DEBUG_INTERRUPT=1 now writes
'_wait_for_process ENTER/EXIT' lines to agent.log as expected.

* fix(cli): SIGTERM/SIGHUP no longer orphans tool subprocesses

Tool subprocesses spawned by the local environment backend use
os.setsid so they run in their own process group. Before this fix,
SIGTERM/SIGHUP to the hermes CLI killed the main thread via
KeyboardInterrupt but the worker thread running _wait_for_process
never got a chance to call _kill_process — Python exited, the child
was reparented to init (PPID=1), and the subprocess ran to its
natural end (confirmed live: sleep 300 survived 4+ min after SIGTERM
to the agent until manual cleanup).

Changes:
- cli.py _signal_handler (interactive) + _signal_handler_q (-q mode):
  route SIGTERM/SIGHUP through agent.interrupt() so the worker's poll
  loop sees the per-thread interrupt flag and calls _kill_process
  (os.killpg) on the subprocess group. HERMES_SIGTERM_GRACE (default
  1.5s) gives the worker time to complete its SIGTERM+SIGKILL
  escalation before KeyboardInterrupt unwinds main.
- tools/environments/base.py _wait_for_process: wrap the poll loop in
  try/except (KeyboardInterrupt, SystemExit) so the cleanup fires
  even on paths the signal handlers don't cover (direct sys.exit,
  unhandled KI from nested code, etc.). Emits EXCEPTION_EXIT trace
  line when HERMES_DEBUG_INTERRUPT=1.
- New regression test: injects KeyboardInterrupt into a running
  _wait_for_process via PyThreadState_SetAsyncExc, verifies the
  subprocess process group is dead within 3s of the exception and
  that KeyboardInterrupt re-raises cleanly afterward.

Validation:
| Before                                                  | After              |
|---------------------------------------------------------|--------------------|
| sleep 300 survives 4+ min as PPID=1 orphan after SIGTERM | dies within 2 s   |
| No INTERRUPT DETECTED in trace                          | INTERRUPT DETECTED fires + killing process group |
| tests/tools/test_local_interrupt_cleanup                | 1/1 pass          |
| tests/run_agent/test_concurrent_interrupt               | 4/4 pass          |
---
 cli.py                                       |  69 ++++++++-
 run_agent.py                                 |  72 +++++++++
 tests/run_agent/test_concurrent_interrupt.py | 123 +++++++++++++++-
 tests/tools/test_local_interrupt_cleanup.py  | 145 +++++++++++++++++++
 tools/environments/base.py                   | 142 +++++++++++++++---
 tools/interrupt.py                           |  22 +++
 6 files changed, 551 insertions(+), 22 deletions(-)
 create mode 100644 tests/tools/test_local_interrupt_cleanup.py

diff --git a/cli.py b/cli.py
index c0c17babc4c..2456c7754b2 100644
--- a/cli.py
+++ b/cli.py
@@ -10067,8 +10067,36 @@ class HermesCLI:
         
         # Register signal handlers for graceful shutdown on SSH disconnect / SIGTERM
         def _signal_handler(signum, frame):
-            """Handle SIGHUP/SIGTERM by triggering graceful cleanup."""
+            """Handle SIGHUP/SIGTERM by triggering graceful cleanup.
+
+            Calls ``self.agent.interrupt()`` first so the agent daemon
+            thread's poll loop sees the per-thread interrupt and kills the
+            tool's subprocess group via ``_kill_process`` (os.killpg).
+            Without this, the main thread dies from KeyboardInterrupt and
+            the daemon thread is killed with it — before it can run one
+            more poll iteration to clean up the subprocess, which was
+            spawned with ``os.setsid`` and therefore survives as an orphan
+            with PPID=1.
+
+            Grace window (``HERMES_SIGTERM_GRACE``, default 1.5 s) gives
+            the daemon time to: detect the interrupt (next 200 ms poll) →
+            call _kill_process (SIGTERM + 1 s wait + SIGKILL if needed) →
+            return from _wait_for_process.  ``time.sleep`` releases the
+            GIL so the daemon actually runs during the window.
+            """
             logger.debug("Received signal %s, triggering graceful shutdown", signum)
+            try:
+                if getattr(self, "agent", None) and getattr(self, "_agent_running", False):
+                    self.agent.interrupt(f"received signal {signum}")
+                    import time as _t
+                    try:
+                        _grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5"))
+                    except (TypeError, ValueError):
+                        _grace = 1.5
+                    if _grace > 0:
+                        _t.sleep(_grace)
+            except Exception:
+                pass  # never block signal handling
             raise KeyboardInterrupt()
         
         try:
@@ -10371,6 +10399,45 @@ def main(
     
     # Register cleanup for single-query mode (interactive mode registers in run())
     atexit.register(_run_cleanup)
+
+    # Also install signal handlers in single-query / `-q` mode.  Interactive
+    # mode registers its own inside HermesCLI.run(), but `-q` runs
+    # cli.agent.run_conversation() below and AIAgent spawns worker threads
+    # for tools — so when SIGTERM arrives on the main thread, raising
+    # KeyboardInterrupt only unwinds the main thread, not the worker
+    # running _wait_for_process.  Python then exits, the child subprocess
+    # (spawned with os.setsid, its own process group) is reparented to
+    # init and keeps running as an orphan.
+    #
+    # Fix: route SIGTERM/SIGHUP through agent.interrupt() which sets the
+    # per-thread interrupt flag the worker's poll loop checks every 200 ms.
+    # Give the worker a grace window to call _kill_process (SIGTERM to the
+    # process group, then SIGKILL after 1 s), then raise KeyboardInterrupt
+    # so main unwinds normally.  HERMES_SIGTERM_GRACE overrides the 1.5 s
+    # default for debugging.
+    def _signal_handler_q(signum, frame):
+        logger.debug("Received signal %s in single-query mode", signum)
+        try:
+            _agent = getattr(cli, "agent", None)
+            if _agent is not None:
+                _agent.interrupt(f"received signal {signum}")
+                import time as _t
+                try:
+                    _grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5"))
+                except (TypeError, ValueError):
+                    _grace = 1.5
+                if _grace > 0:
+                    _t.sleep(_grace)
+        except Exception:
+            pass  # never block signal handling
+        raise KeyboardInterrupt()
+    try:
+        import signal as _signal
+        _signal.signal(_signal.SIGTERM, _signal_handler_q)
+        if hasattr(_signal, "SIGHUP"):
+            _signal.signal(_signal.SIGHUP, _signal_handler_q)
+    except Exception:
+        pass  # signal handler may fail in restricted environments
     
     # Handle single query mode
     if query or image:
diff --git a/run_agent.py b/run_agent.py
index ef90ae39e20..010715280ca 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -831,6 +831,16 @@ class AIAgent:
         self._execution_thread_id: int | None = None  # Set at run_conversation() start
         self._interrupt_thread_signal_pending = False
         self._client_lock = threading.RLock()
+
+        # Concurrent-tool worker thread tracking.  `_execute_tool_calls_concurrent`
+        # runs each tool on its own ThreadPoolExecutor worker — those worker
+        # threads have tids distinct from `_execution_thread_id`, so
+        # `_set_interrupt(True, _execution_thread_id)` alone does NOT cause
+        # `is_interrupted()` inside the worker to return True.  Track the
+        # workers here so `interrupt()` / `clear_interrupt()` can fan out to
+        # their tids explicitly.
+        self._tool_worker_threads: set[int] = set()
+        self._tool_worker_threads_lock = threading.Lock()
         
         # Subagent delegation state
         self._delegate_depth = 0        # 0 = top-level agent, incremented for children
@@ -3191,6 +3201,25 @@ class AIAgent:
             # interrupt signal until startup completes instead of targeting
             # the caller thread by mistake.
             self._interrupt_thread_signal_pending = True
+        # Fan out to concurrent-tool worker threads.  Those workers run tools
+        # on their own tids (ThreadPoolExecutor workers), so `is_interrupted()`
+        # inside a tool only sees an interrupt when their specific tid is in
+        # the `_interrupted_threads` set.  Without this propagation, an
+        # already-running concurrent tool (e.g. a terminal command hung on
+        # network I/O) never notices the interrupt and has to run to its own
+        # timeout.  See `_run_tool` for the matching entry/exit bookkeeping.
+        # `getattr` fallback covers test stubs that build AIAgent via
+        # object.__new__ and skip __init__.
+        _tracker = getattr(self, "_tool_worker_threads", None)
+        _tracker_lock = getattr(self, "_tool_worker_threads_lock", None)
+        if _tracker is not None and _tracker_lock is not None:
+            with _tracker_lock:
+                _worker_tids = list(_tracker)
+            for _wtid in _worker_tids:
+                try:
+                    _set_interrupt(True, _wtid)
+                except Exception:
+                    pass
         # Propagate interrupt to any running child agents (subagent delegation)
         with self._active_children_lock:
             children_copy = list(self._active_children)
@@ -3209,6 +3238,23 @@ class AIAgent:
         self._interrupt_thread_signal_pending = False
         if self._execution_thread_id is not None:
             _set_interrupt(False, self._execution_thread_id)
+        # Also clear any concurrent-tool worker thread bits.  Tracked
+        # workers normally clear their own bit on exit, but an explicit
+        # clear here guarantees no stale interrupt can survive a turn
+        # boundary and fire on a subsequent, unrelated tool call that
+        # happens to get scheduled onto the same recycled worker tid.
+        # `getattr` fallback covers test stubs that build AIAgent via
+        # object.__new__ and skip __init__.
+        _tracker = getattr(self, "_tool_worker_threads", None)
+        _tracker_lock = getattr(self, "_tool_worker_threads_lock", None)
+        if _tracker is not None and _tracker_lock is not None:
+            with _tracker_lock:
+                _worker_tids = list(_tracker)
+            for _wtid in _worker_tids:
+                try:
+                    _set_interrupt(False, _wtid)
+                except Exception:
+                    pass
 
     def _touch_activity(self, desc: str) -> None:
         """Update the last-activity timestamp and description (thread-safe)."""
@@ -7653,6 +7699,22 @@ class AIAgent:
 
         def _run_tool(index, tool_call, function_name, function_args):
             """Worker function executed in a thread."""
+            # Register this worker tid so the agent can fan out an interrupt
+            # to it — see AIAgent.interrupt().  Must happen first thing, and
+            # must be paired with the discard + clear at the end of this worker.
+            _worker_tid = threading.current_thread().ident
+            with self._tool_worker_threads_lock:
+                self._tool_worker_threads.add(_worker_tid)
+            # Race: if the agent was interrupted between fan-out (which
+            # snapshotted an empty/earlier set) and our registration, apply
+            # the interrupt to our own tid now so is_interrupted() inside
+            # the tool returns True on the next poll.
+            if self._interrupt_requested:
+                try:
+                    from tools.interrupt import set_interrupt as _sif
+                    _sif(True, _worker_tid)
+                except Exception:
+                    pass
             # Set the activity callback on THIS worker thread so
             # _wait_for_process (terminal commands) can fire heartbeats.
             # The callback is thread-local; the main thread's callback
@@ -7675,6 +7737,16 @@ class AIAgent:
             else:
                 logger.info("tool %s completed (%.2fs, %d chars)", function_name, duration, len(result))
             results[index] = (function_name, function_args, result, duration, is_error)
+            # Tear down worker-tid tracking.  Clear any interrupt bit we may
+            # have set so the next task scheduled onto this recycled tid
+            # starts with a clean slate.
+            with self._tool_worker_threads_lock:
+                self._tool_worker_threads.discard(_worker_tid)
+            try:
+                from tools.interrupt import set_interrupt as _sif
+                _sif(False, _worker_tid)
+            except Exception:
+                pass
 
         # Start spinner for CLI mode (skip when TUI handles tool progress)
         spinner = None
diff --git a/tests/run_agent/test_concurrent_interrupt.py b/tests/run_agent/test_concurrent_interrupt.py
index fdeb8dd6907..e5d8b88e727 100644
--- a/tests/run_agent/test_concurrent_interrupt.py
+++ b/tests/run_agent/test_concurrent_interrupt.py
@@ -23,6 +23,10 @@ def _make_agent(monkeypatch):
 
     class _Stub:
         _interrupt_requested = False
+        _interrupt_message = None
+        # Bind to this thread's ident so interrupt() targets a real tid.
+        _execution_thread_id = threading.current_thread().ident
+        _interrupt_thread_signal_pending = False
         log_prefix = ""
         quiet_mode = True
         verbose_logging = False
@@ -40,6 +44,15 @@ def _make_agent(monkeypatch):
         _current_tool = None
         _last_activity = 0
         _print_fn = print
+        # Worker-thread tracking state mirrored from AIAgent.__init__ so the
+        # real interrupt() method can fan out to concurrent-tool workers.
+        _active_children: list = []
+
+        def __init__(self):
+            # Instance-level (not class-level) so each test gets a fresh set.
+            self._tool_worker_threads: set = set()
+            self._tool_worker_threads_lock = threading.Lock()
+            self._active_children_lock = threading.Lock()
 
         def _touch_activity(self, desc):
             self._last_activity = time.time()
@@ -60,8 +73,10 @@ def _make_agent(monkeypatch):
             return False
 
     stub = _Stub()
-    # Bind the real methods
+    # Bind the real methods under test
     stub._execute_tool_calls_concurrent = _ra.AIAgent._execute_tool_calls_concurrent.__get__(stub)
+    stub.interrupt = _ra.AIAgent.interrupt.__get__(stub)
+    stub.clear_interrupt = _ra.AIAgent.clear_interrupt.__get__(stub)
     stub._invoke_tool = MagicMock(side_effect=lambda *a, **kw: '{"ok": true}')
     return stub
 
@@ -137,3 +152,109 @@ def test_concurrent_preflight_interrupt_skips_all(monkeypatch):
     assert "skipped due to user interrupt" in messages[1]["content"]
     # _invoke_tool should never have been called
     agent._invoke_tool.assert_not_called()
+
+
+def test_running_concurrent_worker_sees_is_interrupted(monkeypatch):
+    """Regression guard for the "interrupt-doesn't-reach-hung-tool" class of
+    bug Physikal reported in April 2026.
+
+    Before this fix, `AIAgent.interrupt()` called `_set_interrupt(True,
+    _execution_thread_id)` — which only flagged the agent's *main* thread.
+    Tools running inside `_execute_tool_calls_concurrent` execute on
+    ThreadPoolExecutor worker threads whose tids are NOT the agent's, so
+    `is_interrupted()` (which checks the *current* thread's tid) returned
+    False inside those tools no matter how many times the gateway called
+    `.interrupt()`.  Hung ssh / long curl / big make-build tools would run
+    to their own timeout.
+
+    This test runs a fake tool in the concurrent path that polls
+    `is_interrupted()` like a real terminal command does, then calls
+    `agent.interrupt()` from another thread, and asserts the poll sees True
+    within one second.
+    """
+    from tools.interrupt import is_interrupted
+
+    agent = _make_agent(monkeypatch)
+
+    # Counter plus observation hooks so we can prove the worker saw the flip.
+    observed = {"saw_true": False, "poll_count": 0, "worker_tid": None}
+    worker_started = threading.Event()
+
+    def polling_tool(name, args, task_id, call_id=None):
+        observed["worker_tid"] = threading.current_thread().ident
+        worker_started.set()
+        deadline = time.monotonic() + 5.0
+        while time.monotonic() < deadline:
+            observed["poll_count"] += 1
+            if is_interrupted():
+                observed["saw_true"] = True
+                return '{"interrupted": true}'
+            time.sleep(0.05)
+        return '{"timed_out": true}'
+
+    agent._invoke_tool = MagicMock(side_effect=polling_tool)
+
+    tc1 = _FakeToolCall("hung_fake_tool_1", call_id="tc1")
+    tc2 = _FakeToolCall("hung_fake_tool_2", call_id="tc2")
+    msg = _FakeAssistantMsg([tc1, tc2])
+    messages = []
+
+    def _interrupt_after_start():
+        # Wait until at least one worker is running so its tid is tracked.
+        worker_started.wait(timeout=2.0)
+        time.sleep(0.2)  # let the other worker enter too
+        agent.interrupt("stop requested by test")
+
+    t = threading.Thread(target=_interrupt_after_start)
+    t.start()
+    start = time.monotonic()
+    agent._execute_tool_calls_concurrent(msg, messages, "test_task")
+    elapsed = time.monotonic() - start
+    t.join(timeout=2.0)
+
+    # The worker must have actually polled is_interrupted — otherwise the
+    # test isn't exercising what it claims to.
+    assert observed["poll_count"] > 0, (
+        "polling_tool never ran — test scaffold issue"
+    )
+    # The worker must see the interrupt within ~1 s of agent.interrupt()
+    # being called.  Before the fix this loop ran until its 5 s own-timeout.
+    assert observed["saw_true"], (
+        f"is_interrupted() never returned True inside the concurrent worker "
+        f"after agent.interrupt() — interrupt-propagation hole regressed. "
+        f"worker_tid={observed['worker_tid']!r} poll_count={observed['poll_count']}"
+    )
+    assert elapsed < 3.0, (
+        f"concurrent execution took {elapsed:.2f}s after interrupt — the fan-out "
+        f"to worker tids didn't shortcut the tool's poll loop as expected"
+    )
+    # Also verify cleanup: no stale worker tids should remain after all
+    # tools finished.
+    assert agent._tool_worker_threads == set(), (
+        f"worker tids leaked after run: {agent._tool_worker_threads}"
+    )
+
+
+def test_clear_interrupt_clears_worker_tids(monkeypatch):
+    """After clear_interrupt(), stale worker-tid bits must be cleared so the
+    next turn's tools — which may be scheduled onto recycled tids — don't
+    see a false interrupt."""
+    from tools.interrupt import is_interrupted, set_interrupt
+
+    agent = _make_agent(monkeypatch)
+    # Simulate a worker having registered but not yet exited cleanly (e.g. a
+    # hypothetical bug in the tear-down).  Put a fake tid in the set and
+    # flag it interrupted.
+    fake_tid = threading.current_thread().ident  # use real tid so is_interrupted can see it
+    with agent._tool_worker_threads_lock:
+        agent._tool_worker_threads.add(fake_tid)
+    set_interrupt(True, fake_tid)
+    assert is_interrupted() is True  # sanity
+
+    agent.clear_interrupt()
+
+    assert is_interrupted() is False, (
+        "clear_interrupt() did not clear the interrupt bit for a tracked "
+        "worker tid — stale interrupt can leak into the next turn"
+    )
+
diff --git a/tests/tools/test_local_interrupt_cleanup.py b/tests/tools/test_local_interrupt_cleanup.py
new file mode 100644
index 00000000000..72310009a54
--- /dev/null
+++ b/tests/tools/test_local_interrupt_cleanup.py
@@ -0,0 +1,145 @@
+"""Regression tests for _wait_for_process subprocess cleanup on exception exit.
+
+When the poll loop exits via KeyboardInterrupt or SystemExit (SIGTERM via
+cli.py signal handler, SIGINT on the main thread in non-interactive -q mode,
+or explicit sys.exit from some caller), the child subprocess must be killed
+before the exception propagates — otherwise the local backend's use of
+os.setsid leaves an orphan with PPID=1.
+
+The live repro that motivated this: hermes chat -q ... 'sleep 300', SIGTERM
+to the python process, sleep 300 survived with PPID=1 for the full 300 s
+because _wait_for_process never got to call _kill_process before python
+died.  See commit message for full context.
+"""
+import os
+import signal
+import subprocess
+import threading
+import time
+
+import pytest
+
+from tools.environments.local import LocalEnvironment
+
+
+@pytest.fixture(autouse=True)
+def _isolate_hermes_home(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    (tmp_path / "logs").mkdir(exist_ok=True)
+
+
+def _pgid_still_alive(pgid: int) -> bool:
+    """Return True if any process in the given process group is still alive."""
+    try:
+        os.killpg(pgid, 0)  # signal 0 = existence check
+        return True
+    except ProcessLookupError:
+        return False
+
+
+def test_wait_for_process_kills_subprocess_on_keyboardinterrupt():
+    """When KeyboardInterrupt arrives mid-poll, the subprocess group must be
+    killed before the exception is re-raised."""
+    env = LocalEnvironment(cwd="/tmp")
+    try:
+        result_holder = {}
+        proc_holder = {}
+        started = threading.Event()
+        raise_at = [None]  # set by the main thread to tell worker when
+
+        # Drive execute() on a separate thread so we can SIGNAL-interrupt it
+        # via a thread-targeted exception without killing our test process.
+        def worker():
+            # Spawn a subprocess that will definitely be alive long enough
+            # to observe the cleanup, via env.execute(...) — the normal path
+            # that goes through _wait_for_process.
+            try:
+                result_holder["result"] = env.execute("sleep 30", timeout=60)
+            except BaseException as e:  # noqa: BLE001 — we want to observe it
+                result_holder["exception"] = type(e).__name__
+
+        t = threading.Thread(target=worker, daemon=True)
+        t.start()
+        # Wait until the subprocess actually exists.  LocalEnvironment.execute
+        # does init_session() (one spawn) before the real command, so we need
+        # to wait until a sleep 30 is visible.  Look it up with psutil when
+        # available, else via ``ps``, matching on the 'sleep 30' command line.
+        deadline = time.monotonic() + 5.0
+        target_pid = None
+        while time.monotonic() < deadline:
+            # Walk our children and grand-children to find one running 'sleep 30'
+            try:
+                import psutil  # optional — fall back if absent
+                for p in psutil.Process(os.getpid()).children(recursive=True):
+                    try:
+                        if "sleep 30" in " ".join(p.cmdline()):
+                            target_pid = p.pid
+                            break
+                    except (psutil.NoSuchProcess, psutil.AccessDenied):
+                        continue
+            except ImportError:
+                # Fall back to ps
+                ps = subprocess.run(
+                    ["ps", "-eo", "pid,ppid,pgid,cmd"], capture_output=True, text=True,
+                )
+                for line in ps.stdout.splitlines():
+                    if "sleep 30" in line and "grep" not in line:
+                        parts = line.split()
+                        if parts and parts[0].isdigit():
+                            target_pid = int(parts[0])
+                            break
+            if target_pid:
+                break
+            time.sleep(0.1)
+
+        assert target_pid is not None, (
+            "test setup: couldn't find 'sleep 30' subprocess after 5 s"
+        )
+        pgid = os.getpgid(target_pid)
+        assert _pgid_still_alive(pgid), "sanity: subprocess should be alive"
+
+        # Now inject a KeyboardInterrupt into the worker thread.  CPython only
+        # runs signal handlers on the main thread, so we raise it asynchronously
+        # in the worker via ctypes.pythonapi.PyThreadState_SetAsyncExc instead.
+        import ctypes
+        import sys as _sys
+        # py-thread-state exception targets need the ident, not the Thread
+        tid = t.ident
+        assert tid is not None
+        # Fire KeyboardInterrupt into the worker thread
+        ret = ctypes.pythonapi.PyThreadState_SetAsyncExc(
+            ctypes.c_ulong(tid), ctypes.py_object(KeyboardInterrupt),
+        )
+        assert ret == 1, f"SetAsyncExc returned {ret}, expected 1"
+
+        # Give the worker a moment to: hit the exception at the next poll,
+        # run the except-block cleanup (_kill_process), and exit.
+        t.join(timeout=5.0)
+        assert not t.is_alive(), "worker didn't exit within 5 s of the interrupt"
+
+        # The critical assertion: the subprocess GROUP must be dead.  Not
+        # just the bash wrapper — the 'sleep 30' child too.
+        # Give the SIGTERM+1s wait+SIGKILL escalation a moment to complete.
+        deadline = time.monotonic() + 3.0
+        while time.monotonic() < deadline:
+            if not _pgid_still_alive(pgid):
+                break
+            time.sleep(0.1)
+        assert not _pgid_still_alive(pgid), (
+            f"subprocess group {pgid} is STILL ALIVE after worker received "
+            f"KeyboardInterrupt — orphan bug regressed.  This is the "
+            f"sleep-300-survives-SIGTERM scenario from Physikal's Apr 2026 "
+            f"report.  See tools/environments/base.py _wait_for_process "
+            f"except-block."
+        )
+        # And the worker should have observed the KeyboardInterrupt (i.e.
+        # it re-raised cleanly, not silently swallowed).
+        assert result_holder.get("exception") == "KeyboardInterrupt", (
+            f"worker result: {result_holder!r} — expected KeyboardInterrupt "
+            f"propagation after cleanup"
+        )
+    finally:
+        try:
+            env.cleanup()
+        except Exception:
+            pass
diff --git a/tools/environments/base.py b/tools/environments/base.py
index 8e990792369..1bc08449e49 100644
--- a/tools/environments/base.py
+++ b/tools/environments/base.py
@@ -23,6 +23,19 @@ from tools.interrupt import is_interrupted
 
 logger = logging.getLogger(__name__)
 
+# Opt-in debug tracing for the interrupt/activity/poll machinery.  Set
+# HERMES_DEBUG_INTERRUPT=1 to log loop entry/exit, periodic heartbeats, and
+# every is_interrupted() state change from _wait_for_process.  Off by default
+# (any non-empty value, even "0", enables it) to keep production logs quiet.
+_DEBUG_INTERRUPT = bool(os.getenv("HERMES_DEBUG_INTERRUPT"))
+
+if _DEBUG_INTERRUPT:
+    # AIAgent's quiet_mode path (run_agent.py) forces the `tools` logger to
+    # ERROR on CLI startup, which would silently swallow every trace we emit.
+    # Force this module's own logger back to INFO so the trace is visible in
+    # agent.log regardless of quiet-mode.  Scoped to the opt-in case only.
+    logger.setLevel(logging.INFO)
+
 # Thread-local activity callback.  The agent sets this before a tool call so
 # long-running _wait_for_process loops can report liveness to the gateway.
 _activity_callback_local = threading.local()
@@ -413,6 +426,13 @@ class BaseEnvironment(ABC):
         Fires the ``activity_callback`` (if set on this instance) every 10s
         while the process is running so the gateway's inactivity timeout
         doesn't kill long-running commands.
+
+        Also wraps the poll loop in a ``try/except`` that guarantees we
+        call ``self._kill_process(proc)`` if we exit via ``KeyboardInterrupt``
+        or ``SystemExit``.  Without this, the local backend (which spawns
+        subprocesses with ``os.setsid`` into their own process group) leaves
+        an orphan with ``PPID=1`` when python is shut down mid-tool — the
+        ``sleep 300``-survives-SIGTERM orphan bug that Physikal reported.
         """
         output_chunks: list[str] = []
 
@@ -437,28 +457,101 @@ class BaseEnvironment(ABC):
             "start": _now,
         }
 
-        while proc.poll() is None:
-            if is_interrupted():
+        # --- Debug tracing (opt-in via HERMES_DEBUG_INTERRUPT=1) -------------
+        # Captures loop entry/exit, interrupt state changes, and periodic
+        # heartbeats so we can diagnose "agent never sees the interrupt"
+        # reports without reproducing locally.
+        _tid = threading.current_thread().ident
+        _pid = getattr(proc, "pid", None)
+        _iter_count = 0
+        _last_heartbeat = _now
+        _last_interrupt_state = False
+        _cb_was_none = _get_activity_callback() is None
+        if _DEBUG_INTERRUPT:
+            logger.info(
+                "[interrupt-debug] _wait_for_process ENTER tid=%s pid=%s "
+                "timeout=%ss activity_cb=%s initial_interrupt=%s",
+                _tid, _pid, timeout,
+                "set" if not _cb_was_none else "MISSING",
+                is_interrupted(),
+            )
+
+        try:
+            while proc.poll() is None:
+                _iter_count += 1
+                if is_interrupted():
+                    if _DEBUG_INTERRUPT:
+                        logger.info(
+                            "[interrupt-debug] _wait_for_process INTERRUPT DETECTED "
+                            "tid=%s pid=%s iter=%d elapsed=%.1fs — killing process group",
+                            _tid, _pid, _iter_count, time.monotonic() - _activity_state["start"],
+                        )
+                    self._kill_process(proc)
+                    drain_thread.join(timeout=2)
+                    return {
+                        "output": "".join(output_chunks) + "\n[Command interrupted]",
+                        "returncode": 130,
+                    }
+                if time.monotonic() > deadline:
+                    if _DEBUG_INTERRUPT:
+                        logger.info(
+                            "[interrupt-debug] _wait_for_process TIMEOUT "
+                            "tid=%s pid=%s iter=%d timeout=%ss",
+                            _tid, _pid, _iter_count, timeout,
+                        )
+                    self._kill_process(proc)
+                    drain_thread.join(timeout=2)
+                    partial = "".join(output_chunks)
+                    timeout_msg = f"\n[Command timed out after {timeout}s]"
+                    return {
+                        "output": partial + timeout_msg
+                        if partial
+                        else timeout_msg.lstrip(),
+                        "returncode": 124,
+                    }
+                # Periodic activity touch so the gateway knows we're alive
+                touch_activity_if_due(_activity_state, "terminal command running")
+
+                # Heartbeat every ~30s: proves the loop is alive and reports
+                # the activity-callback state (thread-local, can get clobbered
+                # by nested tool calls or executor thread reuse).
+                if _DEBUG_INTERRUPT and time.monotonic() - _last_heartbeat >= 30.0:
+                    _cb_now_none = _get_activity_callback() is None
+                    logger.info(
+                        "[interrupt-debug] _wait_for_process HEARTBEAT "
+                        "tid=%s pid=%s iter=%d elapsed=%.0fs "
+                        "interrupt=%s activity_cb=%s%s",
+                        _tid, _pid, _iter_count,
+                        time.monotonic() - _activity_state["start"],
+                        is_interrupted(),
+                        "set" if not _cb_now_none else "MISSING",
+                        " (LOST during run)" if _cb_now_none and not _cb_was_none else "",
+                    )
+                    _last_heartbeat = time.monotonic()
+                    _cb_was_none = _cb_now_none
+
+                time.sleep(0.2)
+        except (KeyboardInterrupt, SystemExit):
+            # Signal arrived (SIGTERM/SIGHUP/SIGINT) or sys.exit() was called
+            # while we were polling.  The local backend spawns subprocesses
+            # with os.setsid, which puts them in their own process group — so
+            # if we let the interrupt propagate without killing the child,
+            # python exits and the child is reparented to init (PPID=1) and
+            # keeps running as an orphan.  Killing the process group here
+            # guarantees the tool's side effects stop when the agent stops.
+            if _DEBUG_INTERRUPT:
+                logger.info(
+                    "[interrupt-debug] _wait_for_process EXCEPTION_EXIT "
+                    "tid=%s pid=%s iter=%d elapsed=%.1fs — killing subprocess group before re-raise",
+                    _tid, _pid, _iter_count,
+                    time.monotonic() - _activity_state["start"],
+                )
+            try:
                 self._kill_process(proc)
                 drain_thread.join(timeout=2)
-                return {
-                    "output": "".join(output_chunks) + "\n[Command interrupted]",
-                    "returncode": 130,
-                }
-            if time.monotonic() > deadline:
-                self._kill_process(proc)
-                drain_thread.join(timeout=2)
-                partial = "".join(output_chunks)
-                timeout_msg = f"\n[Command timed out after {timeout}s]"
-                return {
-                    "output": partial + timeout_msg
-                    if partial
-                    else timeout_msg.lstrip(),
-                    "returncode": 124,
-                }
-            # Periodic activity touch so the gateway knows we're alive
-            touch_activity_if_due(_activity_state, "terminal command running")
-            time.sleep(0.2)
+            except Exception:
+                pass  # cleanup is best-effort
+            raise
 
         drain_thread.join(timeout=5)
 
@@ -467,6 +560,15 @@ class BaseEnvironment(ABC):
         except Exception:
             pass
 
+        if _DEBUG_INTERRUPT:
+            logger.info(
+                "[interrupt-debug] _wait_for_process EXIT (natural) "
+                "tid=%s pid=%s iter=%d elapsed=%.1fs returncode=%s",
+                _tid, _pid, _iter_count,
+                time.monotonic() - _activity_state["start"],
+                proc.returncode,
+            )
+
         return {"output": "".join(output_chunks), "returncode": proc.returncode}
 
     def _kill_process(self, proc: ProcessHandle):
diff --git a/tools/interrupt.py b/tools/interrupt.py
index 9bc8b83ae4f..ac784332f91 100644
--- a/tools/interrupt.py
+++ b/tools/interrupt.py
@@ -14,8 +14,23 @@ Usage in tools:
         return {"output": "[interrupted]", "returncode": 130}
 """
 
+import logging
+import os
 import threading
 
+logger = logging.getLogger(__name__)
+
+# Opt-in debug tracing — pairs with HERMES_DEBUG_INTERRUPT in
+# tools/environments/base.py.  Enables per-call logging of set_interrupt() so
+# the caller thread, target thread, and resulting interrupted-thread set are
+# visible when diagnosing "interrupt signaled but tool never saw it" reports.
+_DEBUG_INTERRUPT = bool(os.getenv("HERMES_DEBUG_INTERRUPT"))
+
+if _DEBUG_INTERRUPT:
+    # AIAgent's quiet_mode path forces `tools` logger to ERROR on CLI startup.
+    # Force our own logger back to INFO so the trace is visible in agent.log.
+    logger.setLevel(logging.INFO)
+
 # Set of thread idents that have been interrupted.
 _interrupted_threads: set[int] = set()
 _lock = threading.Lock()
@@ -35,6 +50,13 @@ def set_interrupt(active: bool, thread_id: int | None = None) -> None:
             _interrupted_threads.add(tid)
         else:
             _interrupted_threads.discard(tid)
+        _snapshot = set(_interrupted_threads) if _DEBUG_INTERRUPT else None
+    if _DEBUG_INTERRUPT:
+        logger.info(
+            "[interrupt-debug] set_interrupt(active=%s, target_tid=%s) "
+            "called_from_tid=%s current_set=%s",
+            active, tid, threading.current_thread().ident, _snapshot,
+        )
 
 
 def is_interrupted() -> bool:

From c5c0bb9a732c11b786e1595af98d5faa06048899 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 17 Apr 2026 21:16:33 -0700
Subject: [PATCH 002/143] fix: point optional-dep install hints at the venv's
 python (#11938)

Error messages that tell users to install optional extras now use
{sys.executable} -m pip install ... instead of a bare 'pip install
hermes-agent[extra]' string.  Under the curl installer, bare 'pip'
resolves to system pip, which either fails with PEP 668
externally-managed-environment or installs into the wrong Python.

Affects: hermes dashboard, hermes web server startup, mcp_serve,
hermes doctor Bedrock check, CLI voice mode, voice_mode tool runtime
error, Discord voice-channel join failure message.
---
 cli.py                   | 6 ++----
 gateway/run.py           | 3 +--
 hermes_cli/doctor.py     | 4 ++--
 hermes_cli/main.py       | 2 +-
 hermes_cli/web_server.py | 2 +-
 mcp_serve.py             | 4 ++--
 tools/voice_mode.py      | 4 ++--
 7 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/cli.py b/cli.py
index 2456c7754b2..ea76991acc3 100644
--- a/cli.py
+++ b/cli.py
@@ -7017,8 +7017,7 @@ class HermesCLI:
                 )
             raise RuntimeError(
                 "Voice mode requires sounddevice and numpy.\n"
-                "Install with: pip install sounddevice numpy\n"
-                "Or: pip install hermes-agent[voice]"
+                f"Install with: {sys.executable} -m pip install sounddevice numpy"
             )
         if not reqs.get("stt_available", reqs.get("stt_key_set")):
             raise RuntimeError(
@@ -7294,8 +7293,7 @@ class HermesCLI:
                     _cprint(f"  {_DIM}Then install/update the Termux:API Android app for microphone capture{_RST}")
                     _cprint(f"  {_BOLD}Option 2: pkg install python-numpy portaudio && python -m pip install sounddevice{_RST}")
                 else:
-                    _cprint(f"\n  {_BOLD}Install: pip install {' '.join(reqs['missing_packages'])}{_RST}")
-                    _cprint(f"  {_DIM}Or: pip install hermes-agent[voice]{_RST}")
+                    _cprint(f"\n  {_BOLD}Install: {sys.executable} -m pip install {' '.join(reqs['missing_packages'])}{_RST}")
             return
 
         with self._voice_lock:
diff --git a/gateway/run.py b/gateway/run.py
index b3270d95827..e09dbde2654 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -5520,8 +5520,7 @@ class GatewayRunner:
             if "pynacl" in err_lower or "nacl" in err_lower or "davey" in err_lower:
                 return (
                     "Voice dependencies are missing (PyNaCl / davey). "
-                    "Install or reinstall Hermes with the messaging extra, e.g. "
-                    "`pip install hermes-agent[messaging]`."
+                    f"Install with: `{sys.executable} -m pip install PyNaCl`"
                 )
             return f"Failed to join voice channel: {e}"
 
diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py
index 28c4af1fa8a..4138aeaa278 100644
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@@ -895,8 +895,8 @@ def run_doctor(args):
                 _model_count = len(_br_resp.get("modelSummaries", []))
                 print(f"\r  {color('✓', Colors.GREEN)} {_label} {color(f'({_auth_var}, {_region}, {_model_count} models)', Colors.DIM)}           ")
             except ImportError:
-                print(f"\r  {color('⚠', Colors.YELLOW)} {_label} {color('(boto3 not installed — pip install hermes-agent[bedrock])', Colors.DIM)}           ")
-                issues.append("Install boto3 for Bedrock: pip install hermes-agent[bedrock]")
+                print(f"\r  {color('⚠', Colors.YELLOW)} {_label} {color(f'(boto3 not installed — {sys.executable} -m pip install boto3)', Colors.DIM)}           ")
+                issues.append(f"Install boto3 for Bedrock: {sys.executable} -m pip install boto3")
             except Exception as _e:
                 _err_name = type(_e).__name__
                 print(f"\r  {color('⚠', Colors.YELLOW)} {_label} {color(f'({_err_name}: {_e})', Colors.DIM)}           ")
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index e2e2a774f5a..81b27e4a100 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -6029,7 +6029,7 @@ def cmd_dashboard(args):
         import uvicorn  # noqa: F401
     except ImportError:
         print("Web UI dependencies not installed.")
-        print("Install them with:  pip install hermes-agent[web]")
+        print(f"Install them with:  {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'")
         sys.exit(1)
 
     if not _build_web_ui(PROJECT_ROOT / "web", fatal=True):
diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py
index e5f2eb53767..0d0dc4a66b5 100644
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@@ -56,7 +56,7 @@ try:
 except ImportError:
     raise SystemExit(
         "Web UI requires fastapi and uvicorn.\n"
-        "Run 'hermes web' to auto-install, or: pip install hermes-agent[web]"
+        f"Install with: {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'"
     )
 
 WEB_DIST = Path(__file__).parent / "web_dist"
diff --git a/mcp_serve.py b/mcp_serve.py
index e8294d1f91f..e0aeb706191 100644
--- a/mcp_serve.py
+++ b/mcp_serve.py
@@ -433,7 +433,7 @@ def create_mcp_server(event_bridge: Optional[EventBridge] = None) -> "FastMCP":
     if not _MCP_SERVER_AVAILABLE:
         raise ImportError(
             "MCP server requires the 'mcp' package. "
-            "Install with: pip install 'hermes-agent[mcp]'"
+            f"Install with: {sys.executable} -m pip install 'mcp'"
         )
 
     mcp = FastMCP(
@@ -838,7 +838,7 @@ def run_mcp_server(verbose: bool = False) -> None:
     if not _MCP_SERVER_AVAILABLE:
         print(
             "Error: MCP server requires the 'mcp' package.\n"
-            "Install with: pip install 'hermes-agent[mcp]'",
+            f"Install with: {sys.executable} -m pip install 'mcp'",
             file=sys.stderr,
         )
         sys.exit(1)
diff --git a/tools/voice_mode.py b/tools/voice_mode.py
index 50515fc6903..66ecb242c67 100644
--- a/tools/voice_mode.py
+++ b/tools/voice_mode.py
@@ -15,6 +15,7 @@ import platform
 import re
 import shutil
 import subprocess
+import sys
 import tempfile
 import threading
 import time
@@ -582,8 +583,7 @@ class AudioRecorder:
         except (ImportError, OSError) as e:
             raise RuntimeError(
                 "Voice mode requires sounddevice and numpy.\n"
-                "Install with: pip install sounddevice numpy\n"
-                "Or: pip install hermes-agent[voice]"
+                f"Install with: {sys.executable} -m pip install sounddevice numpy"
             ) from e
 
         with self._lock:

From 45acd9beb571d0cba4ea38662b0daaac642ea3fb Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 17 Apr 2026 21:17:33 -0700
Subject: [PATCH 003/143] fix(gateway): ignore redelivered /restart after PTB
 offset ACK fails (#11940)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a Telegram /restart fires and PTB's graceful-shutdown `get_updates`
ACK call times out ("When polling for updates is restarted, updates may
be received twice" in gateway.log), the new gateway receives the same
/restart again and restarts a second time — a self-perpetuating loop.

Record the triggering update_id in `.restart_last_processed.json` when
handling /restart.  On the next process, reject a /restart whose
update_id <= the recorded one as a stale redelivery.  5-minute staleness
guard so an orphaned marker can't block a legitimately new /restart.

- gateway/platforms/base.py: add `platform_update_id` to MessageEvent
- gateway/platforms/telegram.py: propagate `update.update_id` through
  _build_message_event for text/command/location/media handlers
- gateway/run.py: write dedup marker in _handle_restart_command;
  _is_stale_restart_redelivery checks it before processing /restart
- tests/gateway/test_restart_redelivery_dedup.py: 9 new tests covering
  fresh restart, redelivery, staleness window, cross-platform,
  malformed-marker resilience, and no-update_id (CLI) bypass

Only active for Telegram today (the one platform with monotonic
cross-session update ordering); other platforms return False from
_is_stale_restart_redelivery and proceed normally.
---
 gateway/platforms/base.py                     |   9 +
 gateway/platforms/telegram.py                 |  24 +-
 gateway/run.py                                |  92 +++++++
 .../gateway/test_restart_redelivery_dedup.py  | 247 ++++++++++++++++++
 4 files changed, 366 insertions(+), 6 deletions(-)
 create mode 100644 tests/gateway/test_restart_redelivery_dedup.py

diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index af694a5e2d6..f82b1fa0683 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -669,6 +669,15 @@ class MessageEvent:
     # Original platform data
     raw_message: Any = None
     message_id: Optional[str] = None
+
+    # Platform-specific update identifier.  For Telegram this is the
+    # ``update_id`` from the PTB Update wrapper; other platforms currently
+    # ignore it.  Used by ``/restart`` to record the triggering update so the
+    # new gateway can recognise a redelivery of that same update and skip it
+    # rather than restart again if PTB's graceful-shutdown ACK times out
+    # ("Error while calling `get_updates` one more time to mark all fetched
+    # updates" in gateway.log).
+    platform_update_id: Optional[int] = None
     
     # Media attachments
     # media_urls: local file paths (for vision tool access)
diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index 5b1fef1337b..8df05268c71 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -2326,7 +2326,7 @@ class TelegramAdapter(BasePlatformAdapter):
         if not self._should_process_message(update.message):
             return
 
-        event = self._build_message_event(update.message, MessageType.TEXT)
+        event = self._build_message_event(update.message, MessageType.TEXT, update_id=update.update_id)
         event.text = self._clean_bot_trigger_text(event.text)
         self._enqueue_text_event(event)
     
@@ -2337,7 +2337,7 @@ class TelegramAdapter(BasePlatformAdapter):
         if not self._should_process_message(update.message, is_command=True):
             return
         
-        event = self._build_message_event(update.message, MessageType.COMMAND)
+        event = self._build_message_event(update.message, MessageType.COMMAND, update_id=update.update_id)
         await self.handle_message(event)
     
     async def _handle_location_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
@@ -2373,7 +2373,7 @@ class TelegramAdapter(BasePlatformAdapter):
         parts.append(f"Map: https://www.google.com/maps/search/?api=1&query={lat},{lon}")
         parts.append("Ask what they'd like to find nearby (restaurants, cafes, etc.) and any preferences.")
 
-        event = self._build_message_event(msg, MessageType.LOCATION)
+        event = self._build_message_event(msg, MessageType.LOCATION, update_id=update.update_id)
         event.text = "\n".join(parts)
         await self.handle_message(event)
 
@@ -2524,7 +2524,7 @@ class TelegramAdapter(BasePlatformAdapter):
         else:
             msg_type = MessageType.DOCUMENT
         
-        event = self._build_message_event(msg, msg_type)
+        event = self._build_message_event(msg, msg_type, update_id=update.update_id)
         
         # Add caption as text
         if msg.caption:
@@ -2863,8 +2863,19 @@ class TelegramAdapter(BasePlatformAdapter):
                 self.name, cache_key, thread_id,
             )
 
-    def _build_message_event(self, message: Message, msg_type: MessageType) -> MessageEvent:
-        """Build a MessageEvent from a Telegram message."""
+    def _build_message_event(
+        self,
+        message: Message,
+        msg_type: MessageType,
+        update_id: Optional[int] = None,
+    ) -> MessageEvent:
+        """Build a MessageEvent from a Telegram message.
+
+        ``update_id`` is the ``Update.update_id`` from PTB; passing it through
+        lets ``/restart`` record the triggering update so the new gateway
+        process can detect and ignore a redelivery of it (``/restart`` may be
+        delivered twice when PTB's graceful-shutdown ACK fails).
+        """
         chat = message.chat
         user = message.from_user
         
@@ -2943,6 +2954,7 @@ class TelegramAdapter(BasePlatformAdapter):
             source=source,
             raw_message=message,
             message_id=str(message.message_id),
+            platform_update_id=update_id,
             reply_to_message_id=reply_to_id,
             reply_to_text=reply_to_text,
             auto_skill=topic_skill,
diff --git a/gateway/run.py b/gateway/run.py
index e09dbde2654..62b813f0d6b 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -4738,6 +4738,26 @@ class GatewayRunner:
 
     async def _handle_restart_command(self, event: MessageEvent) -> str:
         """Handle /restart command - drain active work, then restart the gateway."""
+        # Defensive idempotency check: if the previous gateway process
+        # recorded this same /restart (same platform + update_id) and the new
+        # process is seeing it *again*, this is a re-delivery caused by PTB's
+        # graceful-shutdown `get_updates` ACK failing on the way out ("Error
+        # while calling `get_updates` one more time to mark all fetched
+        # updates. Suppressing error to ensure graceful shutdown. When
+        # polling for updates is restarted, updates may be received twice."
+        # in gateway.log).  Ignoring the stale redelivery prevents a
+        # self-perpetuating restart loop where every fresh gateway
+        # re-processes the same /restart command and immediately restarts
+        # again.
+        if self._is_stale_restart_redelivery(event):
+            logger.info(
+                "Ignoring redelivered /restart (platform=%s, update_id=%s) — "
+                "already processed by a previous gateway instance.",
+                event.source.platform.value if event.source and event.source.platform else "?",
+                event.platform_update_id,
+            )
+            return ""
+
         if self._restart_requested or self._draining:
             count = self._running_agent_count()
             if count:
@@ -4760,6 +4780,26 @@ class GatewayRunner:
         except Exception as e:
             logger.debug("Failed to write restart notify file: %s", e)
 
+        # Record the triggering platform + update_id in a dedicated dedup
+        # marker.  Unlike .restart_notify.json (which gets unlinked once the
+        # new gateway sends the "gateway restarted" notification), this
+        # marker persists so the new gateway can still detect a delayed
+        # /restart redelivery from Telegram.  Overwritten on every /restart.
+        try:
+            import json as _json
+            import time as _time
+            dedup_data = {
+                "platform": event.source.platform.value if event.source.platform else None,
+                "requested_at": _time.time(),
+            }
+            if event.platform_update_id is not None:
+                dedup_data["update_id"] = event.platform_update_id
+            (_hermes_home / ".restart_last_processed.json").write_text(
+                _json.dumps(dedup_data)
+            )
+        except Exception as e:
+            logger.debug("Failed to write restart dedup marker: %s", e)
+
         active_agents = self._running_agent_count()
         # When running under a service manager (systemd/launchd), use the
         # service restart path: exit with code 75 so the service manager
@@ -4775,6 +4815,58 @@ class GatewayRunner:
             return f"⏳ Draining {active_agents} active agent(s) before restart..."
         return "♻ Restarting gateway. If you aren't notified within 60 seconds, restart from the console with `hermes gateway restart`."
 
+    def _is_stale_restart_redelivery(self, event: MessageEvent) -> bool:
+        """Return True if this /restart is a Telegram re-delivery we already handled.
+
+        The previous gateway wrote ``.restart_last_processed.json`` with the
+        triggering platform + update_id when it processed the /restart.  If
+        we now see a /restart on the same platform with an update_id <= that
+        recorded value AND the marker is recent (< 5 minutes), it's a
+        redelivery and should be ignored.
+
+        Only applies to Telegram today (the only platform that exposes a
+        numeric cross-session update ordering); other platforms return False.
+        """
+        if event is None or event.source is None:
+            return False
+        if event.platform_update_id is None:
+            return False
+        if event.source.platform is None:
+            return False
+        # Only Telegram populates platform_update_id currently; be explicit
+        # so future platforms aren't accidentally gated by this check.
+        try:
+            platform_value = event.source.platform.value
+        except Exception:
+            return False
+        if platform_value != "telegram":
+            return False
+
+        try:
+            import json as _json
+            import time as _time
+            marker_path = _hermes_home / ".restart_last_processed.json"
+            if not marker_path.exists():
+                return False
+            data = _json.loads(marker_path.read_text())
+        except Exception:
+            return False
+
+        if data.get("platform") != platform_value:
+            return False
+        recorded_uid = data.get("update_id")
+        if not isinstance(recorded_uid, int):
+            return False
+        # Staleness guard: ignore markers older than 5 minutes.  A legitimately
+        # old marker (e.g. crash recovery where notify never fired) should not
+        # swallow a fresh /restart from the user.
+        requested_at = data.get("requested_at")
+        if isinstance(requested_at, (int, float)):
+            if _time.time() - requested_at > 300:
+                return False
+        return event.platform_update_id <= recorded_uid
+
+
     async def _handle_help_command(self, event: MessageEvent) -> str:
         """Handle /help command - list available commands."""
         from hermes_cli.commands import gateway_help_lines
diff --git a/tests/gateway/test_restart_redelivery_dedup.py b/tests/gateway/test_restart_redelivery_dedup.py
new file mode 100644
index 00000000000..aa4e4330caf
--- /dev/null
+++ b/tests/gateway/test_restart_redelivery_dedup.py
@@ -0,0 +1,247 @@
+"""Tests for /restart idempotency guard against Telegram update re-delivery.
+
+When PTB's graceful-shutdown ACK call (the final `get_updates` on exit) fails
+with a network error, Telegram re-delivers the `/restart` message to the new
+gateway process.  Without a dedup guard, the new gateway would process
+`/restart` again and immediately restart — a self-perpetuating loop.
+"""
+import asyncio
+import json
+import time
+from unittest.mock import MagicMock
+
+import pytest
+
+import gateway.run as gateway_run
+from gateway.platforms.base import MessageEvent, MessageType
+from tests.gateway.restart_test_helpers import make_restart_runner, make_restart_source
+
+
+def _make_restart_event(update_id: int | None = 100) -> MessageEvent:
+    return MessageEvent(
+        text="/restart",
+        message_type=MessageType.TEXT,
+        source=make_restart_source(),
+        message_id="m1",
+        platform_update_id=update_id,
+    )
+
+
+@pytest.mark.asyncio
+async def test_restart_handler_writes_dedup_marker_with_update_id(tmp_path, monkeypatch):
+    """First /restart writes .restart_last_processed.json with the triggering update_id."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.delenv("INVOCATION_ID", raising=False)
+
+    runner, _adapter = make_restart_runner()
+    runner.request_restart = MagicMock(return_value=True)
+
+    event = _make_restart_event(update_id=12345)
+    result = await runner._handle_restart_command(event)
+
+    assert "Restarting gateway" in result
+    marker_path = tmp_path / ".restart_last_processed.json"
+    assert marker_path.exists()
+    data = json.loads(marker_path.read_text())
+    assert data["platform"] == "telegram"
+    assert data["update_id"] == 12345
+    assert isinstance(data["requested_at"], (int, float))
+
+
+@pytest.mark.asyncio
+async def test_redelivered_restart_with_same_update_id_is_ignored(tmp_path, monkeypatch):
+    """A /restart with update_id <= recorded marker is silently ignored as a redelivery."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.delenv("INVOCATION_ID", raising=False)
+
+    # Previous gateway recorded update_id=12345 a few seconds ago
+    marker = tmp_path / ".restart_last_processed.json"
+    marker.write_text(json.dumps({
+        "platform": "telegram",
+        "update_id": 12345,
+        "requested_at": time.time() - 5,
+    }))
+
+    runner, _adapter = make_restart_runner()
+    runner.request_restart = MagicMock()
+
+    event = _make_restart_event(update_id=12345)  # same update_id → redelivery
+    result = await runner._handle_restart_command(event)
+
+    assert result == ""  # silently ignored
+    runner.request_restart.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_redelivered_restart_with_older_update_id_is_ignored(tmp_path, monkeypatch):
+    """update_id strictly LESS than the recorded one is also a redelivery."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.delenv("INVOCATION_ID", raising=False)
+
+    marker = tmp_path / ".restart_last_processed.json"
+    marker.write_text(json.dumps({
+        "platform": "telegram",
+        "update_id": 12345,
+        "requested_at": time.time() - 5,
+    }))
+
+    runner, _adapter = make_restart_runner()
+    runner.request_restart = MagicMock()
+
+    event = _make_restart_event(update_id=12344)  # older update — shouldn't happen,
+                                                  # but if Telegram does re-deliver
+                                                  # something older, treat as stale
+    result = await runner._handle_restart_command(event)
+
+    assert result == ""
+    runner.request_restart.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_fresh_restart_with_higher_update_id_is_processed(tmp_path, monkeypatch):
+    """A NEW /restart from the user (higher update_id) bypasses the dedup guard."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.delenv("INVOCATION_ID", raising=False)
+
+    # Previous restart recorded update_id=12345
+    marker = tmp_path / ".restart_last_processed.json"
+    marker.write_text(json.dumps({
+        "platform": "telegram",
+        "update_id": 12345,
+        "requested_at": time.time() - 5,
+    }))
+
+    runner, _adapter = make_restart_runner()
+    runner.request_restart = MagicMock(return_value=True)
+
+    event = _make_restart_event(update_id=12346)  # strictly higher → fresh
+    result = await runner._handle_restart_command(event)
+
+    assert "Restarting gateway" in result
+    runner.request_restart.assert_called_once()
+
+    # Marker is overwritten with the new update_id
+    data = json.loads(marker.read_text())
+    assert data["update_id"] == 12346
+
+
+@pytest.mark.asyncio
+async def test_stale_marker_older_than_5min_does_not_block(tmp_path, monkeypatch):
+    """A marker older than the 5-minute window is ignored — fresh /restart proceeds."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.delenv("INVOCATION_ID", raising=False)
+
+    marker = tmp_path / ".restart_last_processed.json"
+    marker.write_text(json.dumps({
+        "platform": "telegram",
+        "update_id": 12345,
+        "requested_at": time.time() - 600,  # 10 minutes ago
+    }))
+
+    runner, _adapter = make_restart_runner()
+    runner.request_restart = MagicMock(return_value=True)
+
+    # Same update_id as the stale marker, but the marker is too old to trust
+    event = _make_restart_event(update_id=12345)
+    result = await runner._handle_restart_command(event)
+
+    assert "Restarting gateway" in result
+    runner.request_restart.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_no_marker_file_allows_restart(tmp_path, monkeypatch):
+    """Clean gateway start (no prior marker) processes /restart normally."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.delenv("INVOCATION_ID", raising=False)
+
+    runner, _adapter = make_restart_runner()
+    runner.request_restart = MagicMock(return_value=True)
+
+    event = _make_restart_event(update_id=100)
+    result = await runner._handle_restart_command(event)
+
+    assert "Restarting gateway" in result
+    runner.request_restart.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_corrupt_marker_file_is_treated_as_absent(tmp_path, monkeypatch):
+    """Malformed JSON in the marker file doesn't crash — /restart proceeds."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.delenv("INVOCATION_ID", raising=False)
+
+    marker = tmp_path / ".restart_last_processed.json"
+    marker.write_text("not-json{")
+
+    runner, _adapter = make_restart_runner()
+    runner.request_restart = MagicMock(return_value=True)
+
+    event = _make_restart_event(update_id=100)
+    result = await runner._handle_restart_command(event)
+
+    assert "Restarting gateway" in result
+    runner.request_restart.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_event_without_update_id_bypasses_dedup(tmp_path, monkeypatch):
+    """Events with no platform_update_id (non-Telegram, CLI fallback) aren't gated."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.delenv("INVOCATION_ID", raising=False)
+
+    marker = tmp_path / ".restart_last_processed.json"
+    marker.write_text(json.dumps({
+        "platform": "telegram",
+        "update_id": 999999,
+        "requested_at": time.time(),
+    }))
+
+    runner, _adapter = make_restart_runner()
+    runner.request_restart = MagicMock(return_value=True)
+
+    # No update_id — the dedup check should NOT kick in
+    event = _make_restart_event(update_id=None)
+    result = await runner._handle_restart_command(event)
+
+    assert "Restarting gateway" in result
+    runner.request_restart.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_different_platform_bypasses_dedup(tmp_path, monkeypatch):
+    """Marker from Telegram doesn't block a /restart from another platform."""
+    from gateway.config import Platform
+    from gateway.session import SessionSource
+
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.delenv("INVOCATION_ID", raising=False)
+
+    marker = tmp_path / ".restart_last_processed.json"
+    marker.write_text(json.dumps({
+        "platform": "telegram",
+        "update_id": 12345,
+        "requested_at": time.time(),
+    }))
+
+    runner, _adapter = make_restart_runner()
+    runner.request_restart = MagicMock(return_value=True)
+
+    # /restart from Discord — not a redelivery candidate
+    discord_source = SessionSource(
+        platform=Platform.DISCORD,
+        chat_id="discord-chan",
+        chat_type="dm",
+        user_id="u1",
+    )
+    event = MessageEvent(
+        text="/restart",
+        message_type=MessageType.TEXT,
+        source=discord_source,
+        message_id="m1",
+        platform_update_id=12345,
+    )
+    result = await runner._handle_restart_command(event)
+
+    assert "Restarting gateway" in result
+    runner.request_restart.assert_called_once()

From 11a89cc032b20f75e5273f98e9a02dcaf06ce573 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 17 Apr 2026 21:22:11 -0700
Subject: [PATCH 004/143] docs: backfill coverage for recently-merged features
 (#11942)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fills documentation gaps that accumulated as features merged ahead of their
docs updates. All additions are verified against code and the originating PRs.

Providers:
- Ollama Cloud (#10782) — new provider section, env vars, quickstart/fallback rows
- xAI Grok Responses API + TTS (#10783) — provider note, TTS table + config
- Google Gemini CLI OAuth (#11270) — quickstart/fallback/cli-commands entries
- NVIDIA NIM (#11774) — NVIDIA_API_KEY / NVIDIA_BASE_URL in env-vars reference
- HERMES_INFERENCE_PROVIDER enum updated

Messaging:
- DISCORD_ALLOWED_ROLES (#11608) — env-vars, discord.md access control section
- DingTalk QR device-flow (#11574) — wizard path in Option A + openClaw disclosure
- Feishu document comment intelligent reply (#11898) — full section + 3-tier access control + CLI

Skills / commands:
- concept-diagrams skill (#11363) — optional-skills-catalog entry
- /gquota (#11270) — slash-commands reference

Build: docusaurus build passes, ascii-guard lint 0 errors.
---
 website/docs/getting-started/quickstart.md    |  3 ++
 website/docs/integrations/providers.md        | 30 +++++++++++-
 website/docs/reference/cli-commands.md        |  2 +-
 .../docs/reference/environment-variables.md   |  9 +++-
 .../docs/reference/optional-skills-catalog.md |  1 +
 website/docs/reference/slash-commands.md      |  1 +
 .../user-guide/features/fallback-providers.md |  3 ++
 website/docs/user-guide/features/tts.md       | 10 +++-
 website/docs/user-guide/messaging/dingtalk.md |  9 +++-
 website/docs/user-guide/messaging/discord.md  | 23 ++++++++-
 website/docs/user-guide/messaging/feishu.md   | 48 +++++++++++++++++++
 11 files changed, 132 insertions(+), 7 deletions(-)

diff --git a/website/docs/getting-started/quickstart.md b/website/docs/getting-started/quickstart.md
index 428d23b7ce3..77d6ac84904 100644
--- a/website/docs/getting-started/quickstart.md
+++ b/website/docs/getting-started/quickstart.md
@@ -62,6 +62,9 @@ hermes setup       # Or configure everything at once
 | **OpenCode Go** | $10/month subscription for open models | Set `OPENCODE_GO_API_KEY` |
 | **DeepSeek** | Direct DeepSeek API access | Set `DEEPSEEK_API_KEY` |
 | **NVIDIA NIM** | Nemotron models via build.nvidia.com or local NIM | Set `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) |
+| **Ollama Cloud** | Managed Ollama catalog without local GPU | Set `OLLAMA_API_KEY` (or pick **Ollama Cloud** in `hermes model`) |
+| **Google Gemini (OAuth)** | Gemini via Cloud Code Assist — free and paid tiers | OAuth via `hermes model` (optional: `HERMES_GEMINI_PROJECT_ID` for paid tiers) |
+| **xAI (Grok)** | Grok 4 models via Responses API + prompt caching | Set `XAI_API_KEY` (alias: `grok`) |
 | **GitHub Copilot** | GitHub Copilot subscription (GPT-5.x, Claude, Gemini, etc.) | OAuth via `hermes model`, or `COPILOT_GITHUB_TOKEN` / `GH_TOKEN` |
 | **GitHub Copilot ACP** | Copilot ACP agent backend (spawns local `copilot` CLI) | `hermes model` (requires `copilot` CLI + `copilot login`) |
 | **Vercel AI Gateway** | Vercel AI Gateway routing | Set `AI_GATEWAY_API_KEY` |
diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md
index 750ad671cda..56d2f0ea38d 100644
--- a/website/docs/integrations/providers.md
+++ b/website/docs/integrations/providers.md
@@ -289,12 +289,40 @@ Base URLs can be overridden with `GLM_BASE_URL`, `KIMI_BASE_URL`, `MINIMAX_BASE_
 When using the Z.AI / GLM provider, Hermes automatically probes multiple endpoints (global, China, coding variants) to find one that accepts your API key. You don't need to set `GLM_BASE_URL` manually — the working endpoint is detected and cached automatically.
 :::
 
-### xAI (Grok) Prompt Caching
+### xAI (Grok) — Responses API + Prompt Caching
+
+xAI is wired through the Responses API (`codex_responses` transport) for automatic reasoning support on Grok 4 models — no `reasoning_effort` parameter needed, the server reasons by default. Set `XAI_API_KEY` in `~/.hermes/.env` and pick xAI in `hermes model`, or drop `grok` as a shortcut into `/model grok-4-1-fast-reasoning`.
 
 When using xAI as a provider (any base URL containing `x.ai`), Hermes automatically enables prompt caching by sending the `x-grok-conv-id` header with every API request. This routes requests to the same server within a conversation session, allowing xAI's infrastructure to reuse cached system prompts and conversation history.
 
 No configuration is needed — caching activates automatically when an xAI endpoint is detected and a session ID is available. This reduces latency and cost for multi-turn conversations.
 
+xAI also ships a dedicated TTS endpoint (`/v1/tts`). Select **xAI TTS** in `hermes tools` → Voice & TTS, or see the [Voice & TTS](../user-guide/features/tts.md#text-to-speech) page for config.
+
+### Ollama Cloud — Managed Ollama Models, OAuth + API Key
+
+[Ollama Cloud](https://ollama.com/cloud) hosts the same open-weight catalog as local Ollama but without the GPU requirement. Pick it in `hermes model` as **Ollama Cloud**, paste your API key from [ollama.com/settings/keys](https://ollama.com/settings/keys), and Hermes auto-discovers the available models.
+
+```bash
+hermes model
+# → pick "Ollama Cloud"
+# → paste your OLLAMA_API_KEY
+# → select from discovered models (gpt-oss:120b, glm-4.6:cloud, qwen3-coder:480b-cloud, etc.)
+```
+
+Or `config.yaml` directly:
+```yaml
+model:
+  provider: "ollama-cloud"
+  default: "gpt-oss:120b"
+```
+
+The model catalog is fetched dynamically from `ollama.com/v1/models` and cached for one hour. `model:tag` notation (e.g. `qwen3-coder:480b-cloud`) is preserved through normalization — keep the colon between the model name and the tag; don't replace it with a dash.
+
+:::tip Ollama Cloud vs local Ollama
+Both speak the same OpenAI-compatible API. Cloud is a first-class provider (`--provider ollama-cloud`, `OLLAMA_API_KEY`); local Ollama is reached via the Custom Endpoint flow (base URL `http://localhost:11434/v1`, no key). Use cloud for large models you can't run locally; use local for privacy or offline work.
+:::
+
 ### NVIDIA NIM
 
 Nemotron and other open source models via [build.nvidia.com](https://build.nvidia.com) (free API key) or a local NIM endpoint.
diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md
index 6b08552676e..ea5557a193d 100644
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@@ -85,7 +85,7 @@ Common options:
 | `-q`, `--query "..."` | One-shot, non-interactive prompt. |
 | `-m`, `--model <model>` | Override the model for this run. |
 | `-t`, `--toolsets <csv>` | Enable a comma-separated set of toolsets. |
-| `--provider <provider>` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `gemini`, `huggingface`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`. |
+| `--provider <provider>` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `gemini`, `google-gemini-cli`, `huggingface`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`). |
 | `-s`, `--skills <name>` | Preload one or more skills for the session (can be repeated or comma-separated). |
 | `-v`, `--verbose` | Verbose output. |
 | `-Q`, `--quiet` | Programmatic mode: suppress banner/spinner/tool previews. |
diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md
index ead884ba7b7..ff223739af3 100644
--- a/website/docs/reference/environment-variables.md
+++ b/website/docs/reference/environment-variables.md
@@ -56,6 +56,12 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
 | `DASHSCOPE_BASE_URL` | Custom DashScope base URL (default: `https://coding-intl.dashscope.aliyuncs.com/v1`) |
 | `DEEPSEEK_API_KEY` | DeepSeek API key for direct DeepSeek access ([platform.deepseek.com](https://platform.deepseek.com/api_keys)) |
 | `DEEPSEEK_BASE_URL` | Custom DeepSeek API base URL |
+| `NVIDIA_API_KEY` | NVIDIA NIM API key — Nemotron and open models ([build.nvidia.com](https://build.nvidia.com)) |
+| `NVIDIA_BASE_URL` | Override NVIDIA base URL (default: `https://integrate.api.nvidia.com/v1`; set to `http://localhost:8000/v1` for a local NIM endpoint) |
+| `OLLAMA_API_KEY` | Ollama Cloud API key — managed Ollama catalog without local GPU ([ollama.com/settings/keys](https://ollama.com/settings/keys)) |
+| `OLLAMA_BASE_URL` | Override Ollama Cloud base URL (default: `https://ollama.com/v1`) |
+| `XAI_API_KEY` | xAI (Grok) API key for chat + TTS ([console.x.ai](https://console.x.ai/)) |
+| `XAI_BASE_URL` | Override xAI base URL (default: `https://api.x.ai/v1`) |
 | `OPENCODE_ZEN_API_KEY` | OpenCode Zen API key — pay-as-you-go access to curated models ([opencode.ai](https://opencode.ai/auth)) |
 | `OPENCODE_ZEN_BASE_URL` | Override OpenCode Zen base URL |
 | `OPENCODE_GO_API_KEY` | OpenCode Go API key — $10/month subscription for open models ([opencode.ai](https://opencode.ai/auth)) |
@@ -73,7 +79,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
 
 | Variable | Description |
 |----------|-------------|
-| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `deepseek`, `opencode-zen`, `opencode-go`, `ai-gateway` (default: `auto`) |
+| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `google-gemini-cli`, `opencode-zen`, `opencode-go`, `ai-gateway` (default: `auto`) |
 | `HERMES_PORTAL_BASE_URL` | Override Nous Portal URL (for development/testing) |
 | `NOUS_INFERENCE_BASE_URL` | Override Nous inference API URL |
 | `HERMES_NOUS_MIN_KEY_TTL_SECONDS` | Min agent key TTL before re-mint (default: 1800 = 30min) |
@@ -187,6 +193,7 @@ For cloud sandbox backends, persistence is filesystem-oriented. `TERMINAL_LIFETI
 | `TELEGRAM_PROXY` | Proxy URL for Telegram connections — overrides `HTTPS_PROXY`. Supports `http://`, `https://`, `socks5://` |
 | `DISCORD_BOT_TOKEN` | Discord bot token |
 | `DISCORD_ALLOWED_USERS` | Comma-separated Discord user IDs allowed to use the bot |
+| `DISCORD_ALLOWED_ROLES` | Comma-separated Discord role IDs allowed to use the bot (OR with `DISCORD_ALLOWED_USERS`). Auto-enables the Members intent. Useful when moderation teams churn — role grants propagate automatically. |
 | `DISCORD_HOME_CHANNEL` | Default Discord channel for cron delivery |
 | `DISCORD_HOME_CHANNEL_NAME` | Display name for the Discord home channel |
 | `DISCORD_REQUIRE_MENTION` | Require an @mention before responding in server channels |
diff --git a/website/docs/reference/optional-skills-catalog.md b/website/docs/reference/optional-skills-catalog.md
index 18ec4b3810b..6fde99b5ee8 100644
--- a/website/docs/reference/optional-skills-catalog.md
+++ b/website/docs/reference/optional-skills-catalog.md
@@ -54,6 +54,7 @@ hermes skills uninstall <skill-name>
 | Skill | Description |
 |-------|-------------|
 | **blender-mcp** | Control Blender directly from Hermes via socket connection to the blender-mcp addon. Create 3D objects, materials, animations, and run arbitrary Blender Python (bpy) code. |
+| **concept-diagrams** | Generate flat, minimal light/dark-aware SVG diagrams as standalone HTML files, using a unified educational visual language (9 semantic color ramps, automatic dark mode). Best for physics setups, chemistry mechanisms, math curves, physical objects (aircraft, turbines, smartphones), floor plans, cross-sections, lifecycle/process narratives, and hub-spoke system diagrams. Ships with 15 example diagrams. |
 | **meme-generation** | Generate real meme images by picking a template and overlaying text with Pillow. Produces actual `.png` meme files. |
 
 ## DevOps
diff --git a/website/docs/reference/slash-commands.md b/website/docs/reference/slash-commands.md
index 2ad3c62d81c..214b2866d07 100644
--- a/website/docs/reference/slash-commands.md
+++ b/website/docs/reference/slash-commands.md
@@ -83,6 +83,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
 | `/image <path>` | Attach a local image file for your next prompt. |
 | `/debug` | Upload debug report (system info + logs) and get shareable links. Also available in messaging. |
 | `/profile` | Show active profile name and home directory |
+| `/gquota` | Show Google Gemini Code Assist quota usage with progress bars (only available when the `google-gemini-cli` provider is active). |
 
 ### Exit
 
diff --git a/website/docs/user-guide/features/fallback-providers.md b/website/docs/user-guide/features/fallback-providers.md
index 12fde185d46..8d16079c2e5 100644
--- a/website/docs/user-guide/features/fallback-providers.md
+++ b/website/docs/user-guide/features/fallback-providers.md
@@ -48,6 +48,9 @@ Both `provider` and `model` are **required**. If either is missing, the fallback
 | MiniMax (China) | `minimax-cn` | `MINIMAX_CN_API_KEY` |
 | DeepSeek | `deepseek` | `DEEPSEEK_API_KEY` |
 | NVIDIA NIM | `nvidia` | `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) |
+| Ollama Cloud | `ollama-cloud` | `OLLAMA_API_KEY` |
+| Google Gemini (OAuth) | `google-gemini-cli` | `hermes model` (Google OAuth; optional: `HERMES_GEMINI_PROJECT_ID`) |
+| xAI (Grok) | `xai` (alias `grok`) | `XAI_API_KEY` (optional: `XAI_BASE_URL`) |
 | OpenCode Zen | `opencode-zen` | `OPENCODE_ZEN_API_KEY` |
 | OpenCode Go | `opencode-go` | `OPENCODE_GO_API_KEY` |
 | Kilo Code | `kilocode` | `KILOCODE_API_KEY` |
diff --git a/website/docs/user-guide/features/tts.md b/website/docs/user-guide/features/tts.md
index 9b0fe8b3afc..9f9d257fcc4 100644
--- a/website/docs/user-guide/features/tts.md
+++ b/website/docs/user-guide/features/tts.md
@@ -24,6 +24,7 @@ Convert text to speech with seven providers:
 | **MiniMax TTS** | Excellent | Paid | `MINIMAX_API_KEY` |
 | **Mistral (Voxtral TTS)** | Excellent | Paid | `MISTRAL_API_KEY` |
 | **Google Gemini TTS** | Excellent | Free tier | `GEMINI_API_KEY` |
+| **xAI TTS** | Excellent | Paid | `XAI_API_KEY` |
 | **NeuTTS** | Good | Free | None needed |
 
 ### Platform Delivery
@@ -40,7 +41,7 @@ Convert text to speech with seven providers:
 ```yaml
 # In ~/.hermes/config.yaml
 tts:
-  provider: "edge"              # "edge" | "elevenlabs" | "openai" | "minimax" | "mistral" | "gemini" | "neutts"
+  provider: "edge"              # "edge" | "elevenlabs" | "openai" | "minimax" | "mistral" | "gemini" | "xai" | "neutts"
   speed: 1.0                    # Global speed multiplier (provider-specific settings override this)
   edge:
     voice: "en-US-AriaNeural"   # 322 voices, 74 languages
@@ -65,6 +66,12 @@ tts:
   gemini:
     model: "gemini-2.5-flash-preview-tts"  # or gemini-2.5-pro-preview-tts
     voice: "Kore"               # 30 prebuilt voices: Zephyr, Puck, Kore, Enceladus, Gacrux, etc.
+  xai:
+    voice_id: "eve"             # xAI TTS voice (see https://docs.x.ai/docs/api-reference#tts)
+    language: "en"              # ISO 639-1 code
+    sample_rate: 24000          # 22050 / 24000 (default) / 44100 / 48000
+    bit_rate: 128000            # MP3 bitrate; only applies when codec=mp3
+    # base_url: "https://api.x.ai/v1"   # Override via XAI_BASE_URL env var
   neutts:
     ref_audio: ''
     ref_text: ''
@@ -82,6 +89,7 @@ Telegram voice bubbles require Opus/OGG audio format:
 - **Edge TTS** (default) outputs MP3 and needs **ffmpeg** to convert:
 - **MiniMax TTS** outputs MP3 and needs **ffmpeg** to convert for Telegram voice bubbles
 - **Google Gemini TTS** outputs raw PCM and uses **ffmpeg** to encode Opus directly for Telegram voice bubbles
+- **xAI TTS** outputs MP3 and needs **ffmpeg** to convert for Telegram voice bubbles
 - **NeuTTS** outputs WAV and also needs **ffmpeg** to convert for Telegram voice bubbles
 
 ```bash
diff --git a/website/docs/user-guide/messaging/dingtalk.md b/website/docs/user-guide/messaging/dingtalk.md
index d88c1a952f6..9e8e74ee26f 100644
--- a/website/docs/user-guide/messaging/dingtalk.md
+++ b/website/docs/user-guide/messaging/dingtalk.md
@@ -100,7 +100,14 @@ Run the guided setup command:
 hermes gateway setup
 ```
 
-Select **DingTalk** when prompted, then paste your Client ID, Client Secret, and allowed user IDs when asked.
+Select **DingTalk** when prompted. The setup wizard can authorize via one of two paths:
+
+- **QR-code device flow (recommended).** Scan the QR that prints in your terminal with the DingTalk mobile app — your Client ID and Client Secret are returned automatically and written to `~/.hermes/.env`. No developer-console trip needed.
+- **Manual paste.** If you already have credentials (or QR scanning isn't convenient), paste your Client ID, Client Secret, and allowed user IDs when prompted.
+
+:::note openClaw branding disclosure
+Because DingTalk's `verification_uri_complete` is hardcoded to the openClaw identity at the API layer, the QR currently authorizes under an `openClaw` source string until Alibaba / DingTalk-Real-AI registers a Hermes-specific template server-side. This is purely how DingTalk presents the consent screen — the bot you create is fully yours and private to your tenant.
+:::
 
 ### Option B: Manual Configuration
 
diff --git a/website/docs/user-guide/messaging/discord.md b/website/docs/user-guide/messaging/discord.md
index 233f544d9c6..44e08330dfa 100644
--- a/website/docs/user-guide/messaging/discord.md
+++ b/website/docs/user-guide/messaging/discord.md
@@ -271,7 +271,8 @@ Discord behavior is controlled through two files: **`~/.hermes/.env`** for crede
 | Variable | Required | Default | Description |
 |----------|----------|---------|-------------|
 | `DISCORD_BOT_TOKEN` | **Yes** | — | Bot token from the [Discord Developer Portal](https://discord.com/developers/applications). |
-| `DISCORD_ALLOWED_USERS` | **Yes** | — | Comma-separated Discord user IDs allowed to interact with the bot. Without this, the gateway denies all users. |
+| `DISCORD_ALLOWED_USERS` | **Yes** (unless `DISCORD_ALLOWED_ROLES` is set) | — | Comma-separated Discord user IDs allowed to interact with the bot. If neither this nor `DISCORD_ALLOWED_ROLES` is set, the gateway denies all users. |
+| `DISCORD_ALLOWED_ROLES` | No | — | Comma-separated Discord role IDs. Any member with one of these roles is authorized — OR semantics with `DISCORD_ALLOWED_USERS`. Auto-enables the **Server Members Intent** on connect. Useful when moderation teams churn: new mods get access as soon as the role is granted, no config push needed. |
 | `DISCORD_HOME_CHANNEL` | No | — | Channel ID where the bot sends proactive messages (cron output, reminders, notifications). |
 | `DISCORD_HOME_CHANNEL_NAME` | No | `"Home"` | Display name for the home channel in logs and status output. |
 | `DISCORD_REQUIRE_MENTION` | No | `true` | When `true`, the bot only responds in server channels when `@mentioned`. Set to `false` to respond to all messages in every channel. |
@@ -569,9 +570,27 @@ If you intentionally want a shared room conversation, leave it off — just expe
 ## Security
 
 :::warning
-Always set `DISCORD_ALLOWED_USERS` to restrict who can interact with the bot. Without it, the gateway denies all users by default as a safety measure. Only add User IDs of people you trust — authorized users have full access to the agent's capabilities, including tool use and system access.
+Always set `DISCORD_ALLOWED_USERS` (or `DISCORD_ALLOWED_ROLES`) to restrict who can interact with the bot. Without either, the gateway denies all users by default as a safety measure. Only authorize people you trust — authorized users have full access to the agent's capabilities, including tool use and system access.
 :::
 
+### Role-Based Access Control
+
+For servers where access is managed by roles instead of individual user lists (moderator teams, support staff, internal tooling), use `DISCORD_ALLOWED_ROLES` — a comma-separated list of role IDs. Any member with one of those roles is authorized.
+
+```bash
+# ~/.hermes/.env — works alongside or instead of DISCORD_ALLOWED_USERS
+DISCORD_ALLOWED_ROLES=987654321098765432,876543210987654321
+```
+
+Semantics:
+
+- **OR with user allowlist.** A user is authorized if their ID is in `DISCORD_ALLOWED_USERS` **or** they have any role in `DISCORD_ALLOWED_ROLES`.
+- **Server Members Intent auto-enabled.** When `DISCORD_ALLOWED_ROLES` is set, the bot enables the Members intent on connect — required for Discord to send role information with member records.
+- **Role IDs, not names.** Grab them from Discord: **User Settings → Advanced → Developer Mode ON**, then right-click any role → **Copy Role ID**.
+- **DM fallback.** In DMs the role check scans mutual guilds; a user with an allowed role in any shared server is authorized in DMs too.
+
+This is the preferred pattern when the moderation team churns — new moderators get access the moment the role is granted, with no `.env` edit or gateway restart.
+
 ### Mention Control
 
 By default, Hermes blocks the bot from pinging `@everyone`, `@here`, and role mentions, even if its reply contains those tokens. This prevents a poorly-worded prompt or echoed user content from spamming a whole server. Individual `@user` pings and reply-reference pings (the little "replying to…" chip) stay enabled so normal conversation still works.
diff --git a/website/docs/user-guide/messaging/feishu.md b/website/docs/user-guide/messaging/feishu.md
index 4d9783d402b..6e9f1d0e7fb 100644
--- a/website/docs/user-guide/messaging/feishu.md
+++ b/website/docs/user-guide/messaging/feishu.md
@@ -244,6 +244,54 @@ Interactive cards require **three** configuration steps in the Feishu Developer
 Without all three steps, Feishu will successfully *send* interactive cards (sending only requires `im:message:send` permission), but clicking any button will return error 200340. The card appears to work — the error only surfaces when a user interacts with it.
 :::
 
+## Document Comment Intelligent Reply
+
+Beyond chat, the adapter can also answer `@`-mentions left on **Feishu/Lark documents**. When a user comments on a document (either on a local text selection or on the whole document) and `@`-mentions the bot, Hermes reads the document plus the surrounding comment thread and posts an LLM reply inline on the thread.
+
+Powered by the `drive.notice.comment_add_v1` event, the handler:
+
+- Fetches the document content and the comment timeline in parallel; the timeline covers 20 messages for whole-doc threads and 12 for local-selection threads.
+- Runs the agent with the `feishu_doc` + `feishu_drive` toolsets scoped to that single comment session.
+- Chunks replies at 4000 chars and posts them back as threaded replies.
+- Caches per-document sessions for 1 hour with a 50-message cap so follow-up comments on the same doc keep context.
+
+### 3-Tier Access Control
+
+Document-comment replies are **explicit-grant only** — there is no implicit allow-all mode. Permissions resolve in this order (first match wins, per field):
+
+1. **Exact doc** — rule scoped to a specific document token.
+2. **Wildcard** — rule that matches a pattern of docs.
+3. **Top-level** — default rule for the workspace.
+
+Two policies are available per rule:
+
+- **`allowlist`** — a static list of users / tenants.
+- **`pairing`** — the static list plus any users approved at runtime (kept in the pairing store). Useful for rollouts where moderators can grant access live.
+
+Rules live in `~/.hermes/feishu_comment_rules.json` (pairing grants in `~/.hermes/feishu_comment_pairing.json`) with mtime-cached hot-reload — edits take effect on the next comment event without restarting the gateway.
+
+CLI:
+
+```bash
+# Inspect current rules and pairing state
+python -m gateway.platforms.feishu_comment_rules status
+
+# Simulate an access check for a specific doc + user
+python -m gateway.platforms.feishu_comment_rules check <fileType:fileToken> <user_open_id>
+
+# Manage pairing grants at runtime
+python -m gateway.platforms.feishu_comment_rules pairing list
+python -m gateway.platforms.feishu_comment_rules pairing add <user_open_id>
+python -m gateway.platforms.feishu_comment_rules pairing remove <user_open_id>
+```
+
+### Required Feishu App Configuration
+
+On top of the chat/card permissions already granted, add the drive comment event and the read scopes it needs:
+
+- Subscribe to `drive.notice.comment_add_v1` in **Event Subscriptions**.
+- Grant the `docs:doc:readonly` and `drive:drive:readonly` scopes so the handler can read document content.
+
 ## Media Support
 
 ### Inbound (receiving)

From 1c352f6b1d377088b5a3d4310030587a9960a09d Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 17 Apr 2026 21:23:31 -0700
Subject: [PATCH 005/143] docs(browser): expand Camofox persistence guide with
 troubleshooting (#11957)

The existing 'Persistent browser sessions' section had the correct config
snippet but users still hit the flag at the wrong config path, assumed
Hermes could force persistence when the server was ephemeral, and had no
way to verify the flag was actually taking effect.

Adds to that section:
- Warning admonition calling out the nested path vs top-level mistake.
- Explicit 'What Hermes does / does not do' split so users understand
  Hermes can only send a stable userId; the Camofox server must map it
  to a persistent profile.
- 5-step verification flow for confirming persistence works end-to-end.
- Reminder to restart Hermes after editing config.yaml.
- Where Hermes derives the stable userId (~/.hermes/browser_auth/camofox/)
  so users can reset or back up state.

Docs-only change.
---
 website/docs/user-guide/features/browser.md | 39 +++++++++++++++++++--
 1 file changed, 36 insertions(+), 3 deletions(-)

diff --git a/website/docs/user-guide/features/browser.md b/website/docs/user-guide/features/browser.md
index 9880965ae48..42b6815df51 100644
--- a/website/docs/user-guide/features/browser.md
+++ b/website/docs/user-guide/features/browser.md
@@ -111,16 +111,49 @@ When `CAMOFOX_URL` is set, all browser tools automatically route through Camofox
 
 #### Persistent browser sessions
 
-By default, each Camofox session gets a random identity — cookies and logins don't survive across agent restarts. To enable persistent browser sessions:
+By default, each Camofox session gets a random identity — cookies and logins don't survive across agent restarts. To enable persistent browser sessions, add the following to `~/.hermes/config.yaml`:
 
 ```yaml
-# In ~/.hermes/config.yaml
 browser:
   camofox:
     managed_persistence: true
 ```
 
-When enabled, Hermes sends a stable profile-scoped `userId` to Camofox. The Camofox server automatically maps each `userId` to a dedicated persistent Firefox profile, so cookies, logins, and localStorage survive across restarts. Different Hermes profiles get different browser profiles (profile isolation).
+Then fully restart Hermes so the new config is picked up.
+
+:::warning Nested path matters
+Hermes reads `browser.camofox.managed_persistence`, **not** a top-level `managed_persistence`. A common mistake is writing:
+
+```yaml
+# ❌ Wrong — Hermes ignores this
+managed_persistence: true
+```
+
+If the flag is placed at the wrong path, Hermes silently falls back to a random ephemeral `userId` and your login state will be lost on every session.
+:::
+
+##### What Hermes does
+- Sends a deterministic profile-scoped `userId` to Camofox so the server can reuse the same Firefox profile across sessions.
+- Skips server-side context destruction on cleanup, so cookies and logins survive between agent tasks.
+- Scopes the `userId` to the active Hermes profile, so different Hermes profiles get different browser profiles (profile isolation).
+
+##### What Hermes does not do
+- It does not force persistence on the Camofox server. Hermes only sends a stable `userId`; the server must honor it by mapping that `userId` to a persistent Firefox profile directory.
+- If your Camofox server build treats every request as ephemeral (e.g. always calls `browser.newContext()` without loading a stored profile), Hermes cannot make those sessions persist. Make sure you are running a Camofox build that implements userId-based profile persistence.
+
+##### Verify it's working
+
+1. Start Hermes and your Camofox server.
+2. Open Google (or any login site) in a browser task and sign in manually.
+3. End the browser task normally.
+4. Start a new browser task.
+5. Open the same site again — you should still be signed in.
+
+If you are logged out at step 5, the Camofox server isn't honoring the stable `userId`. Double-check your config path, confirm you fully restarted Hermes after editing `config.yaml`, and verify your Camofox server version supports persistent per-user profiles.
+
+##### Where state lives
+
+Hermes derives the stable `userId` from the profile-scoped directory `~/.hermes/browser_auth/camofox/` (or the equivalent under `$HERMES_HOME` for non-default profiles). The actual browser profile data lives on the Camofox server side, keyed by that `userId`. To fully reset a persistent profile, clear it on the Camofox server and remove the corresponding Hermes profile's state directory.
 
 #### VNC live view
 

From 8a59f8a9edcf6a23cffda3377cced2761732e7bc Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 17 Apr 2026 21:29:24 -0700
Subject: [PATCH 006/143] fix(update): survive mid-update terminal disconnect
 (#11960)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

hermes update no longer dies when the controlling terminal closes
(SSH drop, shell close) during pip install.  SIGHUP is set to SIG_IGN
for the duration of the update, and stdout/stderr are wrapped so writes
to a closed pipe are absorbed instead of cascading into process exit.
All update output is mirrored to ~/.hermes/logs/update.log so users can
see what happened after reconnecting.

SIGINT (Ctrl-C) and SIGTERM (systemd) are intentionally still honored —
those are deliberate cancellations, not accidents.  In gateway mode the
helper is a no-op since the update is already detached.

POSIX preserves SIG_IGN across exec(), so pip and git subprocesses
inherit hangup protection automatically — no changes to subprocess
spawning needed.
---
 hermes_cli/main.py                            | 195 ++++++++++-
 .../test_update_hangup_protection.py          | 325 ++++++++++++++++++
 website/docs/getting-started/updating.md      |  15 +
 3 files changed, 534 insertions(+), 1 deletion(-)
 create mode 100644 tests/hermes_cli/test_update_hangup_protection.py

diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 81b27e4a100..0afadac3d16 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -4985,8 +4985,187 @@ def _update_node_dependencies() -> None:
             print(f"    {stderr.splitlines()[-1]}")
 
 
+class _UpdateOutputStream:
+    """Stream wrapper used during ``hermes update`` to survive terminal loss.
+
+    Wraps the process's original stdout/stderr so that:
+
+    * Every write is also mirrored to an append-only log file
+      (``~/.hermes/logs/update.log``) that users can inspect after the
+      terminal disconnects.
+    * Writes to the original stream that fail with ``BrokenPipeError`` /
+      ``OSError`` / ``ValueError`` (closed file) no longer cascade into
+      process exit — the update keeps going, only the on-screen output
+      stops.
+
+    Combined with ``SIGHUP -> SIG_IGN`` installed by
+    ``_install_hangup_protection``, this makes ``hermes update`` safe to
+    run in a plain SSH session that might disconnect mid-install.
+    """
+
+    def __init__(self, original, log_file):
+        self._original = original  # wrapped stream (the real stdout or stderr)
+        self._log = log_file  # mirror target; None disables mirroring
+        self._original_broken = False  # set on the first failed write/flush; never reset
+
+    def write(self, data):
+        # Mirror to the log file first — it's the most reliable destination.
+        if self._log is not None:
+            try:
+                self._log.write(data)
+            except Exception:
+                # Log errors should never abort the update.
+                pass
+
+        if self._original_broken:  # original already known dead: skip it, report data as written
+            return len(data) if isinstance(data, (str, bytes)) else 0
+
+        try:
+            return self._original.write(data)
+        except (BrokenPipeError, OSError, ValueError):
+            # Terminal vanished (SSH disconnect, shell close).  Stop trying
+            # to write to it, but keep the update running.
+            self._original_broken = True
+            return len(data) if isinstance(data, (str, bytes)) else 0
+
+    def flush(self):  # same order as write(): log first, then the original stream
+        if self._log is not None:
+            try:
+                self._log.flush()
+            except Exception:
+                pass
+        if self._original_broken:
+            return
+        try:
+            self._original.flush()
+        except (BrokenPipeError, OSError, ValueError):
+            self._original_broken = True
+
+    def isatty(self):  # False once the original is known broken, or if the probe itself raises
+        if self._original_broken:
+            return False
+        try:
+            return self._original.isatty()
+        except Exception:
+            return False
+
+    def fileno(self):
+        # Some tools probe fileno(); defer to the underlying stream and let
+        # callers handle failures (same behaviour as the unwrapped stream).
+        return self._original.fileno()
+
+    def __getattr__(self, name):  # fallback for attributes not defined on the wrapper (e.g. encoding)
+        return getattr(self._original, name)
+
+
+def _install_hangup_protection(gateway_mode: bool = False):
+    """Protect ``cmd_update`` from SIGHUP and broken terminal pipes.
+
+    Users commonly run ``hermes update`` in an SSH session or a terminal
+    that may close mid-install.  Without protection, ``SIGHUP`` from the
+    terminal kills the Python process during ``pip install`` and leaves
+    the venv half-installed; the documented workaround ("use screen /
+    tmux") shouldn't be required for something as routine as an update.
+
+    Protections installed:
+
+    1. ``SIGHUP`` is set to ``SIG_IGN``.  POSIX preserves ``SIG_IGN``
+       across ``exec()``, so pip and git subprocesses also stop dying on
+       hangup.
+    2. ``sys.stdout`` / ``sys.stderr`` are wrapped to mirror output to
+       ``~/.hermes/logs/update.log`` and to silently absorb
+       ``BrokenPipeError`` when the terminal vanishes.
+
+    ``SIGINT`` (Ctrl-C) and ``SIGTERM`` (systemd shutdown) are
+    **intentionally left alone** — those are legitimate cancellation
+    signals the user or OS sent on purpose.
+
+    In gateway mode (``hermes update --gateway``) the update is already
+    spawned detached from a terminal, so this function is a no-op.
+
+    Returns a dict that ``cmd_update`` can pass to
+    ``_finalize_update_output`` on exit.  Returning a dict rather than a
+    tuple keeps the call site forward-compatible with future additions.
+    """
+    state = {
+        "prev_stdout": sys.stdout,
+        "prev_stderr": sys.stderr,
+        "log_file": None,
+        "installed": False,
+    }
+
+    if gateway_mode:  # no-op path: signals and stdio are left exactly as they were
+        return state
+
+    import signal as _signal
+
+    # (1) Ignore SIGHUP for the remainder of this process.
+    if hasattr(_signal, "SIGHUP"):  # the signal is not defined on every platform
+        try:
+            _signal.signal(_signal.SIGHUP, _signal.SIG_IGN)
+        except (ValueError, OSError):
+            # Called from a non-main thread — not fatal.  The update still
+            # runs, just without hangup protection.
+            pass
+
+    # (2) Mirror output to update.log and wrap stdio for broken-pipe
+    # tolerance.  Any failure here is non-fatal; we just skip the wrap.
+    try:
+        from hermes_cli.config import get_hermes_home
+
+        logs_dir = get_hermes_home() / "logs"
+        logs_dir.mkdir(parents=True, exist_ok=True)
+        log_path = logs_dir / "update.log"
+        log_file = open(log_path, "a", buffering=1, encoding="utf-8")  # append mode, line-buffered
+
+        import datetime as _dt
+
+        log_file.write(
+            f"\n=== hermes update started "
+            f"{_dt.datetime.now().isoformat(timespec='seconds')} ===\n"
+        )
+
+        state["log_file"] = log_file
+        sys.stdout = _UpdateOutputStream(state["prev_stdout"], log_file)
+        sys.stderr = _UpdateOutputStream(state["prev_stderr"], log_file)
+        state["installed"] = True  # tells _finalize_update_output to restore stdio
+    except Exception:
+        # Setup failed: the update continues without mirroring.
+        # NOTE(review): a log handle opened before the failure is dropped here without close().
+        state["log_file"] = None
+
+    return state
+
+
+def _finalize_update_output(state):  # NOTE: the SIGHUP disposition is not restored here
+    """Restore stdio and close the update.log handle opened by ``_install_hangup_protection``."""
+    if not state:  # None or empty dict: nothing to undo
+        return
+    if state.get("installed"):  # stdio is restored only when it was actually wrapped
+        try:
+            sys.stdout = state.get("prev_stdout", sys.stdout)
+        except Exception:
+            pass
+        try:
+            sys.stderr = state.get("prev_stderr", sys.stderr)
+        except Exception:
+            pass
+    log_file = state.get("log_file")
+    if log_file is not None:  # closed independently of the "installed" flag
+        try:
+            log_file.flush()
+            log_file.close()
+        except Exception:
+            pass
+
+
 def cmd_update(args):
-    """Update Hermes Agent to the latest version."""
+    """Update Hermes Agent to the latest version.
+
+    Thin wrapper around ``_cmd_update_impl``: installs hangup protection,
+    runs the update, then restores stdio on the way out (even on
+    ``sys.exit`` or unhandled exceptions).
+    """
     from hermes_cli.config import is_managed, managed_error
 
     if is_managed():
@@ -4994,6 +5173,20 @@ def cmd_update(args):
         return
 
     gateway_mode = getattr(args, "gateway", False)
+
+    # Protect against mid-update terminal disconnects (SIGHUP) and tolerate
+    # writes to a closed stdout.  No-op in gateway mode.  See
+    # _install_hangup_protection for rationale.
+    _update_io_state = _install_hangup_protection(gateway_mode=gateway_mode)
+    try:
+        _cmd_update_impl(args, gateway_mode=gateway_mode)
+    finally:
+        _finalize_update_output(_update_io_state)
+
+
+def _cmd_update_impl(args, gateway_mode: bool):
+    """Body of ``cmd_update`` — kept separate so the wrapper can always
+    restore stdio even on ``sys.exit``."""
     # In gateway mode, use file-based IPC for prompts instead of stdin
     gw_input_fn = (
         (lambda prompt, default="": _gateway_prompt(prompt, default))
diff --git a/tests/hermes_cli/test_update_hangup_protection.py b/tests/hermes_cli/test_update_hangup_protection.py
new file mode 100644
index 00000000000..e5c81a45a01
--- /dev/null
+++ b/tests/hermes_cli/test_update_hangup_protection.py
@@ -0,0 +1,325 @@
+"""Tests for SIGHUP protection and stdout mirroring in ``hermes update``.
+
+Covers ``_UpdateOutputStream``, ``_install_hangup_protection``, and
+``_finalize_update_output`` in ``hermes_cli/main.py``.  These exist so
+that ``hermes update`` survives a terminal disconnect mid-install
+(SSH drop, shell close) without leaving the venv half-installed.
+"""
+
+from __future__ import annotations
+
+import io
+import os
+import signal
+import sys
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+from hermes_cli.main import (
+    _UpdateOutputStream,
+    _finalize_update_output,
+    _install_hangup_protection,
+)
+
+
+# -----------------------------------------------------------------------------
+# _UpdateOutputStream
+# -----------------------------------------------------------------------------
+
+
+class TestUpdateOutputStream:  # exercises the wrapper with in-memory and stub streams only
+    def test_write_mirrors_to_both_original_and_log(self):
+        original = io.StringIO()
+        log = io.StringIO()
+        stream = _UpdateOutputStream(original, log)
+
+        stream.write("hello world\n")
+
+        assert original.getvalue() == "hello world\n"
+        assert log.getvalue() == "hello world\n"
+
+    def test_write_continues_after_broken_original(self):
+        """When the terminal disconnects, original.write raises BrokenPipeError.
+
+        The wrapper must catch it, flip the broken flag, and keep writing to
+        the log from then on.
+        """
+        log = io.StringIO()
+
+        class _BrokenStream:
+            def write(self, data):
+                raise BrokenPipeError("terminal gone")
+
+            def flush(self):
+                raise BrokenPipeError("terminal gone")
+
+        stream = _UpdateOutputStream(_BrokenStream(), log)
+
+        # First write triggers the broken-pipe path.
+        stream.write("first line\n")
+        # Subsequent writes take the fast broken path (no exception).
+        stream.write("second line\n")
+
+        assert log.getvalue() == "first line\nsecond line\n"
+        assert stream._original_broken is True
+
+    def test_write_tolerates_oserror_and_valueerror(self):
+        """OSError (EIO) and ValueError (closed file) should also be absorbed."""
+        log = io.StringIO()  # shared by both iterations; its contents are not asserted
+
+        class _RaisingStream:
+            def __init__(self, exc):
+                self._exc = exc
+
+            def write(self, data):
+                raise self._exc
+
+            def flush(self):
+                raise self._exc
+
+        for exc in (OSError("EIO"), ValueError("closed file")):
+            stream = _UpdateOutputStream(_RaisingStream(exc), log)
+            stream.write("x\n")
+            assert stream._original_broken is True
+
+    def test_log_failure_does_not_abort_write(self):
+        """Even if the log file write raises, the original write must still happen."""
+        class _BrokenLog:
+            def write(self, data):
+                raise OSError("disk full")
+
+            def flush(self):
+                raise OSError("disk full")
+
+        original = io.StringIO()
+        stream = _UpdateOutputStream(original, _BrokenLog())
+
+        stream.write("data\n")
+
+        assert original.getvalue() == "data\n"
+
+    def test_flush_tolerates_broken_original(self):
+        class _BrokenStream:  # write succeeds; only flush fails
+            def write(self, data):
+                return len(data)
+
+            def flush(self):
+                raise BrokenPipeError("gone")
+
+        log = io.StringIO()
+        stream = _UpdateOutputStream(_BrokenStream(), log)
+        stream.flush()  # must not raise
+        assert stream._original_broken is True
+
+    def test_isatty_delegates_to_original(self):
+        class _TtyStream:
+            def isatty(self):
+                return True
+
+            def write(self, data):
+                return len(data)
+
+            def flush(self):
+                return None
+
+        stream = _UpdateOutputStream(_TtyStream(), io.StringIO())
+        assert stream.isatty() is True
+
+    def test_isatty_returns_false_after_broken(self):
+        class _BrokenStream:  # claims to be a tty, but every write fails
+            def isatty(self):
+                return True
+
+            def write(self, data):
+                raise BrokenPipeError()
+
+            def flush(self):
+                return None
+
+        stream = _UpdateOutputStream(_BrokenStream(), io.StringIO())
+        stream.write("x")  # marks broken
+        assert stream.isatty() is False
+
+    def test_getattr_delegates_unknown_attrs(self):
+        class _StreamWithEncoding:
+            encoding = "utf-8"
+
+            def write(self, data):
+                return len(data)
+
+            def flush(self):
+                return None
+
+        stream = _UpdateOutputStream(_StreamWithEncoding(), io.StringIO())
+        assert stream.encoding == "utf-8"  # not defined on the wrapper, so __getattr__ forwards it
+
+
+# -----------------------------------------------------------------------------
+# _install_hangup_protection
+# -----------------------------------------------------------------------------
+
+
+class TestInstallHangupProtection:  # these tests touch real sys.stdout/sys.stderr and signal state
+    def test_gateway_mode_is_noop(self):
+        """In gateway mode the process is already detached — don't touch stdio or signals."""
+        prev_out, prev_err = sys.stdout, sys.stderr
+        prev_sighup = signal.getsignal(signal.SIGHUP) if hasattr(signal, "SIGHUP") else None
+
+        state = _install_hangup_protection(gateway_mode=True)
+
+        try:
+            assert sys.stdout is prev_out
+            assert sys.stderr is prev_err
+            assert state["log_file"] is None
+            assert state["installed"] is False
+            if hasattr(signal, "SIGHUP"):
+                assert signal.getsignal(signal.SIGHUP) == prev_sighup
+        finally:
+            _finalize_update_output(state)
+
+    @pytest.mark.skipif(
+        not hasattr(signal, "SIGHUP"), reason="SIGHUP not available on this platform"
+    )
+    def test_installs_sighup_ignore(self, tmp_path, monkeypatch):
+        """SIGHUP should be set to SIG_IGN so SSH disconnect doesn't kill the update."""
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        # Reset the module-level home cache, if this build of hermes_cli.config has one
+        import hermes_cli.config as _cfg
+        if hasattr(_cfg, "_HERMES_HOME_CACHE"):
+            _cfg._HERMES_HOME_CACHE = None  # type: ignore[attr-defined]
+
+        original_handler = signal.getsignal(signal.SIGHUP)
+        state = _install_hangup_protection(gateway_mode=False)
+
+        try:
+            assert signal.getsignal(signal.SIGHUP) == signal.SIG_IGN
+        finally:
+            _finalize_update_output(state)
+            # Finalize does not restore SIGHUP, so put the old handler back by hand.
+            signal.signal(signal.SIGHUP, original_handler)
+
+    def test_wraps_stdout_and_stderr_with_mirror(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        # Reset the module-level home cache, if this build of hermes_cli.config has one
+        import hermes_cli.config as _cfg
+        if hasattr(_cfg, "_HERMES_HOME_CACHE"):
+            _cfg._HERMES_HOME_CACHE = None  # type: ignore[attr-defined]
+
+        prev_out, prev_err = sys.stdout, sys.stderr
+        state = _install_hangup_protection(gateway_mode=False)
+
+        try:
+            # On Windows (no SIGHUP) we still wrap stdio and create the log.
+            assert state["installed"] is True
+            assert isinstance(sys.stdout, _UpdateOutputStream)
+            assert isinstance(sys.stderr, _UpdateOutputStream)
+            assert state["log_file"] is not None
+
+            sys.stdout.write("checking mirror\n")
+            sys.stdout.flush()
+
+            log_path = tmp_path / "logs" / "update.log"
+            assert log_path.exists()
+            contents = log_path.read_text(encoding="utf-8")
+            assert "checking mirror" in contents
+            assert "hermes update started" in contents
+        finally:
+            _finalize_update_output(state)
+            # Sanity-check restoration
+            assert sys.stdout is prev_out
+            assert sys.stderr is prev_err
+
+    def test_logs_dir_created_if_missing(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        import hermes_cli.config as _cfg
+        if hasattr(_cfg, "_HERMES_HOME_CACHE"):
+            _cfg._HERMES_HOME_CACHE = None  # type: ignore[attr-defined]
+
+        # No logs/ dir yet.
+        assert not (tmp_path / "logs").exists()
+
+        state = _install_hangup_protection(gateway_mode=False)
+        try:
+            assert (tmp_path / "logs").is_dir()
+            assert (tmp_path / "logs" / "update.log").exists()
+        finally:
+            _finalize_update_output(state)
+
+    def test_non_fatal_if_log_setup_fails(self, monkeypatch):
+        """If get_hermes_home() raises, stdio must be left untouched but SIGHUP still handled."""
+        prev_out, prev_err = sys.stdout, sys.stderr
+
+        def _boom():
+            raise RuntimeError("no home for you")
+
+        # Patch the module attribute; the function-local import picks it up at call time.
+        monkeypatch.setattr(
+            "hermes_cli.config.get_hermes_home", _boom, raising=True
+        )
+
+        original_handler = (
+            signal.getsignal(signal.SIGHUP) if hasattr(signal, "SIGHUP") else None
+        )
+
+        state = _install_hangup_protection(gateway_mode=False)
+
+        try:
+            assert sys.stdout is prev_out
+            assert sys.stderr is prev_err
+            assert state["installed"] is False
+            # SIGHUP must still be installed even when log setup fails.
+            if hasattr(signal, "SIGHUP"):
+                assert signal.getsignal(signal.SIGHUP) == signal.SIG_IGN
+        finally:
+            _finalize_update_output(state)
+            if hasattr(signal, "SIGHUP") and original_handler is not None:
+                signal.signal(signal.SIGHUP, original_handler)
+
+
+# -----------------------------------------------------------------------------
+# _finalize_update_output
+# -----------------------------------------------------------------------------
+
+
+class TestFinalizeUpdateOutput:
+    def test_none_state_is_noop(self):
+        _finalize_update_output(None)  # must not raise
+
+    def test_restores_streams_and_closes_log(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        import hermes_cli.config as _cfg
+        if hasattr(_cfg, "_HERMES_HOME_CACHE"):
+            _cfg._HERMES_HOME_CACHE = None  # type: ignore[attr-defined]
+
+        prev_out = sys.stdout
+        state = _install_hangup_protection(gateway_mode=False)
+        log_file = state["log_file"]
+
+        assert sys.stdout is not prev_out  # install replaced stdout with the wrapper
+        assert log_file is not None
+
+        _finalize_update_output(state)
+
+        assert sys.stdout is prev_out
+        # The log file handle should be closed.
+        assert log_file.closed is True
+
+    def test_skipped_install_leaves_stdio_alone(self):
+        """When install failed (state['installed']=False) finalize should not
+        touch sys.stdout / sys.stderr (they were never wrapped)."""
+        # Build a synthetic state that mimics a failed install.
+        sentinel_out = object()  # would be visible in sys.stdout if finalize wrongly restored it
+        state = {
+            "prev_stdout": sentinel_out,
+            "prev_stderr": sentinel_out,
+            "log_file": None,
+            "installed": False,
+        }
+        before_out, before_err = sys.stdout, sys.stderr
+
+        _finalize_update_output(state)
+
+        assert sys.stdout is before_out
+        assert sys.stderr is before_err
diff --git a/website/docs/getting-started/updating.md b/website/docs/getting-started/updating.md
index b0e34e07dec..eb74427a0a0 100644
--- a/website/docs/getting-started/updating.md
+++ b/website/docs/getting-started/updating.md
@@ -59,6 +59,21 @@ Already up to date.  (or: Updating abc1234..def5678)
 If `git status --short` shows unexpected changes after `hermes update`, stop and inspect them before continuing. This usually means local modifications were reapplied on top of the updated code, or a dependency step refreshed lockfiles.
 :::
 
+### If your terminal disconnects mid-update
+
+`hermes update` protects itself against accidental terminal loss:
+
+- The update ignores `SIGHUP`, so closing your SSH session or terminal window no longer kills it mid-install. `pip` and `git` child processes inherit this protection, so the Python environment cannot be left half-installed by a dropped connection.
+- All output is mirrored to `~/.hermes/logs/update.log` while the update runs. If your terminal disappears, reconnect and inspect the log to see whether the update finished and whether the gateway restart succeeded:
+
+```bash
+tail -f ~/.hermes/logs/update.log
+```
+
+- `Ctrl-C` (SIGINT) and system shutdown (SIGTERM) are still honored — those are deliberate cancellations, not accidents.
+
+You no longer need to wrap `hermes update` in `screen` or `tmux` to survive a terminal drop.
+
 ### Checking your current version
 
 ```bash

From 994faacce894cba8f97c1ff06f65da89f56520f5 Mon Sep 17 00:00:00 2001
From: AviArora02-commits <aviralarora002@gmail.com>
Date: Sun, 12 Apr 2026 23:23:03 +0530
Subject: [PATCH 007/143] fix: suppress Authorization: Bearer for Gemini
 provider to prevent HTTP 400 (#7893)

---
 agent/auxiliary_client.py                | 27 ++++++++++++
 run_agent.py                             | 21 ++++++++++
 tests/hermes_cli/test_gemini_provider.py | 52 ++++++++++++++++++++++++
 3 files changed, 100 insertions(+)

diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 8adf080e31d..568d6109220 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -745,6 +745,15 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
                 from hermes_cli.models import copilot_default_headers
 
                 extra["default_headers"] = copilot_default_headers()
+            elif "generativelanguage.googleapis.com" in base_url.lower():
+                # Google's OpenAI-compatible endpoint only accepts x-goog-api-key.
+                # Passing api_key= causes the SDK to inject Authorization: Bearer,
+                # which Google rejects with HTTP 400 "Multiple authentication
+                # credentials received". Use a placeholder for api_key and pass
+                # the real key via x-goog-api-key header instead.
+                # Fixes: https://github.com/NousResearch/hermes-agent/issues/7893
+                extra["default_headers"] = {"x-goog-api-key": api_key}
+                api_key = "not-used"
             return OpenAI(api_key=api_key, base_url=base_url, **extra), model
 
         creds = resolve_api_key_provider_credentials(provider_id)
@@ -766,6 +775,15 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
             from hermes_cli.models import copilot_default_headers
 
             extra["default_headers"] = copilot_default_headers()
+        elif "generativelanguage.googleapis.com" in base_url.lower():
+            # Google's OpenAI-compatible endpoint only accepts x-goog-api-key.
+            # Passing api_key= causes the SDK to inject Authorization: Bearer,
+            # which Google rejects with HTTP 400 "Multiple authentication
+            # credentials received". Use a placeholder for api_key and pass
+            # the real key via x-goog-api-key header instead.
+            # Fixes: https://github.com/NousResearch/hermes-agent/issues/7893
+            extra["default_headers"] = {"x-goog-api-key": api_key}
+            api_key = "not-used"
         return OpenAI(api_key=api_key, base_url=base_url, **extra), model
 
     return None, None
@@ -1611,6 +1629,15 @@ def resolve_provider_client(
             from hermes_cli.models import copilot_default_headers
 
             headers.update(copilot_default_headers())
+        elif "generativelanguage.googleapis.com" in base_url.lower():
+            # Google's OpenAI-compatible endpoint only accepts x-goog-api-key.
+            # Passing api_key= causes the OpenAI SDK to inject Authorization: Bearer,
+            # which Google rejects with HTTP 400 "Multiple authentication credentials
+            # received". Use a placeholder for api_key and pass the real key via
+            # x-goog-api-key header instead.
+            # Fixes: https://github.com/NousResearch/hermes-agent/issues/7893
+            headers["x-goog-api-key"] = api_key
+            api_key = "not-used"
 
         client = OpenAI(api_key=api_key, base_url=base_url,
                         **({"default_headers": headers} if headers else {}))
diff --git a/run_agent.py b/run_agent.py
index 010715280ca..e8d23d39cac 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1044,6 +1044,16 @@ class AIAgent:
                     }
                 elif "portal.qwen.ai" in effective_base.lower():
                     client_kwargs["default_headers"] = _qwen_portal_headers()
+                elif "generativelanguage.googleapis.com" in effective_base.lower():
+                    # Google's OpenAI-compatible endpoint only accepts x-goog-api-key.
+                    # The OpenAI SDK auto-injects Authorization: Bearer when api_key= is
+                    # set to a real value, causing HTTP 400 "Multiple authentication
+                    # credentials received".  Pass a placeholder so the SDK does not
+                    # emit Bearer, and carry the real key via x-goog-api-key instead.
+                    # Fixes: https://github.com/NousResearch/hermes-agent/issues/7893
+                    real_key = client_kwargs["api_key"]
+                    client_kwargs["api_key"] = "not-used"
+                    client_kwargs["default_headers"] = {"x-goog-api-key": real_key}
             else:
                 # No explicit creds — use the centralized provider router
                 from agent.auxiliary_client import resolve_provider_client
@@ -5102,6 +5112,17 @@ class AIAgent:
             self._client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
         elif "portal.qwen.ai" in normalized:
             self._client_kwargs["default_headers"] = _qwen_portal_headers()
+        elif "generativelanguage.googleapis.com" in normalized:
+            # Google's endpoint rejects Bearer tokens; use x-goog-api-key instead.
+            # Swap the real key out of api_key and into the header so the OpenAI
+            # SDK does not emit Authorization: Bearer.
+            # Fixes: https://github.com/NousResearch/hermes-agent/issues/7893
+            real_key = self._client_kwargs.get("api_key", "")
+            if real_key and real_key != "not-used":
+                self._client_kwargs["api_key"] = "not-used"
+            self._client_kwargs["default_headers"] = {
+                "x-goog-api-key": real_key or self._client_kwargs.get("api_key", ""),
+            }
         else:
             self._client_kwargs.pop("default_headers", None)
 
diff --git a/tests/hermes_cli/test_gemini_provider.py b/tests/hermes_cli/test_gemini_provider.py
index 089a5cf98d1..fd16e825d14 100644
--- a/tests/hermes_cli/test_gemini_provider.py
+++ b/tests/hermes_cli/test_gemini_provider.py
@@ -207,6 +207,58 @@ class TestGeminiAgentInit:
             assert agent.api_mode == "chat_completions"
             assert agent.provider == "gemini"
 
+    def test_gemini_uses_x_goog_api_key_not_bearer(self, monkeypatch):
+        """Regression test for issue #7893.
+
+        When provider=gemini, the OpenAI client must be constructed with
+        api_key='not-used' and default_headers={'x-goog-api-key': real_key}.
+        This prevents the SDK from injecting Authorization: Bearer, which
+        Google's endpoint rejects with HTTP 400.
+        """
+        monkeypatch.setenv("GOOGLE_API_KEY", "AIzaSy_REAL_KEY")
+        real_key = "AIzaSy_REAL_KEY"
+        with patch("run_agent.OpenAI") as mock_openai:
+            mock_openai.return_value = MagicMock()
+            from run_agent import AIAgent
+            AIAgent(
+                model="gemini-2.5-flash",
+                provider="gemini",
+                api_key=real_key,
+                base_url="https://generativelanguage.googleapis.com/v1beta/openai",
+            )
+        call_kwargs = mock_openai.call_args[1]
+        # The SDK must NOT receive the real key as api_key (which would emit Bearer)
+        assert call_kwargs.get("api_key") == "not-used", (
+            "api_key must be 'not-used' to suppress Authorization: Bearer for Gemini"
+        )
+        # The real key must be in x-goog-api-key header
+        headers = call_kwargs.get("default_headers", {})
+        assert headers.get("x-goog-api-key") == real_key, (
+            "x-goog-api-key header must carry the real Gemini API key"
+        )
+
+    def test_gemini_resolve_provider_client_auth(self, monkeypatch):
+        """Regression test for issue #7893 — resolve_provider_client path.
+
+        When resolve_provider_client('gemini') is called, the returned OpenAI
+        client must use x-goog-api-key header, not Authorization: Bearer.
+        """
+        monkeypatch.setenv("GEMINI_API_KEY", "AIzaSy_TEST_KEY")
+        real_key = "AIzaSy_TEST_KEY"
+        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
+            mock_openai.return_value = MagicMock()
+            mock_openai.return_value.api_key = "not-used"
+            from agent.auxiliary_client import resolve_provider_client
+            resolve_provider_client("gemini")
+        call_kwargs = mock_openai.call_args[1]
+        assert call_kwargs.get("api_key") == "not-used", (
+            "api_key must be 'not-used' to prevent Bearer injection for Gemini"
+        )
+        headers = call_kwargs.get("default_headers", {})
+        assert headers.get("x-goog-api-key") == real_key, (
+            "x-goog-api-key header must carry the real Gemini API key"
+        )
+
 
 # ── models.dev Integration ──
 

From c20e236b7156ad9d882567e36bae7ce3d0d95927 Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Fri, 17 Apr 2026 21:27:43 -0700
Subject: [PATCH 008/143] chore: map AviArora02-commits author email in release
 AUTHOR_MAP

---
 scripts/release.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/release.py b/scripts/release.py
index e8039047ceb..5e909de76ec 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -262,6 +262,7 @@ AUTHOR_MAP = {
     "xiayh17@gmail.com": "xiayh0107",
     "asurla@nvidia.com": "anniesurla",
     "limkuan24@gmail.com": "WideLee",
+    "aviralarora002@gmail.com": "AviArora02-commits",
 }
 
 

From 5ff65dbf68a4f6b0a25cbb3ee618210f7700d322 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 17 Apr 2026 21:30:34 -0700
Subject: [PATCH 009/143] docs(execute_code): clarify that scripts run in their
 own temp dir, not session CWD (#11956)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Weaker models (Gemma-class) repeatedly rediscover and forget that execute_code's
working directory differs from terminal()/read_file()'s, leading to
os.path.exists('.env') returning False even though the file exists in the
session's CWD. They then bounce between 'the file exists' and 'the file is
missing' across tool calls.

Adds a 'Working directory' note to the execute_code schema description
pointing agents at absolute paths (os.path.expanduser) or terminal()/read_file()
for inspecting user files.

Carefully avoids the 'sandbox'/'isolated'/'cloud' language that commit
39b83f34 removed (that wording caused agents on local backends to refuse
networking tasks and save false sandbox beliefs to persistent memory). This
is purely factual CWD guidance with no restriction implications.
---
 tools/code_execution_tool.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py
index 3e7e3f925b9..8268024fc72 100644
--- a/tools/code_execution_tool.py
+++ b/tools/code_execution_tool.py
@@ -1367,6 +1367,8 @@ def build_execute_code_schema(enabled_sandbox_tools: set = None) -> dict:
         f"{tool_lines}\n\n"
         "Limits: 5-minute timeout, 50KB stdout cap, max 50 tool calls per script. "
         "terminal() is foreground-only (no background or pty).\n\n"
+        "Scripts run in their own temp dir, not the session's CWD — use absolute paths "
+        "(os.path.expanduser('~/.hermes/.env')) or terminal()/read_file() for user files.\n\n"
         "Print your final result to stdout. Use Python stdlib (json, re, math, csv, "
         "datetime, collections, etc.) for processing between tool calls.\n\n"
         "Also available (no import needed — built into hermes_tools):\n"

From 598cba62adb3b722d0bb49512efcead336148b98 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 17 Apr 2026 21:35:30 -0700
Subject: [PATCH 010/143] test: update stale tests to match current code
 (#11963)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Seven test files were asserting against older function signatures and
behaviors. CI has been red on main because of accumulated test debt
from other PRs; this catches the tests up.

- tests/agent/test_subagent_progress.py: _build_child_progress_callback
  now takes (task_index, goal, parent_agent, task_count=1); update all
  call sites and rewrite tests that assumed the old 'batch-only' relay
  semantics (now relays per-tool AND flushes a summary at BATCH_SIZE).
  Renamed test_thinking_not_relayed_to_gateway → test_thinking_relayed_to_gateway
  since thinking IS now relayed as subagent.thinking.
- tests/tools/test_delegate.py: _build_child_agent now requires
  task_count; add task_count=1 to all 8 call sites.
- tests/cli/test_reasoning_command.py: AIAgent gained _stream_callback;
  stub it on the two test agent helpers that use spec=AIAgent / __new__.
- tests/hermes_cli/test_cmd_update.py: cmd_update now runs npm install
  in repo root + ui-tui/ + web/ and 'npm run build' in web/; assert
  all four subprocess calls in the expected order.
- tests/hermes_cli/test_model_validation.py: dissimilar unknown models
  now return accepted=False (previously True with warning); update
  both affected tests.
- tests/tools/test_registry.py: include feishu_doc_tool and
  feishu_drive_tool in the expected builtin tool set.
- tests/gateway/test_voice_command.py: missing-voice-deps message now
  suggests 'pip install PyNaCl' not 'hermes-agent[messaging]'.

411/411 pass locally across these 7 files.
---
 tests/agent/test_subagent_progress.py     | 107 ++++++++++++----------
 tests/cli/test_reasoning_command.py       |   2 +
 tests/gateway/test_voice_command.py       |   2 +-
 tests/hermes_cli/test_cmd_update.py       |  38 ++++----
 tests/hermes_cli/test_model_validation.py |   8 +-
 tests/tools/test_delegate.py              |   8 ++
 tests/tools/test_registry.py              |   2 +
 7 files changed, 94 insertions(+), 73 deletions(-)

diff --git a/tests/agent/test_subagent_progress.py b/tests/agent/test_subagent_progress.py
index 99375d6bd6a..88b2e379026 100644
--- a/tests/agent/test_subagent_progress.py
+++ b/tests/agent/test_subagent_progress.py
@@ -79,7 +79,7 @@ class TestBuildChildProgressCallback:
         parent._delegate_spinner = None
         parent.tool_progress_callback = None
         
-        cb = _build_child_progress_callback(0, parent)
+        cb = _build_child_progress_callback(0, "test goal", parent)
         assert cb is None
 
     def test_cli_spinner_tool_event(self):
@@ -93,7 +93,7 @@ class TestBuildChildProgressCallback:
         parent._delegate_spinner = spinner
         parent.tool_progress_callback = None
         
-        cb = _build_child_progress_callback(0, parent)
+        cb = _build_child_progress_callback(0, "test goal", parent)
         assert cb is not None
         
         cb("tool.started", "web_search", "quantum computing", {})
@@ -113,7 +113,7 @@ class TestBuildChildProgressCallback:
         parent._delegate_spinner = spinner
         parent.tool_progress_callback = None
         
-        cb = _build_child_progress_callback(0, parent)
+        cb = _build_child_progress_callback(0, "test goal", parent)
         cb("_thinking", "I'll search for papers first")
         
         output = buf.getvalue()
@@ -121,54 +121,64 @@ class TestBuildChildProgressCallback:
         assert "search for papers" in output
 
     def test_gateway_batched_progress(self):
-        """Gateway path should batch tool calls and flush at BATCH_SIZE."""
+        """Gateway path: each tool.started relays a subagent.tool event, and a
+        subagent.progress summary fires once BATCH_SIZE tools accumulate."""
         parent = MagicMock()
         parent._delegate_spinner = None
         parent_cb = MagicMock()
         parent.tool_progress_callback = parent_cb
-        
-        cb = _build_child_progress_callback(0, parent)
-        
-        # Send 4 tool calls — shouldn't flush yet (BATCH_SIZE = 5)
+
+        cb = _build_child_progress_callback(0, "test goal", parent)
+
+        # Each tool.started relays a subagent.tool event immediately (per-tool relay).
         for i in range(4):
             cb("tool.started", f"tool_{i}", f"arg_{i}", {})
-        parent_cb.assert_not_called()
-        
-        # 5th call should trigger flush
-        cb("tool.started", "tool_4", "arg_4", {})
-        parent_cb.assert_called_once()
-        call_args = parent_cb.call_args
-        assert "tool_0" in call_args[0][1]
-        assert "tool_4" in call_args[0][1]
+        # 4 per-tool relays so far, no batch summary yet (BATCH_SIZE=5)
+        events = [c.args[0] for c in parent_cb.call_args_list]
+        assert events == ["subagent.tool"] * 4
 
-    def test_thinking_not_relayed_to_gateway(self):
-        """Thinking events should NOT be sent to gateway (too noisy)."""
+        # 5th call triggers another per-tool relay PLUS the batch-size summary
+        cb("tool.started", "tool_4", "arg_4", {})
+        events = [c.args[0] for c in parent_cb.call_args_list]
+        assert events == ["subagent.tool"] * 5 + ["subagent.progress"]
+        summary_call = parent_cb.call_args_list[-1]
+        summary_text = summary_call.kwargs.get("preview") or summary_call.args[2]
+        assert "tool_0" in summary_text
+        assert "tool_4" in summary_text
+
+    def test_thinking_relayed_to_gateway(self):
+        """Thinking events are relayed as subagent.thinking events."""
         parent = MagicMock()
         parent._delegate_spinner = None
         parent_cb = MagicMock()
         parent.tool_progress_callback = parent_cb
-        
-        cb = _build_child_progress_callback(0, parent)
+
+        cb = _build_child_progress_callback(0, "test goal", parent)
         cb("_thinking", "some reasoning text")
-        
-        parent_cb.assert_not_called()
+
+        parent_cb.assert_called_once()
+        assert parent_cb.call_args.args[0] == "subagent.thinking"
+        assert parent_cb.call_args.args[2] == "some reasoning text"
 
     def test_parallel_callbacks_independent(self):
-        """Each child's callback should have independent batch state."""
+        """Each child's callback batches tool names independently."""
         parent = MagicMock()
         parent._delegate_spinner = None
         parent_cb = MagicMock()
         parent.tool_progress_callback = parent_cb
-        
-        cb0 = _build_child_progress_callback(0, parent)
-        cb1 = _build_child_progress_callback(1, parent)
-        
-        # Send 3 calls to each — neither should flush (batch size = 5)
+
+        cb0 = _build_child_progress_callback(0, "goal a", parent)
+        cb1 = _build_child_progress_callback(1, "goal b", parent)
+
+        # 3 tool.started per child = 6 per-tool relays; neither should hit
+        # the batch-size summary (batch size = 5, counted per-child).
         for i in range(3):
-            cb0(f"tool_{i}")
-            cb1(f"other_{i}")
-        
-        parent_cb.assert_not_called()
+            cb0("tool.started", f"tool_{i}", f"a_{i}", {})
+            cb1("tool.started", f"other_{i}", f"b_{i}", {})
+
+        events = [c.args[0] for c in parent_cb.call_args_list]
+        assert events.count("subagent.tool") == 6
+        assert "subagent.progress" not in events
 
     def test_task_index_prefix_in_batch_mode(self):
         """Batch mode (task_count > 1) should show 1-indexed prefix for all tasks."""
@@ -182,7 +192,7 @@ class TestBuildChildProgressCallback:
         parent.tool_progress_callback = None
         
         # task_index=0 in a batch of 3 → prefix "[1]"
-        cb0 = _build_child_progress_callback(0, parent, task_count=3)
+        cb0 = _build_child_progress_callback(0, "test goal", parent, task_count=3)
         cb0("web_search", "test")
         output = buf.getvalue()
         assert "[1]" in output
@@ -190,7 +200,7 @@ class TestBuildChildProgressCallback:
         # task_index=2 in a batch of 3 → prefix "[3]"
         buf.truncate(0)
         buf.seek(0)
-        cb2 = _build_child_progress_callback(2, parent, task_count=3)
+        cb2 = _build_child_progress_callback(2, "test goal", parent, task_count=3)
         cb2("web_search", "test")
         output = buf.getvalue()
         assert "[3]" in output
@@ -206,7 +216,7 @@ class TestBuildChildProgressCallback:
         parent._delegate_spinner = spinner
         parent.tool_progress_callback = None
         
-        cb = _build_child_progress_callback(0, parent, task_count=1)
+        cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
         cb("tool.started", "web_search", "test", {})
         
         output = buf.getvalue()
@@ -321,26 +331,31 @@ class TestBatchFlush:
     """Tests for gateway batch flush on subagent completion."""
 
     def test_flush_sends_remaining_batch(self):
-        """_flush should send remaining tool names to gateway."""
+        """_flush should send a final subagent.progress summary of any unsent
+        tool names in the batch (less than BATCH_SIZE)."""
         parent = MagicMock()
         parent._delegate_spinner = None
         parent_cb = MagicMock()
         parent.tool_progress_callback = parent_cb
 
-        cb = _build_child_progress_callback(0, parent)
+        cb = _build_child_progress_callback(0, "test goal", parent)
 
-        # Send 3 tools (below batch size of 5)
+        # Send 3 tools (below batch size of 5) — each relays subagent.tool
         cb("tool.started", "web_search", "query1", {})
         cb("tool.started", "read_file", "file.txt", {})
         cb("tool.started", "write_file", "out.txt", {})
-        parent_cb.assert_not_called()
+        events = [c.args[0] for c in parent_cb.call_args_list]
+        assert events == ["subagent.tool"] * 3  # per-tool relays so far
+        assert "subagent.progress" not in events  # no batch-size summary yet
 
-        # Flush should send the remaining 3
+        # Flush should send the remaining 3 as a summary
         cb._flush()
-        parent_cb.assert_called_once()
-        summary = parent_cb.call_args[0][1]
-        assert "web_search" in summary
-        assert "write_file" in summary
+        events = [c.args[0] for c in parent_cb.call_args_list]
+        assert events[-1] == "subagent.progress"
+        summary_call = parent_cb.call_args_list[-1]
+        summary_text = summary_call.kwargs.get("preview") or summary_call.args[2]
+        assert "web_search" in summary_text
+        assert "write_file" in summary_text
 
     def test_flush_noop_when_batch_empty(self):
         """_flush should not send anything when batch is empty."""
@@ -349,7 +364,7 @@ class TestBatchFlush:
         parent_cb = MagicMock()
         parent.tool_progress_callback = parent_cb
 
-        cb = _build_child_progress_callback(0, parent)
+        cb = _build_child_progress_callback(0, "test goal", parent)
         cb._flush()
         parent_cb.assert_not_called()
 
@@ -364,7 +379,7 @@ class TestBatchFlush:
         parent._delegate_spinner = spinner
         parent.tool_progress_callback = None
 
-        cb = _build_child_progress_callback(0, parent)
+        cb = _build_child_progress_callback(0, "test goal", parent)
         cb("tool.started", "web_search", "test", {})
         cb._flush()  # Should not crash
 
diff --git a/tests/cli/test_reasoning_command.py b/tests/cli/test_reasoning_command.py
index 554cb6f96bc..228d2904b16 100644
--- a/tests/cli/test_reasoning_command.py
+++ b/tests/cli/test_reasoning_command.py
@@ -473,6 +473,7 @@ class TestInlineThinkBlockExtraction(unittest.TestCase):
         agent.verbose_logging = False
         agent.reasoning_callback = None
         agent.stream_delta_callback = None  # non-streaming by default
+        agent._stream_callback = None  # non-streaming by default
         return agent
 
     def test_single_think_block_extracted(self):
@@ -619,6 +620,7 @@ class TestReasoningDeltasFiredFlag(unittest.TestCase):
         agent = AIAgent.__new__(AIAgent)
         agent.reasoning_callback = None
         agent.stream_delta_callback = None
+        agent._stream_callback = None
         agent.verbose_logging = False
         return agent
 
diff --git a/tests/gateway/test_voice_command.py b/tests/gateway/test_voice_command.py
index f0c3171d6e7..f25fb972e44 100644
--- a/tests/gateway/test_voice_command.py
+++ b/tests/gateway/test_voice_command.py
@@ -758,7 +758,7 @@ class TestVoiceChannelCommands:
         result = await runner._handle_voice_channel_join(event)
 
         assert "voice dependencies are missing" in result.lower()
-        assert "hermes-agent[messaging]" in result
+        assert "PyNaCl" in result
 
     # -- _handle_voice_channel_leave --
 
diff --git a/tests/hermes_cli/test_cmd_update.py b/tests/hermes_cli/test_cmd_update.py
index c8f284228bd..1e6a2245b2d 100644
--- a/tests/hermes_cli/test_cmd_update.py
+++ b/tests/hermes_cli/test_cmd_update.py
@@ -124,29 +124,23 @@ class TestCmdUpdateBranchFallback:
             if call.args and call.args[0][0] == "/usr/bin/npm"
         ]
 
+        # cmd_update runs npm commands in three locations:
+        #   1. repo root  — slash-command / TUI bridge deps
+        #   2. ui-tui/    — Ink TUI deps
+        #   3. web/       — install + "npm run build" for the web frontend
+        full_flags = [
+            "/usr/bin/npm",
+            "install",
+            "--silent",
+            "--no-fund",
+            "--no-audit",
+            "--progress=false",
+        ]
         assert npm_calls == [
-            (
-                [
-                    "/usr/bin/npm",
-                    "install",
-                    "--silent",
-                    "--no-fund",
-                    "--no-audit",
-                    "--progress=false",
-                ],
-                PROJECT_ROOT,
-            ),
-            (
-                [
-                    "/usr/bin/npm",
-                    "install",
-                    "--silent",
-                    "--no-fund",
-                    "--no-audit",
-                    "--progress=false",
-                ],
-                PROJECT_ROOT / "ui-tui",
-            ),
+            (full_flags, PROJECT_ROOT),
+            (full_flags, PROJECT_ROOT / "ui-tui"),
+            (["/usr/bin/npm", "install", "--silent"], PROJECT_ROOT / "web"),
+            (["/usr/bin/npm", "run", "build"], PROJECT_ROOT / "web"),
         ]
 
     def test_update_non_interactive_skips_migration_prompt(self, mock_args, capsys):
diff --git a/tests/hermes_cli/test_model_validation.py b/tests/hermes_cli/test_model_validation.py
index cbd41216622..1ddf6ab6399 100644
--- a/tests/hermes_cli/test_model_validation.py
+++ b/tests/hermes_cli/test_model_validation.py
@@ -450,9 +450,9 @@ class TestValidateApiNotFound:
         assert result["recognized"] is True
 
     def test_dissimilar_model_shows_suggestions_not_autocorrect(self):
-        """Models too different for auto-correction still get suggestions."""
+        """Models too different for auto-correction are rejected with suggestions."""
         result = _validate("anthropic/claude-nonexistent")
-        assert result["accepted"] is True
+        assert result["accepted"] is False
         assert result.get("corrected_model") is None
         assert "not found" in result["message"]
 
@@ -532,11 +532,11 @@ class TestValidateCodexAutoCorrection:
         assert result["message"] is None
 
     def test_very_different_name_falls_to_suggestions(self):
-        """Names too different for auto-correction get the suggestion list."""
+        """Names too different for auto-correction are rejected with a suggestion list."""
         codex_models = ["gpt-5.4-mini", "gpt-5.4", "gpt-5.3-codex"]
         with patch("hermes_cli.models.provider_model_ids", return_value=codex_models):
             result = validate_requested_model("totally-wrong", "openai-codex")
-        assert result["accepted"] is True
+        assert result["accepted"] is False
         assert result["recognized"] is False
         assert result.get("corrected_model") is None
         assert "not found" in result["message"]
diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py
index 3299b927e56..e1e119d9199 100644
--- a/tests/tools/test_delegate.py
+++ b/tests/tools/test_delegate.py
@@ -274,6 +274,7 @@ class TestDelegateTask(unittest.TestCase):
                 model=None,
                 max_iterations=10,
                 parent_agent=parent,
+                task_count=1,
             )
 
         self.assertIs(mock_child._print_fn, sink)
@@ -294,6 +295,7 @@ class TestDelegateTask(unittest.TestCase):
                 model=None,
                 max_iterations=10,
                 parent_agent=parent,
+                task_count=1,
             )
 
         self.assertTrue(callable(mock_child.thinking_callback))
@@ -363,6 +365,7 @@ class TestToolNamePreservation(unittest.TestCase):
                     model=None,
                     max_iterations=10,
                     parent_agent=parent,
+                    task_count=1,
                 )
             except NameError as exc:
                 self.fail(
@@ -1000,6 +1003,7 @@ class TestChildCredentialPoolResolution(unittest.TestCase):
                 model=None,
                 max_iterations=10,
                 parent_agent=parent,
+                task_count=1,
             )
 
             self.assertEqual(mock_child._credential_pool, mock_pool)
@@ -1225,6 +1229,7 @@ class TestDelegationReasoningEffort(unittest.TestCase):
         _build_child_agent(
             task_index=0, goal="test", context=None, toolsets=None,
             model=None, max_iterations=50, parent_agent=parent,
+            task_count=1,
         )
         call_kwargs = MockAgent.call_args[1]
         self.assertEqual(call_kwargs["reasoning_config"], {"enabled": True, "effort": "xhigh"})
@@ -1241,6 +1246,7 @@ class TestDelegationReasoningEffort(unittest.TestCase):
         _build_child_agent(
             task_index=0, goal="test", context=None, toolsets=None,
             model=None, max_iterations=50, parent_agent=parent,
+            task_count=1,
         )
         call_kwargs = MockAgent.call_args[1]
         self.assertEqual(call_kwargs["reasoning_config"], {"enabled": True, "effort": "low"})
@@ -1257,6 +1263,7 @@ class TestDelegationReasoningEffort(unittest.TestCase):
         _build_child_agent(
             task_index=0, goal="test", context=None, toolsets=None,
             model=None, max_iterations=50, parent_agent=parent,
+            task_count=1,
         )
         call_kwargs = MockAgent.call_args[1]
         self.assertEqual(call_kwargs["reasoning_config"], {"enabled": False})
@@ -1273,6 +1280,7 @@ class TestDelegationReasoningEffort(unittest.TestCase):
         _build_child_agent(
             task_index=0, goal="test", context=None, toolsets=None,
             model=None, max_iterations=50, parent_agent=parent,
+            task_count=1,
         )
         call_kwargs = MockAgent.call_args[1]
         self.assertEqual(call_kwargs["reasoning_config"], {"enabled": True, "effort": "medium"})
diff --git a/tests/tools/test_registry.py b/tests/tools/test_registry.py
index 85246bd7609..eb895e55a1a 100644
--- a/tests/tools/test_registry.py
+++ b/tests/tools/test_registry.py
@@ -296,6 +296,8 @@ class TestBuiltinDiscovery:
             "tools.code_execution_tool",
             "tools.cronjob_tools",
             "tools.delegate_tool",
+            "tools.feishu_doc_tool",
+            "tools.feishu_drive_tool",
             "tools.file_tools",
             "tools.homeassistant_tool",
             "tools.image_generation_tool",

From 73bccc94c7af3a07b4002c2a14a4b54f844bd561 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 17 Apr 2026 21:36:40 -0700
Subject: [PATCH 011/143] =?UTF-8?q?skills:=20consolidate=20mlops=20redunda?=
 =?UTF-8?q?ncies=20(gguf+llama-cpp,=20grpo+trl,=20guidance=E2=86=92optiona?=
 =?UTF-8?q?l)=20(#11965)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three tightly-scoped built-in skill consolidations to reduce redundancy in
the available_skills listing injected into every system prompt:

1. gguf-quantization → llama-cpp (merged)
   GGUF is llama.cpp's format; two skills covered the same toolchain. The
   merged llama-cpp skill keeps the full K-quant table + imatrix workflow
   from gguf and the ROCm/benchmarks/supported-models sections from the
   original llama-cpp. All 5 reference files preserved.

2. grpo-rl-training → fine-tuning-with-trl (folded in)
   GRPO isn't a framework, it's a trainer inside TRL. Moved the 17KB
   deep-dive SKILL.md to references/grpo-training.md and the working
   template to templates/basic_grpo_training.py. TRL's GRPO workflow
   section now points to both. Atropos skill's related_skills updated.

3. guidance → optional-skills/mlops/
   Dropped from built-in. Outlines (still built-in) covers the same
   structured-generation ground with wider adoption. Listed in the
   optional catalog for users who specifically want Guidance.

Net: 3 fewer built-in skill lines in every system prompt, zero content
loss. Contributor authorship preserved via git rename detection.
---
 .../mlops}/guidance/SKILL.md                  |   0
 .../mlops}/guidance/references/backends.md    |   0
 .../mlops}/guidance/references/constraints.md |   0
 .../mlops}/guidance/references/examples.md    |   0
 .../hermes-atropos-environments/SKILL.md      |   2 +-
 skills/mlops/inference/gguf/SKILL.md          | 430 ---------------
 skills/mlops/inference/llama-cpp/SKILL.md     | 491 ++++++++++++------
 .../references/advanced-usage.md              |   0
 .../references/troubleshooting.md             |   0
 .../mlops/training/grpo-rl-training/README.md |  97 ----
 .../mlops/training/trl-fine-tuning/SKILL.md   |   4 +
 .../references/grpo-training.md}              | 329 +++++-------
 .../templates/basic_grpo_training.py          |   0
 .../docs/reference/optional-skills-catalog.md |   1 +
 website/docs/reference/skills-catalog.md      |   5 +-
 15 files changed, 470 insertions(+), 889 deletions(-)
 rename {skills/mlops/inference => optional-skills/mlops}/guidance/SKILL.md (100%)
 rename {skills/mlops/inference => optional-skills/mlops}/guidance/references/backends.md (100%)
 rename {skills/mlops/inference => optional-skills/mlops}/guidance/references/constraints.md (100%)
 rename {skills/mlops/inference => optional-skills/mlops}/guidance/references/examples.md (100%)
 delete mode 100644 skills/mlops/inference/gguf/SKILL.md
 rename skills/mlops/inference/{gguf => llama-cpp}/references/advanced-usage.md (100%)
 rename skills/mlops/inference/{gguf => llama-cpp}/references/troubleshooting.md (100%)
 delete mode 100644 skills/mlops/training/grpo-rl-training/README.md
 rename skills/mlops/training/{grpo-rl-training/SKILL.md => trl-fine-tuning/references/grpo-training.md} (56%)
 rename skills/mlops/training/{grpo-rl-training => trl-fine-tuning}/templates/basic_grpo_training.py (100%)

diff --git a/skills/mlops/inference/guidance/SKILL.md b/optional-skills/mlops/guidance/SKILL.md
similarity index 100%
rename from skills/mlops/inference/guidance/SKILL.md
rename to optional-skills/mlops/guidance/SKILL.md
diff --git a/skills/mlops/inference/guidance/references/backends.md b/optional-skills/mlops/guidance/references/backends.md
similarity index 100%
rename from skills/mlops/inference/guidance/references/backends.md
rename to optional-skills/mlops/guidance/references/backends.md
diff --git a/skills/mlops/inference/guidance/references/constraints.md b/optional-skills/mlops/guidance/references/constraints.md
similarity index 100%
rename from skills/mlops/inference/guidance/references/constraints.md
rename to optional-skills/mlops/guidance/references/constraints.md
diff --git a/skills/mlops/inference/guidance/references/examples.md b/optional-skills/mlops/guidance/references/examples.md
similarity index 100%
rename from skills/mlops/inference/guidance/references/examples.md
rename to optional-skills/mlops/guidance/references/examples.md
diff --git a/optional-skills/mlops/hermes-atropos-environments/SKILL.md b/optional-skills/mlops/hermes-atropos-environments/SKILL.md
index 9dff4668767..5101886b41a 100644
--- a/optional-skills/mlops/hermes-atropos-environments/SKILL.md
+++ b/optional-skills/mlops/hermes-atropos-environments/SKILL.md
@@ -7,7 +7,7 @@ license: MIT
 metadata:
   hermes:
     tags: [atropos, rl, environments, training, reinforcement-learning, reward-functions]
-    related_skills: [axolotl, grpo-rl-training, trl-fine-tuning, lm-evaluation-harness]
+    related_skills: [axolotl, fine-tuning-with-trl, lm-evaluation-harness]
 ---
 
 # Hermes Agent Atropos Environments
diff --git a/skills/mlops/inference/gguf/SKILL.md b/skills/mlops/inference/gguf/SKILL.md
deleted file mode 100644
index 21bb176c8f9..00000000000
--- a/skills/mlops/inference/gguf/SKILL.md
+++ /dev/null
@@ -1,430 +0,0 @@
----
-name: gguf-quantization
-description: GGUF format and llama.cpp quantization for efficient CPU/GPU inference. Use when deploying models on consumer hardware, Apple Silicon, or when needing flexible quantization from 2-8 bit without GPU requirements.
-version: 1.0.0
-author: Orchestra Research
-license: MIT
-dependencies: [llama-cpp-python>=0.2.0]
-metadata:
-  hermes:
-    tags: [GGUF, Quantization, llama.cpp, CPU Inference, Apple Silicon, Model Compression, Optimization]
-
----
-
-# GGUF - Quantization Format for llama.cpp
-
-The GGUF (GPT-Generated Unified Format) is the standard file format for llama.cpp, enabling efficient inference on CPUs, Apple Silicon, and GPUs with flexible quantization options.
-
-## When to use GGUF
-
-**Use GGUF when:**
-- Deploying on consumer hardware (laptops, desktops)
-- Running on Apple Silicon (M1/M2/M3) with Metal acceleration
-- Need CPU inference without GPU requirements
-- Want flexible quantization (Q2_K to Q8_0)
-- Using local AI tools (LM Studio, Ollama, text-generation-webui)
-
-**Key advantages:**
-- **Universal hardware**: CPU, Apple Silicon, NVIDIA, AMD support
-- **No Python runtime**: Pure C/C++ inference
-- **Flexible quantization**: 2-8 bit with various methods (K-quants)
-- **Ecosystem support**: LM Studio, Ollama, koboldcpp, and more
-- **imatrix**: Importance matrix for better low-bit quality
-
-**Use alternatives instead:**
-- **AWQ/GPTQ**: Maximum accuracy with calibration on NVIDIA GPUs
-- **HQQ**: Fast calibration-free quantization for HuggingFace
-- **bitsandbytes**: Simple integration with transformers library
-- **TensorRT-LLM**: Production NVIDIA deployment with maximum speed
-
-## Quick start
-
-### Installation
-
-```bash
-# Clone llama.cpp
-git clone https://github.com/ggml-org/llama.cpp
-cd llama.cpp
-
-# Build (CPU)
-make
-
-# Build with CUDA (NVIDIA)
-make GGML_CUDA=1
-
-# Build with Metal (Apple Silicon)
-make GGML_METAL=1
-
-# Install Python bindings (optional)
-pip install llama-cpp-python
-```
-
-### Convert model to GGUF
-
-```bash
-# Install requirements
-pip install -r requirements.txt
-
-# Convert HuggingFace model to GGUF (FP16)
-python convert_hf_to_gguf.py ./path/to/model --outfile model-f16.gguf
-
-# Or specify output type
-python convert_hf_to_gguf.py ./path/to/model \
-    --outfile model-f16.gguf \
-    --outtype f16
-```
-
-### Quantize model
-
-```bash
-# Basic quantization to Q4_K_M
-./llama-quantize model-f16.gguf model-q4_k_m.gguf Q4_K_M
-
-# Quantize with importance matrix (better quality)
-./llama-imatrix -m model-f16.gguf -f calibration.txt -o model.imatrix
-./llama-quantize --imatrix model.imatrix model-f16.gguf model-q4_k_m.gguf Q4_K_M
-```
-
-### Run inference
-
-```bash
-# CLI inference
-./llama-cli -m model-q4_k_m.gguf -p "Hello, how are you?"
-
-# Interactive mode
-./llama-cli -m model-q4_k_m.gguf --interactive
-
-# With GPU offload
-./llama-cli -m model-q4_k_m.gguf -ngl 35 -p "Hello!"
-```
-
-## Quantization types
-
-### K-quant methods (recommended)
-
-| Type | Bits | Size (7B) | Quality | Use Case |
-|------|------|-----------|---------|----------|
-| Q2_K | 2.5 | ~2.8 GB | Low | Extreme compression |
-| Q3_K_S | 3.0 | ~3.0 GB | Low-Med | Memory constrained |
-| Q3_K_M | 3.3 | ~3.3 GB | Medium | Balance |
-| Q4_K_S | 4.0 | ~3.8 GB | Med-High | Good balance |
-| Q4_K_M | 4.5 | ~4.1 GB | High | **Recommended default** |
-| Q5_K_S | 5.0 | ~4.6 GB | High | Quality focused |
-| Q5_K_M | 5.5 | ~4.8 GB | Very High | High quality |
-| Q6_K | 6.0 | ~5.5 GB | Excellent | Near-original |
-| Q8_0 | 8.0 | ~7.2 GB | Best | Maximum quality |
-
-### Legacy methods
-
-| Type | Description |
-|------|-------------|
-| Q4_0 | 4-bit, basic |
-| Q4_1 | 4-bit with delta |
-| Q5_0 | 5-bit, basic |
-| Q5_1 | 5-bit with delta |
-
-**Recommendation**: Use K-quant methods (Q4_K_M, Q5_K_M) for best quality/size ratio.
-
-## Conversion workflows
-
-### Workflow 1: HuggingFace to GGUF
-
-```bash
-# 1. Download model
-huggingface-cli download meta-llama/Llama-3.1-8B --local-dir ./llama-3.1-8b
-
-# 2. Convert to GGUF (FP16)
-python convert_hf_to_gguf.py ./llama-3.1-8b \
-    --outfile llama-3.1-8b-f16.gguf \
-    --outtype f16
-
-# 3. Quantize
-./llama-quantize llama-3.1-8b-f16.gguf llama-3.1-8b-q4_k_m.gguf Q4_K_M
-
-# 4. Test
-./llama-cli -m llama-3.1-8b-q4_k_m.gguf -p "Hello!" -n 50
-```
-
-### Workflow 2: With importance matrix (better quality)
-
-```bash
-# 1. Convert to GGUF
-python convert_hf_to_gguf.py ./model --outfile model-f16.gguf
-
-# 2. Create calibration text (diverse samples)
-cat > calibration.txt << 'EOF'
-The quick brown fox jumps over the lazy dog.
-Machine learning is a subset of artificial intelligence.
-Python is a popular programming language.
-# Add more diverse text samples...
-EOF
-
-# 3. Generate importance matrix
-./llama-imatrix -m model-f16.gguf \
-    -f calibration.txt \
-    --chunk 512 \
-    -o model.imatrix \
-    -ngl 35  # GPU layers if available
-
-# 4. Quantize with imatrix
-./llama-quantize --imatrix model.imatrix \
-    model-f16.gguf \
-    model-q4_k_m.gguf \
-    Q4_K_M
-```
-
-### Workflow 3: Multiple quantizations
-
-```bash
-#!/bin/bash
-MODEL="llama-3.1-8b-f16.gguf"
-IMATRIX="llama-3.1-8b.imatrix"
-
-# Generate imatrix once
-./llama-imatrix -m $MODEL -f wiki.txt -o $IMATRIX -ngl 35
-
-# Create multiple quantizations
-for QUANT in Q4_K_M Q5_K_M Q6_K Q8_0; do
-    OUTPUT="llama-3.1-8b-${QUANT,,}.gguf"
-    ./llama-quantize --imatrix $IMATRIX $MODEL $OUTPUT $QUANT
-    echo "Created: $OUTPUT ($(du -h $OUTPUT | cut -f1))"
-done
-```
-
-## Python usage
-
-### llama-cpp-python
-
-```python
-from llama_cpp import Llama
-
-# Load model
-llm = Llama(
-    model_path="./model-q4_k_m.gguf",
-    n_ctx=4096,          # Context window
-    n_gpu_layers=35,     # GPU offload (0 for CPU only)
-    n_threads=8          # CPU threads
-)
-
-# Generate
-output = llm(
-    "What is machine learning?",
-    max_tokens=256,
-    temperature=0.7,
-    stop=["</s>", "\n\n"]
-)
-print(output["choices"][0]["text"])
-```
-
-### Chat completion
-
-```python
-from llama_cpp import Llama
-
-llm = Llama(
-    model_path="./model-q4_k_m.gguf",
-    n_ctx=4096,
-    n_gpu_layers=35,
-    chat_format="llama-3"  # Or "chatml", "mistral", etc.
-)
-
-messages = [
-    {"role": "system", "content": "You are a helpful assistant."},
-    {"role": "user", "content": "What is Python?"}
-]
-
-response = llm.create_chat_completion(
-    messages=messages,
-    max_tokens=256,
-    temperature=0.7
-)
-print(response["choices"][0]["message"]["content"])
-```
-
-### Streaming
-
-```python
-from llama_cpp import Llama
-
-llm = Llama(model_path="./model-q4_k_m.gguf", n_gpu_layers=35)
-
-# Stream tokens
-for chunk in llm(
-    "Explain quantum computing:",
-    max_tokens=256,
-    stream=True
-):
-    print(chunk["choices"][0]["text"], end="", flush=True)
-```
-
-## Server mode
-
-### Start OpenAI-compatible server
-
-```bash
-# Start server
-./llama-server -m model-q4_k_m.gguf \
-    --host 0.0.0.0 \
-    --port 8080 \
-    -ngl 35 \
-    -c 4096
-
-# Or with Python bindings
-python -m llama_cpp.server \
-    --model model-q4_k_m.gguf \
-    --n_gpu_layers 35 \
-    --host 0.0.0.0 \
-    --port 8080
-```
-
-### Use with OpenAI client
-
-```python
-from openai import OpenAI
-
-client = OpenAI(
-    base_url="http://localhost:8080/v1",
-    api_key="not-needed"
-)
-
-response = client.chat.completions.create(
-    model="local-model",
-    messages=[{"role": "user", "content": "Hello!"}],
-    max_tokens=256
-)
-print(response.choices[0].message.content)
-```
-
-## Hardware optimization
-
-### Apple Silicon (Metal)
-
-```bash
-# Build with Metal
-make clean && make GGML_METAL=1
-
-# Run with Metal acceleration
-./llama-cli -m model.gguf -ngl 99 -p "Hello"
-
-# Python with Metal
-llm = Llama(
-    model_path="model.gguf",
-    n_gpu_layers=99,     # Offload all layers
-    n_threads=1          # Metal handles parallelism
-)
-```
-
-### NVIDIA CUDA
-
-```bash
-# Build with CUDA
-make clean && make GGML_CUDA=1
-
-# Run with CUDA
-./llama-cli -m model.gguf -ngl 35 -p "Hello"
-
-# Specify GPU
-CUDA_VISIBLE_DEVICES=0 ./llama-cli -m model.gguf -ngl 35
-```
-
-### CPU optimization
-
-```bash
-# Build with AVX2/AVX512
-make clean && make
-
-# Run with optimal threads
-./llama-cli -m model.gguf -t 8 -p "Hello"
-
-# Python CPU config
-llm = Llama(
-    model_path="model.gguf",
-    n_gpu_layers=0,      # CPU only
-    n_threads=8,         # Match physical cores
-    n_batch=512          # Batch size for prompt processing
-)
-```
-
-## Integration with tools
-
-### Ollama
-
-```bash
-# Create Modelfile
-cat > Modelfile << 'EOF'
-FROM ./model-q4_k_m.gguf
-TEMPLATE """{{ .System }}
-{{ .Prompt }}"""
-PARAMETER temperature 0.7
-PARAMETER num_ctx 4096
-EOF
-
-# Create Ollama model
-ollama create mymodel -f Modelfile
-
-# Run
-ollama run mymodel "Hello!"
-```
-
-### LM Studio
-
-1. Place GGUF file in `~/.cache/lm-studio/models/`
-2. Open LM Studio and select the model
-3. Configure context length and GPU offload
-4. Start inference
-
-### text-generation-webui
-
-```bash
-# Place in models folder
-cp model-q4_k_m.gguf text-generation-webui/models/
-
-# Start with llama.cpp loader
-python server.py --model model-q4_k_m.gguf --loader llama.cpp --n-gpu-layers 35
-```
-
-## Best practices
-
-1. **Use K-quants**: Q4_K_M offers best quality/size balance
-2. **Use imatrix**: Always use importance matrix for Q4 and below
-3. **GPU offload**: Offload as many layers as VRAM allows
-4. **Context length**: Start with 4096, increase if needed
-5. **Thread count**: Match physical CPU cores, not logical
-6. **Batch size**: Increase n_batch for faster prompt processing
-
-## Common issues
-
-**Model loads slowly:**
-```bash
-# Use mmap for faster loading
-./llama-cli -m model.gguf --mmap
-```
-
-**Out of memory:**
-```bash
-# Reduce GPU layers
-./llama-cli -m model.gguf -ngl 20  # Reduce from 35
-
-# Or use smaller quantization
-./llama-quantize model-f16.gguf model-q3_k_m.gguf Q3_K_M
-```
-
-**Poor quality at low bits:**
-```bash
-# Always use imatrix for Q4 and below
-./llama-imatrix -m model-f16.gguf -f calibration.txt -o model.imatrix
-./llama-quantize --imatrix model.imatrix model-f16.gguf model-q4_k_m.gguf Q4_K_M
-```
-
-## References
-
-- **[Advanced Usage](references/advanced-usage.md)** - Batching, speculative decoding, custom builds
-- **[Troubleshooting](references/troubleshooting.md)** - Common issues, debugging, benchmarks
-
-## Resources
-
-- **Repository**: https://github.com/ggml-org/llama.cpp
-- **Python Bindings**: https://github.com/abetlen/llama-cpp-python
-- **Pre-quantized Models**: https://huggingface.co/TheBloke
-- **GGUF Converter**: https://huggingface.co/spaces/ggml-org/gguf-my-repo
-- **License**: MIT
diff --git a/skills/mlops/inference/llama-cpp/SKILL.md b/skills/mlops/inference/llama-cpp/SKILL.md
index 57016c920df..33fc37adb18 100644
--- a/skills/mlops/inference/llama-cpp/SKILL.md
+++ b/skills/mlops/inference/llama-cpp/SKILL.md
@@ -1,138 +1,271 @@
 ---
 name: llama-cpp
-description: Runs LLM inference on CPU, Apple Silicon, and consumer GPUs without NVIDIA hardware. Use for edge deployment, M1/M2/M3 Macs, AMD/Intel GPUs, or when CUDA is unavailable. Supports GGUF quantization (1.5-8 bit) for reduced memory and 4-10× speedup vs PyTorch on CPU.
-version: 1.0.0
+description: Run LLM inference with llama.cpp on CPU, Apple Silicon, AMD/Intel GPUs, or NVIDIA — plus GGUF model conversion and quantization (2–8 bit with K-quants and imatrix). Covers CLI, Python bindings, OpenAI-compatible server, and Ollama/LM Studio integration. Use for edge deployment, M1/M2/M3/M4 Macs, CUDA-less environments, or flexible local quantization.
+version: 2.0.0
 author: Orchestra Research
 license: MIT
-dependencies: [llama-cpp-python]
+dependencies: [llama-cpp-python>=0.2.0]
 metadata:
   hermes:
-    tags: [Inference Serving, Llama.cpp, CPU Inference, Apple Silicon, Edge Deployment, GGUF, Quantization, Non-NVIDIA, AMD GPUs, Intel GPUs, Embedded]
-
+    tags: [llama.cpp, GGUF, Quantization, CPU Inference, Apple Silicon, Edge Deployment, Non-NVIDIA, AMD GPUs, Intel GPUs, Embedded, Model Compression]
 ---
 
-# llama.cpp
+# llama.cpp + GGUF
 
-Pure C/C++ LLM inference with minimal dependencies, optimized for CPUs and non-NVIDIA hardware.
+Pure C/C++ LLM inference with minimal dependencies, plus the GGUF (GPT-Generated Unified Format) standard used for quantized weights. One toolchain covers conversion, quantization, and serving.
 
-## When to use llama.cpp
+## When to use
 
-**Use llama.cpp when:**
-- Running on CPU-only machines
-- Deploying on Apple Silicon (M1/M2/M3/M4)
-- Using AMD or Intel GPUs (no CUDA)
-- Edge deployment (Raspberry Pi, embedded systems)
-- Need simple deployment without Docker/Python
+**Use llama.cpp + GGUF when:**
+- Running on CPU-only machines or Apple Silicon (M1/M2/M3/M4) with Metal acceleration
+- Using AMD (ROCm) or Intel GPUs where CUDA isn't available
+- Edge deployment (Raspberry Pi, embedded systems, consumer laptops)
+- Need flexible quantization (2–8 bit with K-quants)
+- Want local AI tools (LM Studio, Ollama, text-generation-webui, koboldcpp)
+- Want a single binary deploy without Docker/Python
 
-**Use TensorRT-LLM instead when:**
-- Have NVIDIA GPUs (A100/H100)
-- Need maximum throughput (100K+ tok/s)
-- Running in datacenter with CUDA
+**Key advantages:**
+- Universal hardware: CPU, Apple Silicon, NVIDIA, AMD, Intel
+- No Python runtime required (pure C/C++)
+- K-quants + imatrix for better low-bit quality
+- OpenAI-compatible server built in
+- Rich ecosystem (Ollama, LM Studio, llama-cpp-python)
 
-**Use vLLM instead when:**
-- Have NVIDIA GPUs
-- Need Python-first API
-- Want PagedAttention
+**Use alternatives instead:**
+- **vLLM** — NVIDIA GPUs, PagedAttention, Python-first, max throughput
+- **TensorRT-LLM** — Production NVIDIA (A100/H100), maximum speed
+- **AWQ/GPTQ** — Calibrated quantization for NVIDIA-only deployments
+- **bitsandbytes** — Simple HuggingFace transformers integration
+- **HQQ** — Fast calibration-free quantization
 
 ## Quick start
 
-### Installation
+### Install
 
 ```bash
-# macOS/Linux
+# macOS / Linux (simplest)
 brew install llama.cpp
 
 # Or build from source
-git clone https://github.com/ggerganov/llama.cpp
+git clone https://github.com/ggml-org/llama.cpp
 cd llama.cpp
-make
+make                        # CPU
+make GGML_METAL=1           # Apple Silicon
+make GGML_CUDA=1            # NVIDIA CUDA
+make LLAMA_HIP=1            # AMD ROCm
 
-# With Metal (Apple Silicon)
-make LLAMA_METAL=1
-
-# With CUDA (NVIDIA)
-make LLAMA_CUDA=1
-
-# With ROCm (AMD)
-make LLAMA_HIP=1
+# Python bindings (optional)
+pip install llama-cpp-python
+# With CUDA:   CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python --force-reinstall --no-cache-dir
+# With Metal:  CMAKE_ARGS="-DGGML_METAL=on" pip install llama-cpp-python --force-reinstall --no-cache-dir
 ```
 
-### Download model
+### Download a pre-quantized GGUF
 
 ```bash
-# Download from HuggingFace (GGUF format)
+# TheBloke hosts most popular models pre-quantized
 huggingface-cli download \
     TheBloke/Llama-2-7B-Chat-GGUF \
     llama-2-7b-chat.Q4_K_M.gguf \
     --local-dir models/
+```
 
-# Or convert from HuggingFace
-python convert_hf_to_gguf.py models/llama-2-7b-chat/
+### Or convert a HuggingFace model to GGUF
+
+```bash
+# 1. Download HF model
+huggingface-cli download meta-llama/Llama-3.1-8B --local-dir ./llama-3.1-8b
+
+# 2. Convert to FP16 GGUF
+python convert_hf_to_gguf.py ./llama-3.1-8b \
+    --outfile llama-3.1-8b-f16.gguf \
+    --outtype f16
+
+# 3. Quantize to Q4_K_M
+./llama-quantize llama-3.1-8b-f16.gguf llama-3.1-8b-q4_k_m.gguf Q4_K_M
 ```
 
 ### Run inference
 
 ```bash
-# Simple chat
-./llama-cli \
-    -m models/llama-2-7b-chat.Q4_K_M.gguf \
-    -p "Explain quantum computing" \
-    -n 256  # Max tokens
+# One-shot prompt
+./llama-cli -m model.Q4_K_M.gguf -p "Explain quantum computing" -n 256
 
 # Interactive chat
-./llama-cli \
-    -m models/llama-2-7b-chat.Q4_K_M.gguf \
-    --interactive
+./llama-cli -m model.Q4_K_M.gguf --interactive
+
+# With GPU offload
+./llama-cli -m model.Q4_K_M.gguf -ngl 35 -p "Hello!"
 ```
 
-### Server mode
+### Serve an OpenAI-compatible API
 
 ```bash
-# Start OpenAI-compatible server
 ./llama-server \
-    -m models/llama-2-7b-chat.Q4_K_M.gguf \
+    -m model.Q4_K_M.gguf \
     --host 0.0.0.0 \
     --port 8080 \
-    -ngl 32  # Offload 32 layers to GPU
+    -ngl 35 \
+    -c 4096 \
+    --parallel 4 \
+    --cont-batching
+```
 
-# Client request
+```bash
 curl http://localhost:8080/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
-    "model": "llama-2-7b-chat",
+    "model": "local",
     "messages": [{"role": "user", "content": "Hello!"}],
     "temperature": 0.7,
     "max_tokens": 100
   }'
 ```
 
-## Quantization formats
+## Quantization formats (GGUF)
 
-### GGUF format overview
+### K-quant methods (recommended)
 
-| Format | Bits | Size (7B) | Speed | Quality | Use Case |
-|--------|------|-----------|-------|---------|----------|
-| **Q4_K_M** | 4.5 | 4.1 GB | Fast | Good | **Recommended default** |
-| Q4_K_S | 4.3 | 3.9 GB | Faster | Lower | Speed critical |
-| Q5_K_M | 5.5 | 4.8 GB | Medium | Better | Quality critical |
-| Q6_K | 6.5 | 5.5 GB | Slower | Best | Maximum quality |
-| Q8_0 | 8.0 | 7.0 GB | Slow | Excellent | Minimal degradation |
-| Q2_K | 2.5 | 2.7 GB | Fastest | Poor | Testing only |
+| Type | Bits | Size (7B) | Quality | Use Case |
+|------|------|-----------|---------|----------|
+| Q2_K | 2.5 | ~2.8 GB | Low | Extreme compression (testing only) |
+| Q3_K_S | 3.0 | ~3.0 GB | Low-Med | Memory constrained |
+| Q3_K_M | 3.3 | ~3.3 GB | Medium | Fits small devices |
+| Q4_K_S | 4.0 | ~3.8 GB | Med-High | Speed critical |
+| **Q4_K_M** | 4.5 | ~4.1 GB | High | **Recommended default** |
+| Q5_K_S | 5.0 | ~4.6 GB | High | Quality focused |
+| Q5_K_M | 5.5 | ~4.8 GB | Very High | High quality |
+| Q6_K | 6.0 | ~5.5 GB | Excellent | Near-original |
+| Q8_0 | 8.0 | ~7.2 GB | Best | Maximum quality, minimal degradation |
 
-### Choosing quantization
+**Variant suffixes** — `_S` (Small, faster, lower quality), `_M` (Medium, balanced), `_L` (Large, better quality).
+
+**Legacy formats (Q4_0/Q4_1/Q5_0/Q5_1) still exist**, but always prefer K-quants for a better quality/size ratio.
+
+**IQ quantization** — ultra-low-bit, importance-aware methods: IQ2_XXS, IQ2_XS, IQ2_S, IQ3_XXS, IQ3_XS, IQ3_S, IQ4_XS. These require `--imatrix`.
+
+**Task-specific defaults:**
+- General chat / assistants: Q4_K_M, or Q5_K_M if RAM allows
+- Code generation: Q5_K_M or Q6_K (higher precision helps)
+- Technical / medical: Q6_K or Q8_0
+- Very large (70B, 405B) on consumer hardware: Q3_K_M or Q4_K_S
+- Raspberry Pi / edge: Q2_K or Q3_K_S
+
+## Conversion workflows
+
+### Basic: HF → GGUF → quantized
 
 ```bash
-# General use (balanced)
-Q4_K_M  # 4-bit, medium quality
+python convert_hf_to_gguf.py ./model --outfile model-f16.gguf --outtype f16
+./llama-quantize model-f16.gguf model-q4_k_m.gguf Q4_K_M
+./llama-cli -m model-q4_k_m.gguf -p "Hello!" -n 50
+```
 
-# Maximum speed (more degradation)
-Q2_K or Q3_K_M
+### With importance matrix (imatrix) — better low-bit quality
 
-# Maximum quality (slower)
-Q6_K or Q8_0
+`imatrix` gives a 10–20% perplexity improvement at Q4 and is essential at Q3 and below.
 
-# Very large models (70B, 405B)
-Q3_K_M or Q4_K_S  # Lower bits to fit in memory
+```bash
+# 1. Convert to FP16 GGUF
+python convert_hf_to_gguf.py ./model --outfile model-f16.gguf
+
+# 2. Prepare calibration data (diverse text, ~100MB is ideal)
+cat > calibration.txt << 'EOF'
+The quick brown fox jumps over the lazy dog.
+Machine learning is a subset of artificial intelligence.
+# Add more diverse text samples...
+EOF
+
+# 3. Generate importance matrix
+./llama-imatrix -m model-f16.gguf \
+    -f calibration.txt \
+    --chunk 512 \
+    -o model.imatrix \
+    -ngl 35
+
+# 4. Quantize with imatrix
+./llama-quantize --imatrix model.imatrix \
+    model-f16.gguf model-q4_k_m.gguf Q4_K_M
+```
+
+### Multi-quant batch
+
+```bash
+#!/bin/bash
+MODEL="llama-3.1-8b-f16.gguf"
+IMATRIX="llama-3.1-8b.imatrix"
+
+./llama-imatrix -m $MODEL -f wiki.txt -o $IMATRIX -ngl 35
+
+for QUANT in Q4_K_M Q5_K_M Q6_K Q8_0; do
+    OUTPUT="llama-3.1-8b-${QUANT,,}.gguf"
+    ./llama-quantize --imatrix $IMATRIX $MODEL $OUTPUT $QUANT
+    echo "Created: $OUTPUT ($(du -h $OUTPUT | cut -f1))"
+done
+```
+
+### Quality testing (perplexity)
+
+```bash
+./llama-perplexity -m model.gguf -f wikitext-2-raw/wiki.test.raw -c 512
+# Baseline FP16: ~5.96  |  Q4_K_M: ~6.06 (+1.7%)  |  Q2_K: ~6.87 (+15.3%)
+```
+
+## Python bindings (llama-cpp-python)
+
+### Basic generation
+
+```python
+from llama_cpp import Llama
+
+llm = Llama(
+    model_path="./model-q4_k_m.gguf",
+    n_ctx=4096,
+    n_gpu_layers=35,     # 0 for CPU only, 99 to offload everything
+    n_threads=8,
+)
+
+output = llm(
+    "What is machine learning?",
+    max_tokens=256,
+    temperature=0.7,
+    stop=["</s>", "\n\n"],
+)
+print(output["choices"][0]["text"])
+```
+
+### Chat completion + streaming
+
+```python
+llm = Llama(
+    model_path="./model-q4_k_m.gguf",
+    n_ctx=4096,
+    n_gpu_layers=35,
+    chat_format="llama-3",    # Or "chatml", "mistral", etc.
+)
+
+# Non-streaming
+response = llm.create_chat_completion(
+    messages=[
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "What is Python?"},
+    ],
+    max_tokens=256,
+    temperature=0.7,
+)
+print(response["choices"][0]["message"]["content"])
+
+# Streaming
+for chunk in llm("Explain quantum computing:", max_tokens=256, stream=True):
+    print(chunk["choices"][0]["text"], end="", flush=True)
+```
+
+### Embeddings
+
+```python
+llm = Llama(model_path="./model-q4_k_m.gguf", embedding=True, n_gpu_layers=35)
+vec = llm.embed("This is a test sentence.")
+print(f"Embedding dimension: {len(vec)}")
 ```
 
 ## Hardware acceleration
@@ -140,122 +273,166 @@ Q3_K_M or Q4_K_S  # Lower bits to fit in memory
 ### Apple Silicon (Metal)
 
 ```bash
-# Build with Metal
-make LLAMA_METAL=1
-
-# Run with GPU acceleration (automatic)
-./llama-cli -m model.gguf -ngl 999  # Offload all layers
-
-# Performance: M3 Max 40-60 tokens/sec (Llama 2-7B Q4_K_M)
+make clean && make GGML_METAL=1
+./llama-cli -m model.gguf -ngl 99 -p "Hello"   # offload all layers
 ```
 
-### NVIDIA GPUs (CUDA)
-
-```bash
-# Build with CUDA
-make LLAMA_CUDA=1
-
-# Offload layers to GPU
-./llama-cli -m model.gguf -ngl 35  # Offload 35/40 layers
-
-# Hybrid CPU+GPU for large models
-./llama-cli -m llama-70b.Q4_K_M.gguf -ngl 20  # GPU: 20 layers, CPU: rest
+```python
+llm = Llama(
+    model_path="model.gguf",
+    n_gpu_layers=99,     # Offload everything
+    n_threads=1,         # Metal handles parallelism
+)
 ```
 
-### AMD GPUs (ROCm)
+Performance: M3 Max ~40–60 tok/s on Llama 2-7B Q4_K_M.
+
+### NVIDIA (CUDA)
+
+```bash
+make clean && make GGML_CUDA=1
+./llama-cli -m model.gguf -ngl 35 -p "Hello"
+
+# Hybrid for large models
+./llama-cli -m llama-70b.Q4_K_M.gguf -ngl 20   # GPU: 20 layers, CPU: rest
+
+# Multi-GPU split
+./llama-cli -m large-model.gguf --tensor-split 0.5,0.5 -ngl 60
+```
+
+### AMD (ROCm)
 
 ```bash
-# Build with ROCm
 make LLAMA_HIP=1
-
-# Run with AMD GPU
 ./llama-cli -m model.gguf -ngl 999
 ```
 
-## Common patterns
-
-### Batch processing
+### CPU
 
 ```bash
-# Process multiple prompts from file
-cat prompts.txt | ./llama-cli \
-    -m model.gguf \
-    --batch-size 512 \
-    -n 100
+# Match PHYSICAL cores, not logical
+./llama-cli -m model.gguf -t 8 -p "Hello"
+
+# BLAS acceleration (2–3× speedup)
+make GGML_OPENBLAS=1
 ```
 
-### Constrained generation
-
-```bash
-# JSON output with grammar
-./llama-cli \
-    -m model.gguf \
-    -p "Generate a person: " \
-    --grammar-file grammars/json.gbnf
-
-# Outputs valid JSON only
-```
-
-### Context size
-
-```bash
-# Increase context (default 512)
-./llama-cli \
-    -m model.gguf \
-    -c 4096  # 4K context window
-
-# Very long context (if model supports)
-./llama-cli -m model.gguf -c 32768  # 32K context
+```python
+llm = Llama(
+    model_path="model.gguf",
+    n_gpu_layers=0,
+    n_threads=8,
+    n_batch=512,         # Larger batch = faster prompt processing
+)
 ```
 
 ## Performance benchmarks
 
-### CPU performance (Llama 2-7B Q4_K_M)
+### CPU (Llama 2-7B Q4_K_M)
 
-| CPU | Threads | Speed | Cost |
-|-----|---------|-------|------|
-| Apple M3 Max | 16 | 50 tok/s | $0 (local) |
-| AMD Ryzen 9 7950X | 32 | 35 tok/s | $0.50/hour |
-| Intel i9-13900K | 32 | 30 tok/s | $0.40/hour |
-| AWS c7i.16xlarge | 64 | 40 tok/s | $2.88/hour |
+| CPU | Threads | Speed |
+|-----|---------|-------|
+| Apple M3 Max (Metal) | 16 | 50 tok/s |
+| AMD Ryzen 9 7950X | 32 | 35 tok/s |
+| Intel i9-13900K | 32 | 30 tok/s |
 
-### GPU acceleration (Llama 2-7B Q4_K_M)
+### GPU offloading on RTX 4090
 
-| GPU | Speed | vs CPU | Cost |
-|-----|-------|--------|------|
-| NVIDIA RTX 4090 | 120 tok/s | 3-4× | $0 (local) |
-| NVIDIA A10 | 80 tok/s | 2-3× | $1.00/hour |
-| AMD MI250 | 70 tok/s | 2× | $2.00/hour |
-| Apple M3 Max (Metal) | 50 tok/s | ~Same | $0 (local) |
+| Layers GPU | Speed | VRAM |
+|------------|-------|------|
+| 0 (CPU only) | 30 tok/s | 0 GB |
+| 20 (hybrid) | 80 tok/s | 8 GB |
+| 35 (all) | 120 tok/s | 12 GB |
 
 ## Supported models
 
-**LLaMA family**:
-- Llama 2 (7B, 13B, 70B)
-- Llama 3 (8B, 70B, 405B)
-- Code Llama
+- **LLaMA family**: Llama 2 (7B/13B/70B), Llama 3 (8B/70B/405B), Code Llama
+- **Mistral family**: Mistral 7B, Mixtral 8x7B/8x22B
+- **Other**: Falcon, BLOOM, GPT-J, Phi-3, Gemma, Qwen, LLaVA (vision), Whisper (audio)
 
-**Mistral family**:
-- Mistral 7B
-- Mixtral 8x7B, 8x22B
+Find GGUF models: https://huggingface.co/models?library=gguf
 
-**Other**:
-- Falcon, BLOOM, GPT-J
-- Phi-3, Gemma, Qwen
-- LLaVA (vision), Whisper (audio)
+## Ecosystem integrations
 
-**Find models**: https://huggingface.co/models?library=gguf
+### Ollama
+
+```bash
+cat > Modelfile << 'EOF'
+FROM ./model-q4_k_m.gguf
+TEMPLATE """{{ .System }}
+{{ .Prompt }}"""
+PARAMETER temperature 0.7
+PARAMETER num_ctx 4096
+EOF
+
+ollama create mymodel -f Modelfile
+ollama run mymodel "Hello!"
+```
+
+### LM Studio
+
+1. Place GGUF file in `~/.cache/lm-studio/models/`
+2. Open LM Studio and select the model
+3. Configure context length and GPU offload, start inference
+
+### text-generation-webui
+
+```bash
+cp model-q4_k_m.gguf text-generation-webui/models/
+python server.py --model model-q4_k_m.gguf --loader llama.cpp --n-gpu-layers 35
+```
+
+### OpenAI client → llama-server
+
+```python
+from openai import OpenAI
+
+client = OpenAI(base_url="http://localhost:8080/v1", api_key="not-needed")
+response = client.chat.completions.create(
+    model="local-model",
+    messages=[{"role": "user", "content": "Hello!"}],
+    max_tokens=256,
+)
+print(response.choices[0].message.content)
+```
+
+## Best practices
+
+1. **Use K-quants** — Q4_K_M is the recommended default
+2. **Use imatrix** for Q4 and below (calibration improves quality substantially)
+3. **Offload as many layers as VRAM allows** — start high, reduce by 5 on OOM
+4. **Thread count** — match physical cores, not logical
+5. **Batch size** — increase `n_batch` (e.g. 512) for faster prompt processing
+6. **Context** — start at 4096, grow only as needed (memory scales with ctx)
+7. **Flash Attention** — add `--flash-attn` if your build supports it
+
+## Common issues (quick fixes)
+
+**Model loads slowly** — use `--mmap` for memory-mapped loading.
+
+**Out of memory (GPU)** — reduce `-ngl`, use a smaller quant (Q4_K_S / Q3_K_M), or quantize the KV cache:
+```python
+Llama(model_path="...", type_k=2, type_v=2, n_gpu_layers=35)  # Q4_0 KV cache
+```
+
+**Garbage output** — wrong `chat_format`, temperature too high, or model file corrupted. Test with `temperature=0.1` and verify FP16 baseline works.
+
+**Connection refused (server)** — bind to `--host 0.0.0.0`, check `lsof -i :8080`.
+
+See `references/troubleshooting.md` for the full playbook.
 
 ## References
 
-- **[Quantization Guide](references/quantization.md)** - GGUF formats, conversion, quality comparison
-- **[Server Deployment](references/server.md)** - API endpoints, Docker, monitoring
-- **[Optimization](references/optimization.md)** - Performance tuning, hybrid CPU+GPU
+- **[advanced-usage.md](references/advanced-usage.md)** — speculative decoding, batched inference, grammar-constrained generation, LoRA, multi-GPU, custom builds, benchmark scripts
+- **[quantization.md](references/quantization.md)** — perplexity tables, use-case guide, model size scaling (7B/13B/70B RAM needs), imatrix deep dive
+- **[server.md](references/server.md)** — OpenAI API endpoints, Docker deployment, NGINX load balancing, monitoring
+- **[optimization.md](references/optimization.md)** — CPU threading, BLAS, GPU offload heuristics, batch tuning, benchmarks
+- **[troubleshooting.md](references/troubleshooting.md)** — install/convert/quantize/inference/server issues, Apple Silicon, debugging
 
 ## Resources
 
-- **GitHub**: https://github.com/ggerganov/llama.cpp
-- **Models**: https://huggingface.co/models?library=gguf
-- **Discord**: https://discord.gg/llama-cpp
-
-
+- **GitHub**: https://github.com/ggml-org/llama.cpp
+- **Python bindings**: https://github.com/abetlen/llama-cpp-python
+- **Pre-quantized models**: https://huggingface.co/TheBloke
+- **GGUF converter Space**: https://huggingface.co/spaces/ggml-org/gguf-my-repo
+- **License**: MIT
diff --git a/skills/mlops/inference/gguf/references/advanced-usage.md b/skills/mlops/inference/llama-cpp/references/advanced-usage.md
similarity index 100%
rename from skills/mlops/inference/gguf/references/advanced-usage.md
rename to skills/mlops/inference/llama-cpp/references/advanced-usage.md
diff --git a/skills/mlops/inference/gguf/references/troubleshooting.md b/skills/mlops/inference/llama-cpp/references/troubleshooting.md
similarity index 100%
rename from skills/mlops/inference/gguf/references/troubleshooting.md
rename to skills/mlops/inference/llama-cpp/references/troubleshooting.md
diff --git a/skills/mlops/training/grpo-rl-training/README.md b/skills/mlops/training/grpo-rl-training/README.md
deleted file mode 100644
index 99b60d66438..00000000000
--- a/skills/mlops/training/grpo-rl-training/README.md
+++ /dev/null
@@ -1,97 +0,0 @@
-# GRPO/RL Training Skill
-
-**Expert-level guidance for Group Relative Policy Optimization with TRL**
-
-## 📁 Skill Structure
-
-```
-grpo-rl-training/
-├── SKILL.md                              # Main skill documentation (READ THIS FIRST)
-├── README.md                             # This file
-├── templates/
-│   └── basic_grpo_training.py            # Production-ready training template
-└── examples/
-    └── reward_functions_library.py       # 20+ reward function examples
-```
-
-## 🚀 Quick Start
-
-1. **Read SKILL.md** - Comprehensive guide with all concepts and patterns
-2. **Copy `templates/basic_grpo_training.py`** - Start with working code
-3. **Browse `examples/reward_functions_library.py`** - Pick reward functions for your task
-4. **Modify for your use case** - Adapt dataset, rewards, and config
-
-## 💡 What's Inside
-
-### SKILL.md (Main Documentation)
-- Core GRPO concepts and algorithm fundamentals
-- Complete implementation workflow (dataset → rewards → training → deployment)
-- 10+ reward function examples with code
-- Hyperparameter tuning guide
-- Training insights (loss behavior, metrics, debugging)
-- Troubleshooting guide
-- Production best practices
-
-### Templates
-- **basic_grpo_training.py**: Minimal, production-ready training script
-  - Uses Qwen 2.5 1.5B Instruct
-  - 3 reward functions (format + correctness)
-  - LoRA for efficient training
-  - Fully documented and ready to run
-
-### Examples
-- **reward_functions_library.py**: 20+ battle-tested reward functions
-  - Correctness rewards (exact match, fuzzy match, numeric, code execution)
-  - Format rewards (XML, JSON, strict/soft)
-  - Length rewards (ideal length, min/max)
-  - Style rewards (reasoning quality, citations, repetition penalty)
-  - Combined rewards (multi-objective optimization)
-  - Preset collections for common tasks
-
-## 📖 Usage for Agents
-
-When this skill is loaded in your agent's context:
-
-1. **Always read SKILL.md first** before implementing
-2. **Start simple** - Use length-based reward to validate setup
-3. **Build incrementally** - Add one reward function at a time
-4. **Reference examples** - Copy patterns from reward_functions_library.py
-5. **Monitor training** - Watch reward metrics (not loss!)
-
-## 🎯 Common Use Cases
-
-| Task Type | Recommended Rewards | Template |
-|-----------|---------------------|----------|
-| Math reasoning | `MATH_REASONING_REWARDS` preset | basic_grpo_training.py |
-| Code generation | `CODE_GENERATION_REWARDS` preset | Modify dataset in template |
-| Summarization | `SUMMARIZATION_REWARDS` preset | Adjust prompts + rewards |
-| Q&A | `QA_REWARDS` preset | Use fuzzy match + citations |
-
-## ⚠️ Critical Reminders
-
-- **Loss goes UP during training** - This is normal (it's KL divergence)
-- **Use 3-5 reward functions** - Single rewards often fail
-- **Test rewards before training** - Debug each function independently
-- **Monitor reward_std** - Should stay > 0.1 (avoid mode collapse)
-- **Start with num_generations=4-8** - Scale up if GPU allows
-
-## 🔗 External Resources
-
-- [TRL Documentation](https://huggingface.co/docs/trl)
-- [DeepSeek R1 Paper](https://arxiv.org/abs/2501.12948)
-- [Open R1 Implementation](https://github.com/huggingface/open-r1)
-- [Unsloth (2-3x faster)](https://docs.unsloth.ai/)
-
-## 📝 Version
-
-**v1.0.0** - Initial release (January 2025)
-
-## 👨‍💻 Maintained By
-
-Orchestra Research
-For questions or improvements, see https://orchestra.com
-
----
-
-**License:** MIT
-**Last Updated:** January 2025
diff --git a/skills/mlops/training/trl-fine-tuning/SKILL.md b/skills/mlops/training/trl-fine-tuning/SKILL.md
index 3bf4f6e12ba..70023fc707f 100644
--- a/skills/mlops/training/trl-fine-tuning/SKILL.md
+++ b/skills/mlops/training/trl-fine-tuning/SKILL.md
@@ -252,6 +252,8 @@ trl dpo \
 
 Train with reinforcement learning using minimal memory.
 
+For in-depth GRPO guidance — reward function design, critical training insights (loss behavior, mode collapse, tuning), and advanced multi-stage patterns — see **[references/grpo-training.md](references/grpo-training.md)**. A production-ready training script is in **[templates/basic_grpo_training.py](templates/basic_grpo_training.py)**.
+
 Copy this checklist:
 
 ```
@@ -428,6 +430,8 @@ config = PPOConfig(
 
 **Online RL methods**: See [references/online-rl.md](references/online-rl.md) for PPO, GRPO, RLOO, and OnlineDPO with detailed configurations.
 
+**GRPO deep dive**: See [references/grpo-training.md](references/grpo-training.md) for expert-level GRPO patterns — reward function design philosophy, training insights (why loss increases, mode collapse detection), hyperparameter tuning, multi-stage training, and troubleshooting. Production-ready template in [templates/basic_grpo_training.py](templates/basic_grpo_training.py).
+
 ## Hardware requirements
 
 - **GPU**: NVIDIA (CUDA required)
diff --git a/skills/mlops/training/grpo-rl-training/SKILL.md b/skills/mlops/training/trl-fine-tuning/references/grpo-training.md
similarity index 56%
rename from skills/mlops/training/grpo-rl-training/SKILL.md
rename to skills/mlops/training/trl-fine-tuning/references/grpo-training.md
index 1d7629ab633..a22bd40945d 100644
--- a/skills/mlops/training/grpo-rl-training/SKILL.md
+++ b/skills/mlops/training/trl-fine-tuning/references/grpo-training.md
@@ -1,51 +1,36 @@
----
-name: grpo-rl-training
-description: Expert guidance for GRPO/RL fine-tuning with TRL for reasoning and task-specific model training
-version: 1.0.0
-author: Orchestra Research
-license: MIT
-dependencies: [transformers>=4.47.0, trl>=0.14.0, datasets>=3.2.0, peft>=0.14.0, torch]
-metadata:
-  hermes:
-    tags: [Post-Training, Reinforcement Learning, GRPO, TRL, RLHF, Reward Modeling, Reasoning, DPO, PPO, Structured Output]
+# GRPO (Group Relative Policy Optimization) — Deep Guide
 
----
+Expert-level patterns, critical insights, and production-ready workflows for fine-tuning language models with custom reward functions using TRL's `GRPOTrainer`. This is the deep reference for the GRPO workflow summarized in the main skill.
 
-# GRPO/RL Training with TRL
+## When to use GRPO
 
-Expert-level guidance for implementing Group Relative Policy Optimization (GRPO) using the Transformer Reinforcement Learning (TRL) library. This skill provides battle-tested patterns, critical insights, and production-ready workflows for fine-tuning language models with custom reward functions.
-
-## When to Use This Skill
-
-Use GRPO training when you need to:
-- **Enforce specific output formats** (e.g., XML tags, JSON, structured reasoning)
+Use GRPO when you need to:
+- **Enforce specific output formats** (XML tags, JSON, structured reasoning)
 - **Teach verifiable tasks** with objective correctness metrics (math, coding, fact-checking)
 - **Improve reasoning capabilities** by rewarding chain-of-thought patterns
 - **Align models to domain-specific behaviors** without labeled preference data
 - **Optimize for multiple objectives** simultaneously (format + correctness + style)
 
 **Do NOT use GRPO for:**
-- Simple supervised fine-tuning tasks (use SFT instead)
+- Simple supervised fine-tuning tasks → use SFT
 - Tasks without clear reward signals
-- When you already have high-quality preference pairs (use DPO/PPO instead)
+- When you already have high-quality preference pairs → use DPO/PPO
 
----
+## Core concepts
 
-## Core Concepts
+### 1. GRPO algorithm fundamentals
 
-### 1. GRPO Algorithm Fundamentals
-
-**Key Mechanism:**
-- Generates **multiple completions** for each prompt (group size: 4-16)
+**Key mechanism:**
+- Generates **multiple completions** per prompt (group size: 4–16)
 - Compares completions within each group using reward functions
 - Updates policy to favor higher-rewarded responses relative to the group
 
-**Critical Difference from PPO:**
+**Critical differences from PPO:**
 - No separate reward model needed
 - More sample-efficient (learns from within-group comparisons)
 - Simpler to implement and debug
 
-**Mathematical Intuition:**
+**Mathematical intuition:**
 ```
 For each prompt p:
   1. Generate N completions: {c₁, c₂, ..., cₙ}
@@ -54,35 +39,32 @@ For each prompt p:
      relative to low-reward ones in the same group
 ```
 
-### 2. Reward Function Design Philosophy
+### 2. Reward function design philosophy
 
-**Golden Rules:**
-1. **Compose multiple reward functions** - Each handles one aspect (format, correctness, style)
-2. **Scale rewards appropriately** - Higher weight = stronger signal
-3. **Use incremental rewards** - Partial credit for partial compliance
-4. **Test rewards independently** - Debug each reward function in isolation
+**Golden rules:**
+1. **Compose multiple reward functions** — each handles one aspect (format, correctness, style)
+2. **Scale rewards appropriately** — higher weight = stronger signal
+3. **Use incremental rewards** — partial credit for partial compliance
+4. **Test rewards independently** — debug each reward function in isolation
 
-**Reward Function Types:**
+**Reward function types:**
 
 | Type | Use Case | Example Weight |
 |------|----------|----------------|
 | **Correctness** | Verifiable tasks (math, code) | 2.0 (highest) |
-| **Format** | Strict structure enforcement | 0.5-1.0 |
-| **Length** | Encourage verbosity/conciseness | 0.1-0.5 |
-| **Style** | Penalize unwanted patterns | -0.5 to 0.5 |
+| **Format** | Strict structure enforcement | 0.5–1.0 |
+| **Length** | Encourage verbosity/conciseness | 0.1–0.5 |
+| **Style** | Penalize unwanted patterns | −0.5 to 0.5 |
 
----
+## Implementation workflow
 
-## Implementation Workflow
+### Step 1: Dataset preparation
 
-### Step 1: Dataset Preparation
-
-**Critical Requirements:**
-- Prompts in chat format (list of dicts with 'role' and 'content')
+**Critical requirements:**
+- Prompts in chat format (list of dicts with `role` and `content`)
 - Include system prompts to set expectations
 - For verifiable tasks, include ground truth answers as additional columns
 
-**Example Structure:**
 ```python
 from datasets import load_dataset, Dataset
 
@@ -97,8 +79,7 @@ Respond in the following format:
 """
 
 def prepare_dataset(raw_data):
-    """
-    Transform raw data into GRPO-compatible format.
+    """Transform raw data into GRPO-compatible format.
 
     Returns: Dataset with columns:
     - 'prompt': List[Dict] with role/content (system + user messages)
@@ -113,14 +94,14 @@ def prepare_dataset(raw_data):
     })
 ```
 
-**Pro Tips:**
-- Use one-shot or few-shot examples in system prompt for complex formats
-- Keep prompts concise (max_prompt_length: 256-512 tokens)
+**Pro tips:**
+- Use one-shot or few-shot examples in the system prompt for complex formats
+- Keep prompts concise (max_prompt_length: 256–512 tokens)
 - Validate data quality before training (garbage in = garbage out)
 
-### Step 2: Reward Function Implementation
+### Step 2: Reward function implementation
 
-**Template Structure:**
+**Template structure:**
 ```python
 def reward_function_name(
     prompts,        # List[List[Dict]]: Original prompts
@@ -128,24 +109,16 @@ def reward_function_name(
     answer=None,    # Optional: Ground truth from dataset
     **kwargs        # Additional dataset columns
 ) -> list[float]:
-    """
-    Evaluate completions and return rewards.
-
-    Returns: List of floats (one per completion)
-    """
-    # Extract completion text
+    """Evaluate completions and return rewards (one per completion)."""
     responses = [comp[0]['content'] for comp in completions]
-
-    # Compute rewards
     rewards = []
     for response in responses:
         score = compute_score(response)
         rewards.append(score)
-
     return rewards
 ```
 
-**Example 1: Correctness Reward (Math/Coding)**
+**Example 1: correctness reward (math/coding)**
 ```python
 def correctness_reward(prompts, completions, answer, **kwargs):
     """Reward correct answers with high score."""
@@ -155,7 +128,7 @@ def correctness_reward(prompts, completions, answer, **kwargs):
             for ans, gt in zip(extracted, answer)]
 ```
 
-**Example 2: Format Reward (Structured Output)**
+**Example 2: format reward (structured output)**
 ```python
 import re
 
@@ -167,7 +140,7 @@ def format_reward(completions, **kwargs):
             for r in responses]
 ```
 
-**Example 3: Incremental Format Reward (Partial Credit)**
+**Example 3: incremental format reward (partial credit)**
 ```python
 def incremental_format_reward(completions, **kwargs):
     """Award partial credit for format compliance."""
@@ -176,14 +149,10 @@ def incremental_format_reward(completions, **kwargs):
 
     for r in responses:
         score = 0.0
-        if '<reasoning>' in r:
-            score += 0.25
-        if '</reasoning>' in r:
-            score += 0.25
-        if '<answer>' in r:
-            score += 0.25
-        if '</answer>' in r:
-            score += 0.25
+        if '<reasoning>' in r:  score += 0.25
+        if '</reasoning>' in r: score += 0.25
+        if '<answer>' in r:     score += 0.25
+        if '</answer>' in r:    score += 0.25
         # Penalize extra text after closing tag
         if r.count('</answer>') == 1:
             extra_text = r.split('</answer>')[-1].strip()
@@ -193,12 +162,11 @@ def incremental_format_reward(completions, **kwargs):
     return rewards
 ```
 
-**Critical Insight:**
-Combine 3-5 reward functions for robust training. Order matters less than diversity of signals.
+**Critical insight:** Combine 3–5 reward functions for robust training. Order matters less than diversity of signals.
 
-### Step 3: Training Configuration
+### Step 3: Training configuration
 
-**Memory-Optimized Config (Small GPU)**
+**Memory-optimized config (small GPU)**
 ```python
 from trl import GRPOConfig
 
@@ -218,13 +186,13 @@ training_args = GRPOConfig(
     gradient_accumulation_steps=4,  # Effective batch = 4
 
     # GRPO-specific
-    num_generations=8,            # Group size: 8-16 recommended
+    num_generations=8,            # Group size: 8–16 recommended
     max_prompt_length=256,
     max_completion_length=512,
 
     # Training duration
     num_train_epochs=1,
-    max_steps=None,               # Or set fixed steps (e.g., 500)
+    max_steps=None,
 
     # Optimization
     bf16=True,                    # Faster on A100/H100
@@ -234,11 +202,11 @@ training_args = GRPOConfig(
     # Logging
     logging_steps=1,
     save_steps=100,
-    report_to="wandb",            # Or "none" for no logging
+    report_to="wandb",
 )
 ```
 
-**High-Performance Config (Large GPU)**
+**High-performance config (large GPU)**
 ```python
 training_args = GRPOConfig(
     output_dir="outputs/grpo-model",
@@ -255,31 +223,30 @@ training_args = GRPOConfig(
 )
 ```
 
-**Critical Hyperparameters:**
+**Critical hyperparameters:**
 
 | Parameter | Impact | Tuning Advice |
 |-----------|--------|---------------|
-| `num_generations` | Group size for comparison | Start with 8, increase to 16 if GPU allows |
+| `num_generations` | Group size for comparison | Start at 8, increase to 16 if GPU allows |
 | `learning_rate` | Convergence speed/stability | 5e-6 (safe), 1e-5 (faster, riskier) |
-| `max_completion_length` | Output verbosity | Match your task (512 for reasoning, 256 for short answers) |
+| `max_completion_length` | Output verbosity | Match your task (512 for reasoning, 256 for short answers) |
 | `gradient_accumulation_steps` | Effective batch size | Increase if GPU memory limited |
 
-### Step 4: Model Setup and Training
+### Step 4: Model setup and training
 
-**Standard Setup (Transformers)**
+**Standard setup (Transformers + TRL)**
 ```python
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import LoraConfig
 from trl import GRPOTrainer
 
-# Load model
 model_name = "Qwen/Qwen2.5-1.5B-Instruct"
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     torch_dtype=torch.bfloat16,
-    attn_implementation="flash_attention_2",  # 2-3x faster
-    device_map="auto"
+    attn_implementation="flash_attention_2",  # 2–3× faster
+    device_map="auto",
 )
 
 tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -287,17 +254,16 @@ tokenizer.pad_token = tokenizer.eos_token
 
 # Optional: LoRA for parameter-efficient training
 peft_config = LoraConfig(
-    r=16,                         # Rank (higher = more capacity)
-    lora_alpha=32,               # Scaling factor (typically 2*r)
+    r=16,
+    lora_alpha=32,
     target_modules=[
         "q_proj", "k_proj", "v_proj", "o_proj",
-        "gate_proj", "up_proj", "down_proj"
+        "gate_proj", "up_proj", "down_proj",
     ],
     task_type="CAUSAL_LM",
     lora_dropout=0.05,
 )
 
-# Initialize trainer
 trainer = GRPOTrainer(
     model=model,
     processing_class=tokenizer,
@@ -308,17 +274,14 @@ trainer = GRPOTrainer(
     ],
     args=training_args,
     train_dataset=dataset,
-    peft_config=peft_config,      # Remove for full fine-tuning
+    peft_config=peft_config,   # Remove for full fine-tuning
 )
 
-# Train
 trainer.train()
-
-# Save
 trainer.save_model("final_model")
 ```
 
-**Unsloth Setup (2-3x Faster)**
+**Unsloth setup (2–3× faster)**
 ```python
 from unsloth import FastLanguageModel
 
@@ -339,28 +302,26 @@ model = FastLanguageModel.get_peft_model(
     use_gradient_checkpointing="unsloth",
 )
 
-# Rest is identical to standard setup
+# Rest is identical to the standard setup
 trainer = GRPOTrainer(model=model, ...)
 trainer.train()
 ```
 
----
+## Critical training insights
 
-## Critical Training Insights
+### 1. Loss behavior (EXPECTED pattern)
+- **Loss starts near 0 and INCREASES during training** — this is CORRECT
+- Loss measures KL divergence from initial policy; the model is learning (diverging from original behavior to optimize rewards)
+- **Monitor reward metrics, not loss, for progress**
 
-### 1. Loss Behavior (EXPECTED PATTERN)
-- **Loss starts near 0 and INCREASES during training**
-- This is CORRECT - loss measures KL divergence from initial policy
-- Model is learning (diverging from original behavior to optimize rewards)
-- Monitor reward metrics instead of loss for progress
+### 2. Reward tracking
 
-### 2. Reward Tracking
 Key metrics to watch:
-- `reward`: Average across all completions
-- `reward_std`: Diversity within groups (should remain > 0)
-- `kl`: KL divergence from reference (should grow moderately)
+- `reward` — average across all completions
+- `reward_std` — diversity within groups (should stay > 0.1; a value approaching 0 signals mode collapse)
+- `kl` — KL divergence from reference (should grow moderately)
 
-**Healthy Training Pattern:**
+**Healthy pattern:**
 ```
 Step   Reward    Reward_Std   KL
 100    0.5       0.3          0.02
@@ -369,12 +330,12 @@ Step   Reward    Reward_Std   KL
 400    1.5       0.15         0.12
 ```
 
-**Warning Signs:**
-- Reward std → 0 (model collapsing to single response)
-- KL exploding (> 0.5) (diverging too much, reduce LR)
-- Reward stuck (reward functions too harsh or model capacity issue)
+**Warning signs:**
+- `reward_std` → 0 (model collapsing to a single response)
+- `kl` exploding (> 0.5) — diverging too much, reduce LR
+- Reward stuck — reward functions too harsh or model capacity issue
 
-### 3. Common Pitfalls and Solutions
+### 3. Common pitfalls and solutions
 
 | Problem | Symptom | Solution |
 |---------|---------|----------|
@@ -384,15 +345,14 @@ Step   Reward    Reward_Std   KL
 | **Slow training** | < 1 it/s | Enable `use_vllm=True`, use Unsloth, reduce seq length |
 | **Format ignored** | Model doesn't follow structure | Increase format reward weight, add incremental rewards |
 
----
+## Advanced patterns
 
-## Advanced Patterns
+### 1. Multi-stage training
 
-### 1. Multi-Stage Training
 For complex tasks, train in stages:
 
 ```python
-# Stage 1: Format compliance (epochs=1)
+# Stage 1: Format compliance
 trainer_stage1 = GRPOTrainer(
     model=model,
     reward_funcs=[incremental_format_reward, format_reward],
@@ -400,7 +360,7 @@ trainer_stage1 = GRPOTrainer(
 )
 trainer_stage1.train()
 
-# Stage 2: Correctness (epochs=1)
+# Stage 2: Correctness
 trainer_stage2 = GRPOTrainer(
     model=model,
     reward_funcs=[format_reward, correctness_reward],
@@ -409,7 +369,8 @@ trainer_stage2 = GRPOTrainer(
 trainer_stage2.train()
 ```
 
-### 2. Adaptive Reward Scaling
+### 2. Adaptive reward scaling
+
 ```python
 class AdaptiveReward:
     def __init__(self, base_reward_func, initial_weight=1.0):
@@ -428,148 +389,116 @@ class AdaptiveReward:
             self.weight *= 0.9
 ```
 
-### 3. Custom Dataset Integration
+### 3. Custom dataset integration
+
 ```python
 def load_custom_knowledge_base(csv_path):
-    """Example: School communication platform docs."""
     import pandas as pd
     df = pd.read_csv(csv_path)
-
-    dataset = Dataset.from_pandas(df).map(lambda x: {
+    return Dataset.from_pandas(df).map(lambda x: {
         'prompt': [
             {'role': 'system', 'content': CUSTOM_SYSTEM_PROMPT},
             {'role': 'user', 'content': x['question']}
         ],
         'answer': x['expert_answer']
     })
-    return dataset
 ```
 
----
+## Deployment and inference
 
-## Deployment and Inference
-
-### Save and Merge LoRA
+### Save and merge LoRA
 ```python
-# Merge LoRA adapters into base model
 if hasattr(trainer.model, 'merge_and_unload'):
     merged_model = trainer.model.merge_and_unload()
     merged_model.save_pretrained("production_model")
     tokenizer.save_pretrained("production_model")
 ```
 
-### Inference Example
+### Inference
 ```python
 from transformers import pipeline
 
-generator = pipeline(
-    "text-generation",
-    model="production_model",
-    tokenizer=tokenizer
-)
+generator = pipeline("text-generation", model="production_model", tokenizer=tokenizer)
 
 result = generator(
     [
         {'role': 'system', 'content': SYSTEM_PROMPT},
-        {'role': 'user', 'content': "What is 15 + 27?"}
+        {'role': 'user', 'content': "What is 15 + 27?"},
     ],
     max_new_tokens=256,
     do_sample=True,
     temperature=0.7,
-    top_p=0.9
+    top_p=0.9,
 )
 print(result[0]['generated_text'])
 ```
 
----
+## Best practices checklist
 
-## Best Practices Checklist
-
-**Before Training:**
+**Before training:**
 - [ ] Validate dataset format (prompts as List[Dict])
 - [ ] Test reward functions on sample data
-- [ ] Calculate expected max_prompt_length from data
-- [ ] Choose appropriate num_generations based on GPU memory
+- [ ] Calculate expected `max_prompt_length` from data
+- [ ] Choose `num_generations` based on GPU memory
 - [ ] Set up logging (wandb recommended)
 
-**During Training:**
+**During training:**
 - [ ] Monitor reward progression (should increase)
-- [ ] Check reward_std (should stay > 0.1)
+- [ ] Check `reward_std` (should stay > 0.1)
 - [ ] Watch for OOM errors (reduce batch size if needed)
-- [ ] Sample generations every 50-100 steps
+- [ ] Sample generations every 50–100 steps
 - [ ] Validate format compliance on holdout set
 
-**After Training:**
+**After training:**
 - [ ] Merge LoRA weights if using PEFT
 - [ ] Test on diverse prompts
 - [ ] Compare to baseline model
 - [ ] Document reward weights and hyperparameters
 - [ ] Save reproducibility config
 
----
+## Troubleshooting
 
-## Troubleshooting Guide
+### Debugging workflow
+1. **Isolate reward functions** — test each independently
+2. **Check data distribution** — ensure diversity in prompts
+3. **Reduce complexity** — start with single reward, add gradually
+4. **Monitor generations** — print samples every N steps
+5. **Validate extraction logic** — ensure answer parsing works
 
-### Debugging Workflow
-1. **Isolate reward functions** - Test each independently
-2. **Check data distribution** - Ensure diversity in prompts
-3. **Reduce complexity** - Start with single reward, add gradually
-4. **Monitor generations** - Print samples every N steps
-5. **Validate extraction logic** - Ensure answer parsing works
-
-### Quick Fixes
+### Quick debug reward
 ```python
-# Debug reward function
 def debug_reward(completions, **kwargs):
     responses = [comp[0]['content'] for comp in completions]
-    for i, r in enumerate(responses[:2]):  # Print first 2
+    for i, r in enumerate(responses[:2]):
         print(f"Response {i}: {r[:200]}...")
-    return [1.0] * len(responses)  # Dummy rewards
+    return [1.0] * len(responses)
 
 # Test without training
 trainer = GRPOTrainer(..., reward_funcs=[debug_reward])
-trainer.generate_completions(dataset[:1])  # Generate without updating
+trainer.generate_completions(dataset[:1])
 ```
 
----
+## Template
 
-## References and Resources
+A production-ready training script lives at **`../templates/basic_grpo_training.py`**. It uses Qwen 2.5-1.5B-Instruct with LoRA and three reward functions (incremental format, strict format, correctness) on GSM8K. Copy and adapt:
+1. `get_dataset()` — swap in your data loader
+2. Reward functions — tune to your task
+3. `SYSTEM_PROMPT` — match your output format
+4. `GRPOConfig` — adjust hyperparameters for your GPU
+
+## References and resources
 
-**Official Documentation:**
 - TRL GRPO Trainer: https://huggingface.co/docs/trl/grpo_trainer
-- DeepSeek R1 Paper: https://arxiv.org/abs/2501.12948
-- Unsloth Docs: https://docs.unsloth.ai/
-
-**Example Repositories:**
-- Open R1 Implementation: https://github.com/huggingface/open-r1
-- TRL Examples: https://github.com/huggingface/trl/tree/main/examples
-
-**Recommended Reading:**
-- Progressive Disclosure Pattern for agent instructions
-- Reward shaping in RL (Ng et al.)
-- LoRA paper (Hu et al., 2021)
-
----
-
-## Usage Instructions for Agents
-
-When this skill is loaded:
-
-1. **Read this entire file** before implementing GRPO training
-2. **Start with the simplest reward function** (e.g., length-based) to validate setup
-3. **Use the templates** in `templates/` directory as starting points
-4. **Reference examples** in `examples/` for task-specific implementations
-5. **Follow the workflow** sequentially (don't skip steps)
-6. **Debug incrementally** - add one reward function at a time
-
-**Critical Reminders:**
-- Always use multiple reward functions (3-5 is optimal)
-- Monitor reward metrics, not loss
-- Test reward functions before training
-- Start small (num_generations=4), scale up gradually
-- Save checkpoints frequently (every 100 steps)
-
-This skill is designed for **expert-level implementation**. Beginners should start with supervised fine-tuning before attempting GRPO.
-
+- GRPO paper (DeepSeekMath, which introduced GRPO): https://arxiv.org/abs/2402.03300
+- DeepSeek R1 paper: https://arxiv.org/abs/2501.12948
+- Open R1 implementation: https://github.com/huggingface/open-r1
+- TRL examples: https://github.com/huggingface/trl/tree/main/examples
+- Unsloth (faster training): https://docs.unsloth.ai/
 
+## Critical reminders
 
+- **Loss goes UP during training** — this is normal (it's KL divergence)
+- **Use 3–5 reward functions** — single rewards often fail
+- **Test rewards before training** — debug each function independently
+- **Monitor `reward_std`** — should stay > 0.1 (avoid mode collapse)
+- **Start with `num_generations` between 4 and 8** — scale up if GPU allows
diff --git a/skills/mlops/training/grpo-rl-training/templates/basic_grpo_training.py b/skills/mlops/training/trl-fine-tuning/templates/basic_grpo_training.py
similarity index 100%
rename from skills/mlops/training/grpo-rl-training/templates/basic_grpo_training.py
rename to skills/mlops/training/trl-fine-tuning/templates/basic_grpo_training.py
diff --git a/website/docs/reference/optional-skills-catalog.md b/website/docs/reference/optional-skills-catalog.md
index 6fde99b5ee8..bbb2c3b80ea 100644
--- a/website/docs/reference/optional-skills-catalog.md
+++ b/website/docs/reference/optional-skills-catalog.md
@@ -98,6 +98,7 @@ The largest optional category — covers the full ML pipeline from data curation
 | **chroma** | Open-source embedding database. Store embeddings and metadata, perform vector and full-text search. Simple 4-function API for RAG and semantic search. |
 | **faiss** | Facebook's library for efficient similarity search and clustering of dense vectors. Supports billions of vectors, GPU acceleration, and various index types (Flat, IVF, HNSW). |
 | **flash-attention** | Optimize transformer attention with Flash Attention for 2-4x speedup and 10-20x memory reduction. Supports PyTorch SDPA, flash-attn library, H100 FP8, and sliding window. |
+| **guidance** | Control LLM output with regex and grammars, guarantee valid JSON/XML/code generation, enforce structured formats, and build multi-step workflows with Guidance — Microsoft Research's constrained generation framework. |
 | **hermes-atropos-environments** | Build, test, and debug Hermes Agent RL environments for Atropos training. Covers the HermesAgentBaseEnv interface, reward functions, agent loop integration, and evaluation. |
 | **huggingface-tokenizers** | Fast Rust-based tokenizers for research and production. Tokenizes 1GB in under 20 seconds. Supports BPE, WordPiece, and Unigram algorithms. |
 | **instructor** | Extract structured data from LLM responses with Pydantic validation, retry failed extractions automatically, and stream partial results. |
diff --git a/website/docs/reference/skills-catalog.md b/website/docs/reference/skills-catalog.md
index 13ef2f7fc4a..ead50dbea67 100644
--- a/website/docs/reference/skills-catalog.md
+++ b/website/docs/reference/skills-catalog.md
@@ -163,10 +163,8 @@ Model serving, quantization (GGUF/GPTQ), structured output, inference optimizati
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| `gguf-quantization` | GGUF format and llama.cpp quantization for efficient CPU/GPU inference. Use when deploying models on consumer hardware, Apple Silicon, or when needing flexible quantization from 2-8 bit without GPU requirements. | `mlops/inference/gguf` |
-| `guidance` | Control LLM output with regex and grammars, guarantee valid JSON/XML/code generation, enforce structured formats, and build multi-step workflows with Guidance - Microsoft Research's constrained generation framework | `mlops/inference/guidance` |
 | `instructor` | Extract structured data from LLM responses with Pydantic validation, retry failed extractions automatically, parse complex JSON with type safety, and stream partial results with Instructor - battle-tested structured output library | `mlops/inference/instructor` |
-| `llama-cpp` | Runs LLM inference on CPU, Apple Silicon, and consumer GPUs without NVIDIA hardware. Use for edge deployment, M1/M2/M3 Macs, AMD/Intel GPUs, or when CUDA is unavailable. Supports GGUF quantization (1.5-8 bit) for reduced memory and 4-10× speedup vs PyTorch on CPU. | `mlops/inference/llama-cpp` |
+| `llama-cpp` | Run LLM inference with llama.cpp on CPU, Apple Silicon, AMD/Intel GPUs, or NVIDIA — plus GGUF model conversion and quantization (2–8 bit with K-quants and imatrix). Covers CLI, Python bindings, OpenAI-compatible server, and Ollama/LM Studio integration. Use for edge deployment, M1/M2/M3/M4 Macs, CUDA-less environments, or flexible local quantization. | `mlops/inference/llama-cpp` |
 | `obliteratus` | Remove refusal behaviors from open-weight LLMs using OBLITERATUS — mechanistic interpretability techniques (diff-in-means, SVD, whitened SVD, LEACE, SAE decomposition, etc.) to excise guardrails while preserving reasoning. 9 CLI methods, 28 analysis modules, 116 model presets ac… | `mlops/inference/obliteratus` |
 | `outlines` | Guarantee valid JSON/XML/code structure during generation, use Pydantic models for type-safe outputs, support local models (Transformers, vLLM), and maximize inference speed with Outlines - dottxt.ai's structured generation library | `mlops/inference/outlines` |
 | `serving-llms-vllm` | Serves LLMs with high throughput using vLLM's PagedAttention and continuous batching. Use when deploying production LLM APIs, optimizing inference latency/throughput, or serving models with limited GPU memory. Supports OpenAI-compatible endpoints, quantization (GPTQ/AWQ/FP8), an… | `mlops/inference/vllm` |
@@ -202,7 +200,6 @@ Fine-tuning, RLHF/DPO/GRPO training, distributed training frameworks, and optimi
 | `axolotl` | Expert guidance for fine-tuning LLMs with Axolotl - YAML configs, 100+ models, LoRA/QLoRA, DPO/KTO/ORPO/GRPO, multimodal support | `mlops/training/axolotl` |
 | `distributed-llm-pretraining-torchtitan` | Provides PyTorch-native distributed LLM pretraining using torchtitan with 4D parallelism (FSDP2, TP, PP, CP). Use when pretraining Llama 3.1, DeepSeek V3, or custom models at scale from 8 to 512+ GPUs with Float8, torch.compile, and distributed checkpointing. | `mlops/training/torchtitan` |
 | `fine-tuning-with-trl` | Fine-tune LLMs using reinforcement learning with TRL - SFT for instruction tuning, DPO for preference alignment, PPO/GRPO for reward optimization, and reward model training. Use when need RLHF, align model with preferences, or train from human feedback. Works with HuggingFace Tr… | `mlops/training/trl-fine-tuning` |
-| `grpo-rl-training` | Expert guidance for GRPO/RL fine-tuning with TRL for reasoning and task-specific model training | `mlops/training/grpo-rl-training` |
 | `hermes-atropos-environments` | Build, test, and debug Hermes Agent RL environments for Atropos training. Covers the HermesAgentBaseEnv interface, reward functions, agent loop integration, evaluation with tools, wandb logging, and the three CLI modes (serve/process/evaluate). Use when creating, reviewing, or f… | `mlops/training/hermes-atropos-environments` |
 | `huggingface-accelerate` | Simplest distributed training API. 4 lines to add distributed support to any PyTorch script. Unified API for DeepSpeed/FSDP/Megatron/DDP. Automatic device placement, mixed precision (FP16/BF16/FP8). Interactive config, single launch command. HuggingFace ecosystem standard. | `mlops/training/accelerate` |
 | `optimizing-attention-flash` | Optimizes transformer attention with Flash Attention for 2-4x speedup and 10-20x memory reduction. Use when training/running transformers with long sequences (&gt;512 tokens), encountering GPU memory issues with attention, or need faster inference. Supports PyTorch native SDPA,… | `mlops/training/flash-attention` |

From 54e0eb24c0c9700fd0139242aab740c51711bacb Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 18 Apr 2026 01:45:48 -0700
Subject: [PATCH 012/143] =?UTF-8?q?docs:=20correctness=20audit=20=E2=80=94?=
 =?UTF-8?q?=20fix=20wrong=20values,=20add=20missing=20coverage=20(#11972)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Comprehensive audit of every reference/messaging/feature doc page against the
live code registries (PROVIDER_REGISTRY, OPTIONAL_ENV_VARS, COMMAND_REGISTRY,
TOOLSETS, tool registry, on-disk skills). Every fix was verified against code
before writing.

### Wrong values fixed (users would paste-and-fail)

- reference/environment-variables.md:
  - DASHSCOPE_BASE_URL default was `coding-intl.dashscope.aliyuncs.com/v1` →
    actual `dashscope-intl.aliyuncs.com/compatible-mode/v1`.
  - MINIMAX_BASE_URL and MINIMAX_CN_BASE_URL defaults were `/v1` → actual
    `/anthropic` (Hermes calls MiniMax via its Anthropic Messages endpoint).
- reference/toolsets-reference.md MCP example used the non-existent nested
  `mcp: servers:` key → real key is the flat `mcp_servers:`.
- reference/skills-catalog.md listed ~20 bundled skills that no longer exist
  on disk (all moved to `optional-skills/`). Regenerated the whole bundled
  section from `skills/**/SKILL.md` — 79 skills, accurate paths and names.
- messaging/slack.md ":::info" callout claimed Slack has no
  `free_response_channels` equivalent; both the env var and the yaml key are
  in fact read.
- messaging/qqbot.md documented `QQ_MARKDOWN_SUPPORT` as an env var, but the
  adapter only reads `extra.markdown_support` from config.yaml. Removed the
  env var row and noted config-only nature.
- messaging/qqbot.md `hermes setup gateway` → `hermes gateway setup`.

### Missing coverage added

- Providers: AWS Bedrock and Qwen Portal (qwen-oauth) — both in
  PROVIDER_REGISTRY but undocumented everywhere. Added sections to
  integrations/providers.md, rows to quickstart.md and fallback-providers.md.
- integrations/providers.md "Fallback Model" provider list now includes
  gemini, google-gemini-cli, qwen-oauth, xai, nvidia, ollama-cloud, bedrock.
- reference/cli-commands.md `--provider` enum and HERMES_INFERENCE_PROVIDER
  enum in env-vars now include the same set.
- reference/slash-commands.md: added `/agents` (alias `/tasks`) and `/copy`.
  Removed duplicate rows for `/snapshot`, `/fast` (×2), `/debug`.
- reference/tools-reference.md: fixed "47 built-in tools" → 52. Added
  `feishu_doc` and `feishu_drive` toolset sections.
- reference/toolsets-reference.md: added `feishu_doc` / `feishu_drive` core
  rows + all missing `hermes-<platform>` toolsets in the platform table
  (bluebubbles, dingtalk, feishu, qqbot, wecom, wecom-callback, weixin,
  homeassistant, webhook, gateway). Fixed the `debugging` composite to
  describe the actual `includes=[...]` mechanism.
- reference/optional-skills-catalog.md: added `fitness-nutrition`.
- reference/environment-variables.md: added NOUS_BASE_URL,
  NOUS_INFERENCE_BASE_URL, NVIDIA_API_KEY/BASE_URL, OLLAMA_API_KEY/BASE_URL,
  XAI_API_KEY/BASE_URL, MISTRAL_API_KEY, AWS_REGION/AWS_PROFILE,
  BEDROCK_BASE_URL, HERMES_QWEN_BASE_URL, DISCORD_ALLOWED_CHANNELS,
  DISCORD_PROXY, TELEGRAM_REPLY_TO_MODE, MATRIX_DEVICE_ID, MATRIX_REACTIONS,
  QQBOT_HOME_CHANNEL_NAME, QQ_SANDBOX.
- messaging/discord.md: documented DISCORD_ALLOWED_CHANNELS, DISCORD_PROXY,
  HERMES_DISCORD_TEXT_BATCH_DELAY_SECONDS and
  HERMES_DISCORD_TEXT_BATCH_SPLIT_DELAY_SECONDS (all actively read by the
  adapter).
- messaging/matrix.md: documented MATRIX_REACTIONS (default true).
- messaging/telegram.md: removed the redundant second Webhook Mode section
  that invented a `telegram.webhook_mode: true` yaml key the adapter does
  not read.
- user-guide/features/hooks.md: added `on_session_finalize` and
  `on_session_reset` (both emitted via invoke_hook but undocumented).
- user-guide/features/api-server.md: documented GET /health/detailed, the
  `/api/jobs/*` CRUD surface, POST /v1/runs, and GET /v1/runs/{id}/events
  (10 routes that were live but undocumented).
- user-guide/features/fallback-providers.md: added `approval` and
  `title_generation` auxiliary-task rows; added gemini, bedrock, qwen-oauth
  to the supported-providers table.
- user-guide/features/tts.md: "seven providers" → "eight" (post-xAI add
  oversight in #11942).
- user-guide/configuration.md: TTS provider enum gains `xai` and `gemini`;
  yaml example block gains `mistral:`, `gemini:`, `xai:` subsections.
  Auxiliary-provider enum now enumerates all real registry entries.
- reference/faq.md: stale AIAgent/config examples bumped from
  `nous/hermes-3-llama-3.1-70b` and `claude-sonnet-4.6` to
  `claude-opus-4.7`.

### Docs-site integrity

- guides/build-a-hermes-plugin.md referenced two nonexistent hooks
  (`pre_api_request`, `post_api_request`). Replaced with the real
  `on_session_finalize` / `on_session_reset` entries.
- messaging/open-webui.md and features/api-server.md had pre-existing
  broken links to `/docs/user-guide/features/profiles` (actual path is
  `/docs/user-guide/profiles`). Fixed.
- reference/skills-catalog.md had one `<1%` literal that MDX parsed as a
  JSX tag. Escaped to `&lt;1%`.

### False positives filtered out (not changed, verified correct)

- `/set-home` is a registered alias of `/sethome` — docs were fine.
- `hermes setup gateway` is valid syntax (`hermes setup <section>`);
  changed in qqbot.md for cross-doc consistency, not as a bug fix.
- Telegram reactions "disabled by default" matches code (default `"false"`).
- Matrix encryption "opt-in" matches code (empty env default → disabled).
- `pre_api_request` / `post_api_request` hooks do NOT exist in current code;
  documented instead the real `on_session_finalize` / `on_session_reset`.
- SIGNAL_IGNORE_STORIES is already in env-vars.md (subagent missed it).

Validation:
- `docusaurus build` — passes (only pre-existing nix-setup anchor warning).
- `ascii-guard lint docs` — 124 files, 0 errors.
- 22 files changed, +317 / −158.
---
 website/docs/getting-started/quickstart.md    |   3 +
 website/docs/guides/build-a-hermes-plugin.md  |   4 +-
 website/docs/integrations/providers.md        |  60 ++++++-
 website/docs/reference/cli-commands.md        |   5 +-
 .../docs/reference/environment-variables.md   |  22 ++-
 website/docs/reference/faq.md                 |   6 +-
 .../docs/reference/optional-skills-catalog.md |   1 +
 website/docs/reference/skills-catalog.md      | 149 ++++++++----------
 website/docs/reference/slash-commands.md      |   8 +-
 website/docs/reference/tools-reference.md     |  23 ++-
 website/docs/reference/toolsets-reference.md  |  29 ++--
 website/docs/user-guide/configuration.md      |  16 +-
 .../docs/user-guide/features/api-server.md    |  56 ++++++-
 .../user-guide/features/fallback-providers.md |   7 +
 website/docs/user-guide/features/hooks.md     |  48 ++++++
 website/docs/user-guide/features/tts.md       |   2 +-
 website/docs/user-guide/messaging/discord.md  |   4 +
 website/docs/user-guide/messaging/matrix.md   |   5 +
 .../docs/user-guide/messaging/open-webui.md   |   2 +-
 website/docs/user-guide/messaging/qqbot.md    |   6 +-
 website/docs/user-guide/messaging/slack.md    |   2 +-
 website/docs/user-guide/messaging/telegram.md |  34 ----
 22 files changed, 326 insertions(+), 166 deletions(-)

diff --git a/website/docs/getting-started/quickstart.md b/website/docs/getting-started/quickstart.md
index 77d6ac84904..8a39c49f1e8 100644
--- a/website/docs/getting-started/quickstart.md
+++ b/website/docs/getting-started/quickstart.md
@@ -53,6 +53,9 @@ hermes setup       # Or configure everything at once
 | **Kimi / Moonshot** | Moonshot-hosted coding and chat models | Set `KIMI_API_KEY` |
 | **Kimi / Moonshot China** | China-region Moonshot endpoint | Set `KIMI_CN_API_KEY` |
 | **Arcee AI** | Trinity models | Set `ARCEEAI_API_KEY` |
+| **Xiaomi MiMo** | Xiaomi MiMo models via [platform.xiaomimimo.com](https://platform.xiaomimimo.com) | Set `XIAOMI_API_KEY` |
+| **AWS Bedrock** | Anthropic Claude, Amazon Nova, DeepSeek v3.2, and Meta Llama via AWS | Standard boto3 auth (`AWS_PROFILE` or `AWS_ACCESS_KEY_ID` + `AWS_REGION`) |
+| **Qwen Portal (OAuth)** | Qwen 3.5 / Qwen-Coder models via Alibaba's consumer Qwen Portal | OAuth via `hermes model` (optional: `HERMES_QWEN_BASE_URL`) |
 | **MiniMax** | International MiniMax endpoint | Set `MINIMAX_API_KEY` |
 | **MiniMax China** | China-region MiniMax endpoint | Set `MINIMAX_CN_API_KEY` |
 | **Alibaba Cloud** | Qwen models via DashScope | Set `DASHSCOPE_API_KEY` |
diff --git a/website/docs/guides/build-a-hermes-plugin.md b/website/docs/guides/build-a-hermes-plugin.md
index e8611197a17..4e2ee5cf267 100644
--- a/website/docs/guides/build-a-hermes-plugin.md
+++ b/website/docs/guides/build-a-hermes-plugin.md
@@ -419,8 +419,8 @@ Each hook is documented in full on the **[Event Hooks reference](/docs/user-guid
 | [`post_llm_call`](/docs/user-guide/features/hooks#post_llm_call) | Once per turn, after the tool-calling loop (successful turns only) | `session_id: str, user_message: str, assistant_response: str, conversation_history: list, model: str, platform: str` | ignored |
 | [`on_session_start`](/docs/user-guide/features/hooks#on_session_start) | New session created (first turn only) | `session_id: str, model: str, platform: str` | ignored |
 | [`on_session_end`](/docs/user-guide/features/hooks#on_session_end) | End of every `run_conversation` call + CLI exit | `session_id: str, completed: bool, interrupted: bool, model: str, platform: str` | ignored |
-| [`pre_api_request`](/docs/user-guide/features/hooks#pre_api_request) | Before each HTTP request to the LLM provider | `method: str, url: str, headers: dict, body: dict` | ignored |
-| [`post_api_request`](/docs/user-guide/features/hooks#post_api_request) | After each HTTP response from the LLM provider | `method: str, url: str, status_code: int, response: dict` | ignored |
+| [`on_session_finalize`](/docs/user-guide/features/hooks#on_session_finalize) | CLI/gateway tears down an active session | `session_id: str \| None, platform: str` | ignored |
+| [`on_session_reset`](/docs/user-guide/features/hooks#on_session_reset) | Gateway swaps in a new session key (`/new`, `/reset`) | `session_id: str, platform: str` | ignored |
 
 Most hooks are fire-and-forget observers — their return values are ignored. The exception is `pre_llm_call`, which can inject context into the conversation.
 
diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md
index 56d2f0ea38d..4f536ec7496 100644
--- a/website/docs/integrations/providers.md
+++ b/website/docs/integrations/providers.md
@@ -323,6 +323,64 @@ The model catalog is fetched dynamically from `ollama.com/v1/models` and cached
 Both speak the same OpenAI-compatible API. Cloud is a first-class provider (`--provider ollama-cloud`, `OLLAMA_API_KEY`); local Ollama is reached via the Custom Endpoint flow (base URL `http://localhost:11434/v1`, no key). Use cloud for large models you can't run locally; use local for privacy or offline work.
 :::
 
+### AWS Bedrock
+
+Anthropic Claude, Amazon Nova, DeepSeek v3.2, Meta Llama 4, and other models via AWS Bedrock. Uses the AWS SDK (`boto3`) credential chain — no API key, just standard AWS auth.
+
+```bash
+# Simplest — default profile in ~/.aws/credentials
+hermes chat --provider bedrock --model us.anthropic.claude-sonnet-4-6
+
+# Or with explicit env vars
+AWS_PROFILE=myprofile AWS_REGION=us-east-1 hermes chat --provider bedrock --model us.anthropic.claude-sonnet-4-6
+```
+
+Or permanently in `config.yaml`:
+```yaml
+model:
+  provider: "bedrock"
+  default: "us.anthropic.claude-sonnet-4-6"
+bedrock:
+  region: "us-east-1"          # or set AWS_REGION
+  # profile: "myprofile"       # or set AWS_PROFILE
+  # discovery: true            # auto-discover region from IAM
+  # guardrail:                 # optional Bedrock Guardrails
+  #   id: "your-guardrail-id"
+  #   version: "DRAFT"
+```
+
+Authentication uses the standard boto3 chain: explicit `AWS_ACCESS_KEY_ID`/`AWS_SECRET_ACCESS_KEY`, `AWS_PROFILE` from `~/.aws/credentials`, IAM role on EC2/ECS/Lambda, IMDS, or SSO. No env var is required if you're already authenticated with the AWS CLI.
+
+Bedrock uses the **Converse API** under the hood — requests are translated to Bedrock's model-agnostic shape, so the same config works for Claude, Nova, DeepSeek, and Llama models. Set `BEDROCK_BASE_URL` only if you're calling a non-default regional endpoint.
+
+See the [AWS Bedrock guide](/docs/guides/aws-bedrock) for a walkthrough of IAM setup, region selection, and cross-region inference.
+
+### Qwen Portal (OAuth)
+
+Alibaba's Qwen Portal with browser-based OAuth login. Pick **Qwen OAuth (Portal)** in `hermes model`, sign in through the browser, and Hermes persists the refresh token.
+
+```bash
+hermes model
+# → pick "Qwen OAuth (Portal)"
+# → browser opens; sign in with your Alibaba account
+# → confirm — credentials are saved to ~/.hermes/auth.json
+
+hermes chat   # uses portal.qwen.ai/v1 endpoint
+```
+
+Or configure `config.yaml`:
+```yaml
+model:
+  provider: "qwen-oauth"
+  default: "qwen3-coder-plus"
+```
+
+Set `HERMES_QWEN_BASE_URL` only if the portal endpoint relocates (default: `https://portal.qwen.ai/v1`).
+
+:::tip Qwen OAuth vs DashScope (Alibaba)
+`qwen-oauth` uses the consumer-facing Qwen Portal with OAuth login — ideal for individual users. The `alibaba` provider uses DashScope's enterprise API with a `DASHSCOPE_API_KEY` — ideal for programmatic / production workloads. Both route to Qwen-family models but live at different endpoints.
+:::
+
 ### NVIDIA NIM
 
 Nemotron and other open source models via [build.nvidia.com](https://build.nvidia.com) (free API key) or a local NIM endpoint.
@@ -1101,7 +1159,7 @@ fallback_model:
 
 When activated, the fallback swaps the model and provider mid-session without losing your conversation. It fires **at most once** per session.
 
-Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `deepseek`, `ai-gateway`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `custom`.
+Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `deepseek`, `nvidia`, `xai`, `ollama-cloud`, `bedrock`, `ai-gateway`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `custom`.
 
 :::tip
 Fallback is configured exclusively through `config.yaml` — there are no environment variables for it. For full details on when it triggers, supported providers, and how it interacts with auxiliary tasks and delegation, see [Fallback Providers](/docs/user-guide/features/fallback-providers).
diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md
index ea5557a193d..1fc4911158b 100644
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@@ -63,9 +63,6 @@ hermes [global-options] <command> [subcommand/options]
 | `hermes insights` | Show token/cost/activity analytics. |
 | `hermes claw` | OpenClaw migration helpers. |
 | `hermes dashboard` | Launch the web dashboard for managing config, API keys, and sessions. |
-| `hermes debug` | Debug tools — upload logs and system info for support. |
-| `hermes backup` | Back up Hermes home directory to a zip file. |
-| `hermes import` | Restore a Hermes backup from a zip file. |
 | `hermes profile` | Manage profiles — multiple isolated Hermes instances. |
 | `hermes completion` | Print shell completion scripts (bash/zsh). |
 | `hermes version` | Show version information. |
@@ -85,7 +82,7 @@ Common options:
 | `-q`, `--query "..."` | One-shot, non-interactive prompt. |
 | `-m`, `--model <model>` | Override the model for this run. |
 | `-t`, `--toolsets <csv>` | Enable a comma-separated set of toolsets. |
-| `--provider <provider>` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `gemini`, `google-gemini-cli`, `huggingface`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`). |
+| `--provider <provider>` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `gemini`, `google-gemini-cli`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`. |
 | `-s`, `--skills <name>` | Preload one or more skills for the session (can be repeated or comma-separated). |
 | `-v`, `--verbose` | Verbose output. |
 | `-Q`, `--quiet` | Programmatic mode: suppress banner/spinner/tool previews. |
diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md
index ff223739af3..640e7be999b 100644
--- a/website/docs/reference/environment-variables.md
+++ b/website/docs/reference/environment-variables.md
@@ -14,6 +14,8 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
 |----------|-------------|
 | `OPENROUTER_API_KEY` | OpenRouter API key (recommended for flexibility) |
 | `OPENROUTER_BASE_URL` | Override the OpenRouter-compatible base URL |
+| `NOUS_BASE_URL` | Override Nous Portal base URL (rarely needed; development/testing only) |
+| `NOUS_INFERENCE_BASE_URL` | Override Nous inference endpoint directly |
 | `AI_GATEWAY_API_KEY` | Vercel AI Gateway API key ([ai-gateway.vercel.sh](https://ai-gateway.vercel.sh)) |
 | `AI_GATEWAY_BASE_URL` | Override AI Gateway base URL (default: `https://ai-gateway.vercel.sh/v1`) |
 | `OPENAI_API_KEY` | API key for custom OpenAI-compatible endpoints (used with `OPENAI_BASE_URL`) |
@@ -35,9 +37,9 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
 | `ARCEEAI_API_KEY` | Arcee AI API key ([chat.arcee.ai](https://chat.arcee.ai/)) |
 | `ARCEE_BASE_URL` | Override Arcee base URL (default: `https://api.arcee.ai/api/v1`) |
 | `MINIMAX_API_KEY` | MiniMax API key — global endpoint ([minimax.io](https://www.minimax.io)) |
-| `MINIMAX_BASE_URL` | Override MiniMax base URL (default: `https://api.minimax.io/v1`) |
+| `MINIMAX_BASE_URL` | Override MiniMax base URL (default: `https://api.minimax.io/anthropic` — Hermes uses MiniMax's Anthropic Messages-compatible endpoint) |
 | `MINIMAX_CN_API_KEY` | MiniMax API key — China endpoint ([minimaxi.com](https://www.minimaxi.com)) |
-| `MINIMAX_CN_BASE_URL` | Override MiniMax China base URL (default: `https://api.minimaxi.com/v1`) |
+| `MINIMAX_CN_BASE_URL` | Override MiniMax China base URL (default: `https://api.minimaxi.com/anthropic`) |
 | `KILOCODE_API_KEY` | Kilo Code API key ([kilo.ai](https://kilo.ai)) |
 | `KILOCODE_BASE_URL` | Override Kilo Code base URL (default: `https://api.kilo.ai/api/gateway`) |
 | `XIAOMI_API_KEY` | Xiaomi MiMo API key ([platform.xiaomimimo.com](https://platform.xiaomimimo.com)) |
@@ -53,7 +55,7 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
 | `ANTHROPIC_API_KEY` | Anthropic Console API key ([console.anthropic.com](https://console.anthropic.com/)) |
 | `ANTHROPIC_TOKEN` | Manual or legacy Anthropic OAuth/setup-token override |
 | `DASHSCOPE_API_KEY` | Alibaba Cloud DashScope API key for Qwen models ([modelstudio.console.alibabacloud.com](https://modelstudio.console.alibabacloud.com/)) |
-| `DASHSCOPE_BASE_URL` | Custom DashScope base URL (default: `https://coding-intl.dashscope.aliyuncs.com/v1`) |
+| `DASHSCOPE_BASE_URL` | Custom DashScope base URL (default: `https://dashscope-intl.aliyuncs.com/compatible-mode/v1`; use `https://dashscope.aliyuncs.com/compatible-mode/v1` for mainland-China region) |
 | `DEEPSEEK_API_KEY` | DeepSeek API key for direct DeepSeek access ([platform.deepseek.com](https://platform.deepseek.com/api_keys)) |
 | `DEEPSEEK_BASE_URL` | Custom DeepSeek API base URL |
 | `NVIDIA_API_KEY` | NVIDIA NIM API key — Nemotron and open models ([build.nvidia.com](https://build.nvidia.com)) |
@@ -62,6 +64,11 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
 | `OLLAMA_BASE_URL` | Override Ollama Cloud base URL (default: `https://ollama.com/v1`) |
 | `XAI_API_KEY` | xAI (Grok) API key for chat + TTS ([console.x.ai](https://console.x.ai/)) |
 | `XAI_BASE_URL` | Override xAI base URL (default: `https://api.x.ai/v1`) |
+| `MISTRAL_API_KEY` | Mistral API key for Voxtral TTS and Voxtral STT ([console.mistral.ai](https://console.mistral.ai)) |
+| `AWS_REGION` | AWS region for Bedrock inference (e.g. `us-east-1`, `eu-central-1`). Read by boto3. |
+| `AWS_PROFILE` | AWS named profile for Bedrock authentication (reads `~/.aws/credentials`). Leave unset to use default boto3 credential chain. |
+| `BEDROCK_BASE_URL` | Override Bedrock runtime base URL (default: `https://bedrock-runtime.us-east-1.amazonaws.com`; usually leave unset and use `AWS_REGION` instead) |
+| `HERMES_QWEN_BASE_URL` | Qwen Portal base URL override (default: `https://portal.qwen.ai/v1`) |
 | `OPENCODE_ZEN_API_KEY` | OpenCode Zen API key — pay-as-you-go access to curated models ([opencode.ai](https://opencode.ai/auth)) |
 | `OPENCODE_ZEN_BASE_URL` | Override OpenCode Zen base URL |
 | `OPENCODE_GO_API_KEY` | OpenCode Go API key — $10/month subscription for open models ([opencode.ai](https://opencode.ai/auth)) |
@@ -79,7 +86,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
 
 | Variable | Description |
 |----------|-------------|
-| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `google-gemini-cli`, `opencode-zen`, `opencode-go`, `ai-gateway` (default: `auto`) |
+| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `google-gemini-cli`, `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway` (default: `auto`) |
 | `HERMES_PORTAL_BASE_URL` | Override Nous Portal URL (for development/testing) |
 | `NOUS_INFERENCE_BASE_URL` | Override Nous inference API URL |
 | `HERMES_NOUS_MIN_KEY_TTL_SECONDS` | Min agent key TTL before re-mint (default: 1800 = 30min) |
@@ -189,11 +196,14 @@ For cloud sandbox backends, persistence is filesystem-oriented. `TERMINAL_LIFETI
 | `TELEGRAM_WEBHOOK_PORT` | Local listen port for webhook server (default: `8443`) |
 | `TELEGRAM_WEBHOOK_SECRET` | Secret token for verifying updates come from Telegram |
 | `TELEGRAM_REACTIONS` | Enable emoji reactions on messages during processing (default: `false`) |
+| `TELEGRAM_REPLY_TO_MODE` | Reply-reference behavior: `off`, `first` (default), or `all`. Matches the Discord pattern. |
 | `TELEGRAM_IGNORED_THREADS` | Comma-separated Telegram forum topic/thread IDs where the bot never responds |
 | `TELEGRAM_PROXY` | Proxy URL for Telegram connections — overrides `HTTPS_PROXY`. Supports `http://`, `https://`, `socks5://` |
 | `DISCORD_BOT_TOKEN` | Discord bot token |
 | `DISCORD_ALLOWED_USERS` | Comma-separated Discord user IDs allowed to use the bot |
 | `DISCORD_ALLOWED_ROLES` | Comma-separated Discord role IDs allowed to use the bot (OR with `DISCORD_ALLOWED_USERS`). Auto-enables the Members intent. Useful when moderation teams churn — role grants propagate automatically. |
+| `DISCORD_ALLOWED_CHANNELS` | Comma-separated Discord channel IDs. When set, the bot only responds in these channels (plus DMs if allowed). Overrides `config.yaml` `discord.allowed_channels`. |
+| `DISCORD_PROXY` | Proxy URL for Discord connections — overrides `HTTPS_PROXY`. Supports `http://`, `https://`, `socks5://` |
 | `DISCORD_HOME_CHANNEL` | Default Discord channel for cron delivery |
 | `DISCORD_HOME_CHANNEL_NAME` | Display name for the Discord home channel |
 | `DISCORD_REQUIRE_MENTION` | Require an @mention before responding in server channels |
@@ -298,6 +308,8 @@ For cloud sandbox backends, persistence is filesystem-oriented. `TERMINAL_LIFETI
 | `QQ_GROUP_ALLOWED_USERS` | Comma-separated QQ group IDs for group @-message access |
 | `QQ_ALLOW_ALL_USERS` | Allow all users (`true`/`false`, overrides `QQ_ALLOWED_USERS`) |
 | `QQBOT_HOME_CHANNEL` | QQ user/group openID for cron delivery and notifications |
+| `QQBOT_HOME_CHANNEL_NAME` | Display name for the QQ home channel |
+| `QQ_SANDBOX` | Route QQ Bot to the sandbox gateway for development testing (`true`/`false`). Use with a sandbox app credential from [q.qq.com](https://q.qq.com). |
 | `MATTERMOST_URL` | Mattermost server URL (e.g. `https://mm.example.com`) |
 | `MATTERMOST_TOKEN` | Bot token or personal access token for Mattermost |
 | `MATTERMOST_ALLOWED_USERS` | Comma-separated Mattermost user IDs allowed to message the bot |
@@ -312,6 +324,8 @@ For cloud sandbox backends, persistence is filesystem-oriented. `TERMINAL_LIFETI
 | `MATRIX_ALLOWED_USERS` | Comma-separated Matrix user IDs allowed to message the bot (e.g. `@alice:matrix.org`) |
 | `MATRIX_HOME_ROOM` | Room ID for proactive message delivery (e.g. `!abc123:matrix.org`) |
 | `MATRIX_ENCRYPTION` | Enable end-to-end encryption (`true`/`false`, default: `false`) |
+| `MATRIX_DEVICE_ID` | Stable Matrix device ID for E2EE persistence across restarts (e.g. `HERMES_BOT`). Without this, E2EE keys rotate on every startup and decryption of messages in previously joined rooms breaks. |
+| `MATRIX_REACTIONS` | Enable processing-lifecycle emoji reactions on inbound messages (default: `true`). Set to `false` to disable. |
 | `MATRIX_REQUIRE_MENTION` | Require `@mention` in rooms (default: `true`). Set to `false` to respond to all messages. |
 | `MATRIX_FREE_RESPONSE_ROOMS` | Comma-separated room IDs where bot responds without `@mention` |
 | `MATRIX_AUTO_THREAD` | Auto-create threads for room messages (default: `true`) |
diff --git a/website/docs/reference/faq.md b/website/docs/reference/faq.md
index c39f510b1ff..132a4d00a9e 100644
--- a/website/docs/reference/faq.md
+++ b/website/docs/reference/faq.md
@@ -110,7 +110,7 @@ Yes. Import the `AIAgent` class and use Hermes programmatically:
 ```python
 from run_agent import AIAgent
 
-agent = AIAgent(model="openrouter/nous/hermes-3-llama-3.1-70b")
+agent = AIAgent(model="anthropic/claude-opus-4.7")
 response = agent.chat("Explain quantum computing briefly")
 ```
 
@@ -243,7 +243,7 @@ Make sure the key matches the provider. An OpenAI key won't work with OpenRouter
 hermes model
 
 # Set a valid model
-hermes config set HERMES_MODEL openrouter/nous/hermes-3-llama-3.1-70b
+hermes config set HERMES_MODEL anthropic/claude-opus-4.7
 
 # Or specify per-session
 hermes chat --model openrouter/meta-llama/llama-3.1-70b-instruct
@@ -781,7 +781,7 @@ hermes config show | head -20
 hermes model
 
 # Or test with a known-good model
-hermes chat -q "hello" --model anthropic/claude-sonnet-4.6
+hermes chat -q "hello" --model anthropic/claude-opus-4.7
 ```
 
 If using OpenRouter, make sure your API key has credits. A 400 from OpenRouter often means the model requires a paid plan or the model ID has a typo.
diff --git a/website/docs/reference/optional-skills-catalog.md b/website/docs/reference/optional-skills-catalog.md
index bbb2c3b80ea..1501567b791 100644
--- a/website/docs/reference/optional-skills-catalog.md
+++ b/website/docs/reference/optional-skills-catalog.md
@@ -74,6 +74,7 @@ hermes skills uninstall <skill-name>
 
 | Skill | Description |
 |-------|-------------|
+| **fitness-nutrition** | Gym workout planner and nutrition tracker. Search 690+ exercises by muscle, equipment, or category via wger. Look up macros and calories for 380,000+ foods via USDA FoodData Central. Computes BMI, TDEE, one-rep max, macro splits, and body fat — pure Python, no pip installs. |
 | **neuroskill-bci** | Brain-Computer Interface (BCI) integration for neuroscience research workflows. |
 
 ## MCP
diff --git a/website/docs/reference/skills-catalog.md b/website/docs/reference/skills-catalog.md
index ead50dbea67..e5283ba0154 100644
--- a/website/docs/reference/skills-catalog.md
+++ b/website/docs/reference/skills-catalog.md
@@ -27,27 +27,32 @@ Skills for spawning and orchestrating autonomous AI coding agents and multi-agen
 |-------|-------------|------|
 | `claude-code` | Delegate coding tasks to Claude Code (Anthropic's CLI agent). Use for building features, refactoring, PR reviews, and iterative coding. Requires the claude CLI installed. | `autonomous-ai-agents/claude-code` |
 | `codex` | Delegate coding tasks to OpenAI Codex CLI agent. Use for building features, refactoring, PR reviews, and batch issue fixing. Requires the codex CLI and a git repository. | `autonomous-ai-agents/codex` |
-| `hermes-agent-spawning` | Spawn additional Hermes Agent instances as autonomous subprocesses for independent long-running tasks. Supports non-interactive one-shot mode (-q) and interactive PTY mode for multi-turn collaboration. Different from delegate_task — this runs a full separate hermes process. | `autonomous-ai-agents/hermes-agent` |
+| `hermes-agent` | Complete guide to using and extending Hermes Agent — CLI usage, setup, configuration, spawning additional agents, gateway platforms, skills, voice, tools, profiles, and a concise contributor reference. Load this skill when helping users configure Hermes, troubleshoot issues, s… | `autonomous-ai-agents/hermes-agent` |
 | `opencode` | Delegate coding tasks to OpenCode CLI agent for feature implementation, refactoring, PR review, and long-running autonomous sessions. Requires the opencode CLI installed and authenticated. | `autonomous-ai-agents/opencode` |
 
+## creative
+
+Creative content generation — ASCII art, hand-drawn diagrams, animations, music, and visual design tools.
+
+| Skill | Description | Path |
+|-------|-------------|------|
+| `architecture-diagram` | Generate dark-themed SVG diagrams of software systems and cloud infrastructure as standalone HTML files with inline SVG graphics. Semantic component colors (cyan=frontend, emerald=backend, violet=database, amber=cloud/AWS, rose=security, orange=message bus), JetBrains Mono fon… | `creative/architecture-diagram` |
+| `ascii-art` | Generate ASCII art using pyfiglet (571 fonts), cowsay, boxes, toilet, image-to-ascii, remote APIs (asciified, ascii.co.uk), and LLM fallback. No API keys required. | `creative/ascii-art` |
+| `ascii-video` | Production pipeline for ASCII art video — any format. Converts video/audio/images/generative input into colored ASCII character video output (MP4, GIF, image sequence). Covers: video-to-ASCII conversion, audio-reactive music visualizers, generative ASCII art animations, hybrid… | `creative/ascii-video` |
+| `excalidraw` | Create hand-drawn style diagrams using Excalidraw JSON format. Generate .excalidraw files for architecture diagrams, flowcharts, sequence diagrams, concept maps, and more. Files can be opened at excalidraw.com or uploaded for shareable links. | `creative/excalidraw` |
+| `ideation` | Generate project ideas through creative constraints. Use when the user says 'I want to build something', 'give me a project idea', 'I'm bored', 'what should I make', 'inspire me', or any variant of 'I have tools but no direction'. Works for code, art, hardware, writing, tools,… | `creative/creative-ideation` |
+| `manim-video` | Production pipeline for mathematical and technical animations using Manim Community Edition. Creates 3Blue1Brown-style explainer videos, algorithm visualizations, equation derivations, architecture diagrams, and data stories. Use when users request: animated explanations, math… | `creative/manim-video` |
+| `p5js` | Production pipeline for interactive and generative visual art using p5.js. Creates browser-based sketches, generative art, data visualizations, interactive experiences, 3D scenes, audio-reactive visuals, and motion graphics — exported as HTML, PNG, GIF, MP4, or SVG. Covers: 2D… | `creative/p5js` |
+| `popular-web-designs` | 54 production-quality design systems extracted from real websites. Load a template to generate HTML/CSS that matches the visual identity of sites like Stripe, Linear, Vercel, Notion, Airbnb, and more. Each template includes colors, typography, components, layout rules, and rea… | `creative/popular-web-designs` |
+| `songwriting-and-ai-music` | Songwriting craft, AI music generation prompts (Suno focus), parody/adaptation techniques, phonetic tricks, and lessons learned. These are tools and ideas, not rules. Break any of them when the art calls for it. | `creative/songwriting-and-ai-music` |
+
 ## data-science
 
 Skills for data science workflows — interactive exploration, Jupyter notebooks, data analysis, and visualization.
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| `jupyter-live-kernel` | Use a live Jupyter kernel for stateful, iterative Python execution via hamelnb. Load this skill when the task involves exploration, iteration, or inspecting intermediate results. | `data-science/jupyter-live-kernel` |
-
-## creative
-
-Creative content generation — ASCII art, hand-drawn style diagrams, and visual design tools.
-
-| Skill | Description | Path |
-|-------|-------------|------|
-| `ascii-art` | Generate ASCII art using pyfiglet (571 fonts), cowsay, boxes, toilet, image-to-ascii, remote APIs (asciified, ascii.co.uk), and LLM fallback. No API keys required. | `creative/ascii-art` |
-| `ascii-video` | "Production pipeline for ASCII art video — any format. Converts video/audio/images/generative input into colored ASCII character video output (MP4, GIF, image sequence). Covers: video-to-ASCII conversion, audio-reactive music visualizers, generative ASCII art animations, hybrid… | `creative/ascii-video` |
-| `excalidraw` | Create hand-drawn style diagrams using Excalidraw JSON format. Generate .excalidraw files for architecture diagrams, flowcharts, sequence diagrams, concept maps, and more. Files can be opened at excalidraw.com or uploaded for shareable links. | `creative/excalidraw` |
-| `p5js` | Production pipeline for interactive and generative visual art using p5.js. Create sketches, render them to images/video via headless browser, and serve live previews. Supports canvas animations, data visualizations, and creative coding experiments. | `creative/p5js` |
+| `jupyter-live-kernel` | Use a live Jupyter kernel for stateful, iterative Python execution via hamelnb. Load this skill when the task involves exploration, iteration, or inspecting intermediate results — data science, ML experimentation, API exploration, or building up complex code step-by-step. Uses… | `data-science/jupyter-live-kernel` |
 
 ## devops
 
@@ -55,14 +60,15 @@ DevOps and infrastructure automation skills.
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| `webhook-subscriptions` | Create and manage webhook subscriptions for event-driven agent activation. External services (GitHub, Stripe, CI/CD, IoT) POST events to trigger agent runs. Requires webhook platform to be enabled. | `devops/webhook-subscriptions` |
+| `webhook-subscriptions` | Create and manage webhook subscriptions for event-driven agent activation. Use when the user wants external services to trigger agent runs automatically. | `devops/webhook-subscriptions` |
 
 ## dogfood
 
+Internal dogfooding and QA skills used to test Hermes Agent itself.
+
 | Skill | Description | Path |
 |-------|-------------|------|
-| `dogfood` | Systematic exploratory QA testing of web applications — find bugs, capture evidence, and generate structured reports. | `dogfood/dogfood` |
-| `hermes-agent-setup` | Help users configure Hermes Agent — CLI usage, setup wizard, model/provider selection, tools, skills, voice/STT/TTS, gateway, and troubleshooting. | `dogfood/hermes-agent-setup` |
+| `dogfood` | Systematic exploratory QA testing of web applications — find bugs, capture evidence, and generate structured reports. | `dogfood` |
 
 ## email
 
@@ -83,7 +89,7 @@ Skills for setting up, configuring, and managing game servers, modpacks, and gam
 
 ## github
 
-GitHub workflow skills for managing repositories, pull requests, code reviews, issues, and CI/CD pipelines using the gh CLI and git via terminal.
+GitHub workflow skills for managing repositories, pull requests, code reviews, issues, and CI/CD pipelines.
 
 | Skill | Description | Path |
 |-------|-------------|------|
@@ -94,23 +100,17 @@ GitHub workflow skills for managing repositories, pull requests, code reviews, i
 | `github-pr-workflow` | Full pull request lifecycle — create branches, commit changes, open PRs, monitor CI status, auto-fix failures, and merge. Works with gh CLI or falls back to git + GitHub REST API via curl. | `github/github-pr-workflow` |
 | `github-repo-management` | Clone, create, fork, configure, and manage GitHub repositories. Manage remotes, secrets, releases, and workflows. Works with gh CLI or falls back to git + GitHub REST API via curl. | `github/github-repo-management` |
 
-## inference-sh
-
-Skills for AI app execution via inference.sh cloud platform.
-
-| Skill | Description | Path |
-|-------|-------------|------|
-| `inference-sh-cli` | Run 150+ AI apps via inference.sh CLI (infsh) — image generation, video creation, LLMs, search, 3D, social automation. | `inference-sh/cli` |
-
 ## leisure
 
+Skills for discovery and everyday tasks.
+
 | Skill | Description | Path |
 |-------|-------------|------|
 | `find-nearby` | Find nearby places (restaurants, cafes, bars, pharmacies, etc.) using OpenStreetMap. Works with coordinates, addresses, cities, zip codes, or Telegram location pins. No API keys needed. | `leisure/find-nearby` |
 
 ## mcp
 
-Skills for working with MCP (Model Context Protocol) servers, tools, and integrations. Includes the built-in native MCP client (configure servers in config.yaml for automatic tool discovery) and the mcporter CLI bridge for ad-hoc server interaction.
+Skills for working with MCP (Model Context Protocol) servers, tools, and integrations.
 
 | Skill | Description | Path |
 |-------|-------------|------|
@@ -126,7 +126,7 @@ Skills for working with media content — YouTube transcripts, GIF search, music
 | `gif-search` | Search and download GIFs from Tenor using curl. No dependencies beyond curl and jq. Useful for finding reaction GIFs, creating visual content, and sending GIFs in chat. | `media/gif-search` |
 | `heartmula` | Set up and run HeartMuLa, the open-source music generation model family (Suno-like). Generates full songs from lyrics + tags with multilingual support. | `media/heartmula` |
 | `songsee` | Generate spectrograms and audio feature visualizations (mel, chroma, MFCC, tempogram, etc.) from audio files via CLI. Useful for audio analysis, music production debugging, and visual documentation. | `media/songsee` |
-| `youtube-content` | Fetch YouTube video transcripts and transform them into structured content (chapters, summaries, threads, blog posts). | `media/youtube-content` |
+| `youtube-content` | Fetch YouTube video transcripts and transform them into structured content (chapters, summaries, threads, blog posts). Use when the user shares a YouTube URL or video link, asks to summarize a video, requests a transcript, or wants to extract and reformat content from any YouT… | `media/youtube-content` |
 
 ## mlops
 
@@ -134,7 +134,7 @@ General-purpose ML operations tools — model hub management, dataset operations
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| `huggingface-hub` | Hugging Face Hub CLI (hf) — search, download, and upload models and datasets, manage repos, deploy inference endpoints. | `mlops/huggingface-hub` |
+| `huggingface-hub` | Hugging Face Hub CLI (hf) — search, download, and upload models and datasets, manage repos, query datasets with SQL, deploy inference endpoints, manage Spaces and buckets. | `mlops/huggingface-hub` |
 
 ## mlops/cloud
 
@@ -142,19 +142,15 @@ GPU cloud providers and serverless compute platforms for ML workloads.
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| `lambda-labs-gpu-cloud` | Reserved and on-demand GPU cloud instances for ML training and inference. Use when you need dedicated GPU instances with simple SSH access, persistent filesystems, or high-performance multi-node clusters for large-scale training. | `mlops/cloud/lambda-labs` |
 | `modal-serverless-gpu` | Serverless GPU cloud platform for running ML workloads. Use when you need on-demand GPU access without infrastructure management, deploying ML models as APIs, or running batch jobs with automatic scaling. | `mlops/cloud/modal` |
 
 ## mlops/evaluation
 
-Model evaluation benchmarks, experiment tracking, data curation, tokenizers, and interpretability tools.
+Model evaluation benchmarks, experiment tracking, and interpretability tools.
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| `evaluating-llms-harness` | Evaluates LLMs across 60+ academic benchmarks (MMLU, HumanEval, GSM8K, TruthfulQA, HellaSwag). Use when benchmarking model quality, comparing models, reporting academic results, or tracking training progress. Industry standard used by EleutherAI, HuggingFace, and major labs. Sup… | `mlops/evaluation/lm-evaluation-harness` |
-| `huggingface-tokenizers` | Fast tokenizers optimized for research and production. Rust-based implementation tokenizes 1GB in &lt;20 seconds. Supports BPE, WordPiece, and Unigram algorithms. Train custom vocabularies, track alignments, handle padding/truncation. Integrates seamlessly with transformers. Use… | `mlops/evaluation/huggingface-tokenizers` |
-| `nemo-curator` | GPU-accelerated data curation for LLM training. Supports text/image/video/audio. Features fuzzy deduplication (16× faster), quality filtering (30+ heuristics), semantic deduplication, PII redaction, NSFW detection. Scales across GPUs with RAPIDS. Use for preparing high-quality t… | `mlops/evaluation/nemo-curator` |
-| `sparse-autoencoder-training` | Provides guidance for training and analyzing Sparse Autoencoders (SAEs) using SAELens to decompose neural network activations into interpretable features. Use when discovering interpretable features, analyzing superposition, or studying monosemantic representations in language m… | `mlops/evaluation/saelens` |
+| `evaluating-llms-harness` | Evaluates LLMs across 60+ academic benchmarks (MMLU, HumanEval, GSM8K, TruthfulQA, HellaSwag). Use when benchmarking model quality, comparing models, reporting academic results, or tracking training progress. Industry standard used by EleutherAI, HuggingFace, and major labs. S… | `mlops/evaluation/lm-evaluation-harness` |
 | `weights-and-biases` | Track ML experiments with automatic logging, visualize training in real-time, optimize hyperparameters with sweeps, and manage model registry with W&B - collaborative MLOps platform | `mlops/evaluation/weights-and-biases` |
 
 ## mlops/inference
@@ -163,25 +159,22 @@ Model serving, quantization (GGUF/GPTQ), structured output, inference optimizati
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| `instructor` | Extract structured data from LLM responses with Pydantic validation, retry failed extractions automatically, parse complex JSON with type safety, and stream partial results with Instructor - battle-tested structured output library | `mlops/inference/instructor` |
-| `llama-cpp` | Run LLM inference with llama.cpp on CPU, Apple Silicon, AMD/Intel GPUs, or NVIDIA — plus GGUF model conversion and quantization (2–8 bit with K-quants and imatrix). Covers CLI, Python bindings, OpenAI-compatible server, and Ollama/LM Studio integration. Use for edge deployment, M1/M2/M3/M4 Macs, CUDA-less environments, or flexible local quantization. | `mlops/inference/llama-cpp` |
-| `obliteratus` | Remove refusal behaviors from open-weight LLMs using OBLITERATUS — mechanistic interpretability techniques (diff-in-means, SVD, whitened SVD, LEACE, SAE decomposition, etc.) to excise guardrails while preserving reasoning. 9 CLI methods, 28 analysis modules, 116 model presets ac… | `mlops/inference/obliteratus` |
+| `llama-cpp` | Run LLM inference with llama.cpp on CPU, Apple Silicon, AMD/Intel GPUs, or NVIDIA — plus GGUF model conversion and quantization (2–8 bit with K-quants and imatrix). Covers CLI, Python bindings, OpenAI-compatible server, and Ollama/LM Studio integration. Use for edge deployment… | `mlops/inference/llama-cpp` |
+| `obliteratus` | Remove refusal behaviors from open-weight LLMs using OBLITERATUS — mechanistic interpretability techniques (diff-in-means, SVD, whitened SVD, LEACE, SAE decomposition, etc.) to excise guardrails while preserving reasoning. 9 CLI methods, 28 analysis modules, 116 model presets … | `mlops/inference/obliteratus` |
 | `outlines` | Guarantee valid JSON/XML/code structure during generation, use Pydantic models for type-safe outputs, support local models (Transformers, vLLM), and maximize inference speed with Outlines - dottxt.ai's structured generation library | `mlops/inference/outlines` |
-| `serving-llms-vllm` | Serves LLMs with high throughput using vLLM's PagedAttention and continuous batching. Use when deploying production LLM APIs, optimizing inference latency/throughput, or serving models with limited GPU memory. Supports OpenAI-compatible endpoints, quantization (GPTQ/AWQ/FP8), an… | `mlops/inference/vllm` |
-| `tensorrt-llm` | Optimizes LLM inference with NVIDIA TensorRT for maximum throughput and lowest latency. Use for production deployment on NVIDIA GPUs (A100/H100), when you need 10-100x faster inference than PyTorch, or for serving models with quantization (FP8/INT4), in-flight batching, and mult… | `mlops/inference/tensorrt-llm` |
+| `serving-llms-vllm` | Serves LLMs with high throughput using vLLM's PagedAttention and continuous batching. Use when deploying production LLM APIs, optimizing inference latency/throughput, or serving models with limited GPU memory. Supports OpenAI-compatible endpoints, quantization (GPTQ/AWQ/FP8), … | `mlops/inference/vllm` |
 
 ## mlops/models
 
-Specific model architectures and tools — computer vision (CLIP, SAM, Stable Diffusion), speech (Whisper), audio generation (AudioCraft), and multimodal models (LLaVA).
+Specific model architectures — computer vision (CLIP, SAM, Stable Diffusion), speech (Whisper), and audio generation (AudioCraft).
 
 | Skill | Description | Path |
 |-------|-------------|------|
 | `audiocraft-audio-generation` | PyTorch library for audio generation including text-to-music (MusicGen) and text-to-sound (AudioGen). Use when you need to generate music from text descriptions, create sound effects, or perform melody-conditioned music generation. | `mlops/models/audiocraft` |
-| `clip` | OpenAI's model connecting vision and language. Enables zero-shot image classification, image-text matching, and cross-modal retrieval. Trained on 400M image-text pairs. Use for image search, content moderation, or vision-language tasks without fine-tuning. Best for general-purpo… | `mlops/models/clip` |
-| `llava` | Large Language and Vision Assistant. Enables visual instruction tuning and image-based conversations. Combines CLIP vision encoder with Vicuna/LLaMA language models. Supports multi-turn image chat, visual question answering, and instruction following. Use for vision-language cha… | `mlops/models/llava` |
+| `clip` | OpenAI's model connecting vision and language. Enables zero-shot image classification, image-text matching, and cross-modal retrieval. Trained on 400M image-text pairs. Use for image search, content moderation, or vision-language tasks without fine-tuning. Best for general-pur… | `mlops/models/clip` |
 | `segment-anything-model` | Foundation model for image segmentation with zero-shot transfer. Use when you need to segment any object in images using points, boxes, or masks as prompts, or automatically generate all object masks in an image. | `mlops/models/segment-anything` |
 | `stable-diffusion-image-generation` | State-of-the-art text-to-image generation with Stable Diffusion models via HuggingFace Diffusers. Use when generating images from text prompts, performing image-to-image translation, inpainting, or building custom diffusion pipelines. | `mlops/models/stable-diffusion` |
-| `whisper` | OpenAI's general-purpose speech recognition model. Supports 99 languages, transcription, translation to English, and language identification. Six model sizes from tiny (39M params) to large (1550M params). Use for speech-to-text, podcast transcription, or multilingual audio proc… | `mlops/models/whisper` |
+| `whisper` | OpenAI's general-purpose speech recognition model. Supports 99 languages, transcription, translation to English, and language identification. Six model sizes from tiny (39M params) to large (1550M params). Use for speech-to-text, podcast transcription, or multilingual audio pr… | `mlops/models/whisper` |
 
 ## mlops/research
 
@@ -193,37 +186,19 @@ ML research frameworks for building and optimizing AI systems with declarative p
 
 ## mlops/training
 
-Fine-tuning, RLHF/DPO/GRPO training, distributed training frameworks, and optimization tools for training LLMs and other models.
+Fine-tuning, RLHF/DPO/GRPO training, distributed training frameworks, and optimization tools.
 
 | Skill | Description | Path |
 |-------|-------------|------|
 | `axolotl` | Expert guidance for fine-tuning LLMs with Axolotl - YAML configs, 100+ models, LoRA/QLoRA, DPO/KTO/ORPO/GRPO, multimodal support | `mlops/training/axolotl` |
-| `distributed-llm-pretraining-torchtitan` | Provides PyTorch-native distributed LLM pretraining using torchtitan with 4D parallelism (FSDP2, TP, PP, CP). Use when pretraining Llama 3.1, DeepSeek V3, or custom models at scale from 8 to 512+ GPUs with Float8, torch.compile, and distributed checkpointing. | `mlops/training/torchtitan` |
-| `fine-tuning-with-trl` | Fine-tune LLMs using reinforcement learning with TRL - SFT for instruction tuning, DPO for preference alignment, PPO/GRPO for reward optimization, and reward model training. Use when need RLHF, align model with preferences, or train from human feedback. Works with HuggingFace Tr… | `mlops/training/trl-fine-tuning` |
-| `hermes-atropos-environments` | Build, test, and debug Hermes Agent RL environments for Atropos training. Covers the HermesAgentBaseEnv interface, reward functions, agent loop integration, evaluation with tools, wandb logging, and the three CLI modes (serve/process/evaluate). Use when creating, reviewing, or f… | `mlops/training/hermes-atropos-environments` |
-| `huggingface-accelerate` | Simplest distributed training API. 4 lines to add distributed support to any PyTorch script. Unified API for DeepSpeed/FSDP/Megatron/DDP. Automatic device placement, mixed precision (FP16/BF16/FP8). Interactive config, single launch command. HuggingFace ecosystem standard. | `mlops/training/accelerate` |
-| `optimizing-attention-flash` | Optimizes transformer attention with Flash Attention for 2-4x speedup and 10-20x memory reduction. Use when training/running transformers with long sequences (&gt;512 tokens), encountering GPU memory issues with attention, or need faster inference. Supports PyTorch native SDPA,… | `mlops/training/flash-attention` |
-| `peft-fine-tuning` | Parameter-efficient fine-tuning for LLMs using LoRA, QLoRA, and 25+ methods. Use when fine-tuning large models (7B-70B) with limited GPU memory, when you need to train &lt;1% of parameters with minimal accuracy loss, or for multi-adapter serving. HuggingFace's official library i… | `mlops/training/peft` |
+| `fine-tuning-with-trl` | Fine-tune LLMs using reinforcement learning with TRL - SFT for instruction tuning, DPO for preference alignment, PPO/GRPO for reward optimization, and reward model training. Use when you need RLHF, want to align a model with preferences, or are training from human feedback. Works with HuggingFace … | `mlops/training/trl-fine-tuning` |
+| `peft-fine-tuning` | Parameter-efficient fine-tuning for LLMs using LoRA, QLoRA, and 25+ methods. Use when fine-tuning large models (7B-70B) with limited GPU memory, when you need to train &lt;1% of parameters with minimal accuracy loss, or for multi-adapter serving. HuggingFace's official library… | `mlops/training/peft` |
 | `pytorch-fsdp` | Expert guidance for Fully Sharded Data Parallel training with PyTorch FSDP - parameter sharding, mixed precision, CPU offloading, FSDP2 | `mlops/training/pytorch-fsdp` |
-| `pytorch-lightning` | High-level PyTorch framework with Trainer class, automatic distributed training (DDP/FSDP/DeepSpeed), callbacks system, and minimal boilerplate. Scales from laptop to supercomputer with same code. Use when you want clean training loops with built-in best practices. | `mlops/training/pytorch-lightning` |
-| `simpo-training` | Simple Preference Optimization for LLM alignment. Reference-free alternative to DPO with better performance (+6.4 points on AlpacaEval 2.0). No reference model needed, more efficient than DPO. Use for preference alignment when want simpler, faster training than DPO/PPO. | `mlops/training/simpo` |
-| `slime-rl-training` | Provides guidance for LLM post-training with RL using slime, a Megatron+SGLang framework. Use when training GLM models, implementing custom data generation workflows, or needing tight Megatron-LM integration for RL scaling. | `mlops/training/slime` |
 | `unsloth` | Expert guidance for fast fine-tuning with Unsloth - 2-5x faster training, 50-80% less memory, LoRA/QLoRA optimization | `mlops/training/unsloth` |
 
-## mlops/vector-databases
-
-Vector similarity search and embedding databases for RAG, semantic search, and AI application backends.
-
-| Skill | Description | Path |
-|-------|-------------|------|
-| `chroma` | Open-source embedding database for AI applications. Store embeddings and metadata, perform vector and full-text search, filter by metadata. Simple 4-function API. Scales from notebooks to production clusters. Use for semantic search, RAG applications, or document retrieval. Best… | `mlops/vector-databases/chroma` |
-| `faiss` | Facebook's library for efficient similarity search and clustering of dense vectors. Supports billions of vectors, GPU acceleration, and various index types (Flat, IVF, HNSW). Use for fast k-NN search, large-scale vector retrieval, or when you need pure similarity search without… | `mlops/vector-databases/faiss` |
-| `pinecone` | Managed vector database for production AI applications. Fully managed, auto-scaling, with hybrid search (dense + sparse), metadata filtering, and namespaces. Low latency (&lt;100ms p95). Use for production RAG, recommendation systems, or semantic search at scale. Best for server… | `mlops/vector-databases/pinecone` |
-| `qdrant-vector-search` | High-performance vector similarity search engine for RAG and semantic search. Use when building production RAG systems requiring fast nearest neighbor search, hybrid search with filtering, or scalable vector storage with Rust-powered performance. | `mlops/vector-databases/qdrant` |
-
 ## note-taking
 
-Note taking skills, to save information, assist with research, and collab on multi-session planning and information sharing.
+Note-taking skills for saving information, assisting with research, and collaborating on multi-session planning.
 
 | Skill | Description | Path |
 |-------|-------------|------|
@@ -235,26 +210,12 @@ Skills for document creation, presentations, spreadsheets, and other productivit
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| `google-workspace` | Gmail, Calendar, Drive, Contacts, Sheets, and Docs integration via Python. Uses OAuth2 with automatic token refresh. No external binaries needed — runs entirely with Google's Python client libraries in the Hermes venv. | `productivity/google-workspace` |
-| `linear` | Manage Linear issues, projects, and teams via the GraphQL API. Create, update, search, and organize issues. | `productivity/linear` |
+| `google-workspace` | Gmail, Calendar, Drive, Contacts, Sheets, and Docs integration for Hermes. Uses Hermes-managed OAuth2 setup, prefers the Google Workspace CLI (`gws`) when available for broader API coverage, and falls back to the Python client libraries otherwise. | `productivity/google-workspace` |
+| `linear` | Manage Linear issues, projects, and teams via the GraphQL API. Create, update, search, and organize issues. Uses API key auth (no OAuth needed). All operations via curl — no dependencies. | `productivity/linear` |
 | `nano-pdf` | Edit PDFs with natural-language instructions using the nano-pdf CLI. Modify text, fix typos, update titles, and make content changes to specific pages without manual editing. | `productivity/nano-pdf` |
 | `notion` | Notion API for creating and managing pages, databases, and blocks via curl. Search, create, update, and query Notion workspaces directly from the terminal. | `productivity/notion` |
 | `ocr-and-documents` | Extract text from PDFs and scanned documents. Use web_extract for remote URLs, pymupdf for local text-based PDFs, marker-pdf for OCR/scanned docs. For DOCX use python-docx, for PPTX see the powerpoint skill. | `productivity/ocr-and-documents` |
-| `powerpoint` | "Use this skill any time a .pptx file is involved in any way — as input, output, or both. This includes: creating slide decks, pitch decks, or presentations; reading, parsing, or extracting text from any .pptx file (even if the extracted content will be used elsewhere, like in a… | `productivity/powerpoint` |
-
-## research
-
-Skills for academic research, paper discovery, literature review, domain reconnaissance, market data, content monitoring, and scientific knowledge retrieval.
-
-| Skill | Description | Path |
-|-------|-------------|------|
-| `arxiv` | Search and retrieve academic papers from arXiv using their free REST API. No API key needed. Search by keyword, author, category, or ID. Combine with web_extract or the ocr-and-documents skill to read full paper content. | `research/arxiv` |
-| `blogwatcher` | Monitor blogs and RSS/Atom feeds for updates using the blogwatcher CLI. Add blogs, scan for new articles, and track what you've read. | `research/blogwatcher` |
-| `llm-wiki` | Karpathy's LLM Wiki — build and maintain a persistent, interlinked markdown knowledge base. Ingest sources, query compiled knowledge, and lint for consistency. Unlike RAG, the wiki compiles knowledge once and keeps it current. Works as an Obsidian vault. Wiki path is controlled by the `WIKI_PATH` env var (defaults to `~/wiki`). | `research/llm-wiki` |
-| `domain-intel` | Passive domain reconnaissance using Python stdlib. Subdomain discovery, SSL certificate inspection, WHOIS lookups, DNS records, domain availability checks, and bulk multi-domain analysis. No API keys required. | `research/domain-intel` |
-| `duckduckgo-search` | Free web search via DuckDuckGo — text, news, images, videos. No API key needed. Prefer the `ddgs` CLI when installed; use the Python DDGS library only after verifying that `ddgs` is available in the current runtime. | `research/duckduckgo-search` |
-| `ml-paper-writing` | Write publication-ready ML/AI papers for NeurIPS, ICML, ICLR, ACL, AAAI, COLM. Use when drafting papers from research repos, structuring arguments, verifying citations, or preparing camera-ready submissions. Includes LaTeX templates, reviewer guidelines, and citation verificatio… | `research/ml-paper-writing` |
-| `polymarket` | Query Polymarket prediction market data — search markets, get prices, orderbooks, and price history. Read-only via public REST APIs, no API key needed. | `research/polymarket` |
+| `powerpoint` | Use this skill any time a .pptx file is involved in any way — as input, output, or both. This includes: creating slide decks, pitch decks, or presentations; reading, parsing, or extracting text from any .pptx file (even if the extracted content will be used elsewhere, like in … | `productivity/powerpoint` |
 
 ## red-teaming
 
@@ -262,7 +223,19 @@ Skills for LLM red-teaming, jailbreaking, and safety filter bypass research.
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| `godmode` | Jailbreak API-served LLMs using G0DM0D3 techniques — Parseltongue input obfuscation (33 techniques), GODMODE CLASSIC system prompt templates, ULTRAPLINIAN multi-model racing, encoding escalation, and Hermes-native prefill/system prompt integration. Works on any model accessible via API including closed-source models. | `red-teaming/godmode` |
+| `godmode` | Jailbreak API-served LLMs using G0DM0D3 techniques — Parseltongue input obfuscation (33 techniques), GODMODE CLASSIC system prompt templates, ULTRAPLINIAN multi-model racing, encoding escalation, and Hermes-native prefill/system prompt integration. Use when a user wants to byp… | `red-teaming/godmode` |
+
+## research
+
+Skills for academic research, paper discovery, literature review, market data, content monitoring, and scientific knowledge retrieval.
+
+| Skill | Description | Path |
+|-------|-------------|------|
+| `arxiv` | Search and retrieve academic papers from arXiv using their free REST API. No API key needed. Search by keyword, author, category, or ID. Combine with web_extract or the ocr-and-documents skill to read full paper content. | `research/arxiv` |
+| `blogwatcher` | Monitor blogs and RSS/Atom feeds for updates using the blogwatcher-cli tool. Add blogs, scan for new articles, track read status, and filter by category. | `research/blogwatcher` |
+| `llm-wiki` | Karpathy's LLM Wiki — build and maintain a persistent, interlinked markdown knowledge base. Ingest sources, query compiled knowledge, and lint for consistency. | `research/llm-wiki` |
+| `polymarket` | Query Polymarket prediction market data — search markets, get prices, orderbooks, and price history. Read-only via public REST APIs, no API key needed. | `research/polymarket` |
+| `research-paper-writing` | End-to-end pipeline for writing ML/AI research papers — from experiment design through analysis, drafting, revision, and submission. Covers NeurIPS, ICML, ICLR, ACL, AAAI, COLM. Integrates automated experiment monitoring, statistical analysis, iterative writing, and citation v… | `research/research-paper-writing` |
 
 ## smart-home
 
@@ -278,20 +251,22 @@ Skills for interacting with social platforms — posting, reading, monitoring, a
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| `xitter` | Interact with X/Twitter via the x-cli terminal client using official X API credentials. | `social-media/xitter` |
+| `xitter` | Interact with X/Twitter via the x-cli terminal client using official X API credentials. Use for posting, reading timelines, searching tweets, liking, retweeting, bookmarks, mentions, and user lookups. | `social-media/xitter` |
 
 ## software-development
 
+General software-engineering skills — planning, reviewing, debugging, and test-driven development.
+
 | Skill | Description | Path |
 |-------|-------------|------|
-| `code-review` | Guidelines for performing thorough code reviews with security and quality focus | `software-development/code-review` |
-| `plan` | Plan mode for Hermes — inspect context, write a markdown plan into `.hermes/plans/` in the active workspace/backend working directory, and do not execute the work. | `software-development/plan` |
-| `requesting-code-review` | Use when completing tasks, implementing major features, or before merging. Validates work meets requirements through systematic review process. | `software-development/requesting-code-review` |
+| `plan` | Plan mode for Hermes — inspect context, write a markdown plan into the active workspace's `.hermes/plans/` directory, and do not execute the work. | `software-development/plan` |
+| `requesting-code-review` | Pre-commit verification pipeline — static security scan, baseline-aware quality gates, independent reviewer subagent, and auto-fix loop. Use after code changes and before committing, pushing, or opening a PR. | `software-development/requesting-code-review` |
 | `subagent-driven-development` | Use when executing implementation plans with independent tasks. Dispatches fresh delegate_task per task with two-stage review (spec compliance then code quality). | `software-development/subagent-driven-development` |
 | `systematic-debugging` | Use when encountering any bug, test failure, or unexpected behavior. 4-phase root cause investigation — NO fixes without understanding the problem first. | `software-development/systematic-debugging` |
 | `test-driven-development` | Use when implementing any feature or bugfix, before writing implementation code. Enforces RED-GREEN-REFACTOR cycle with test-first approach. | `software-development/test-driven-development` |
 | `writing-plans` | Use when you have a spec or requirements for a multi-step task. Creates comprehensive implementation plans with bite-sized tasks, exact file paths, and complete code examples. | `software-development/writing-plans` |
 
+
 ---
 
 # Optional Skills
diff --git a/website/docs/reference/slash-commands.md b/website/docs/reference/slash-commands.md
index 214b2866d07..79453474fc8 100644
--- a/website/docs/reference/slash-commands.md
+++ b/website/docs/reference/slash-commands.md
@@ -35,7 +35,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
 | `/queue <prompt>` (alias: `/q`) | Queue a prompt for the next turn (doesn't interrupt the current agent response). **Note:** `/q` is claimed by both `/queue` and `/quit`; the last registration wins, so `/q` resolves to `/quit` in practice. Use `/queue` explicitly. |
 | `/resume [name]` | Resume a previously-named session |
 | `/status` | Show session info |
-| `/snapshot` (alias: `/snap`) | Create or restore state snapshots of Hermes config/state (usage: /snapshot [create\|restore \<id\>\|prune]) |
+| `/agents` (alias: `/tasks`) | Show active agents and running tasks across the current session. |
 | `/background <prompt>` (alias: `/bg`) | Run a prompt in a separate background session. The agent processes your prompt independently — your current session stays free for other work. Results appear as a panel when the task finishes. See [CLI Background Sessions](/docs/user-guide/cli#background-sessions). |
 | `/btw <question>` | Ephemeral side question using session context (no tools, not persisted). Useful for quick clarifications without affecting the conversation history. |
 | `/plan [request]` | Load the bundled `plan` skill to write a markdown plan instead of executing the work. Plans are saved under `.hermes/plans/` relative to the active workspace/backend working directory. |
@@ -50,9 +50,8 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
 | `/provider` | Show available providers and current provider |
 | `/personality` | Set a predefined personality |
 | `/verbose` | Cycle tool progress display: off → new → all → verbose. Can be [enabled for messaging](#notes) via config. |
-| `/fast` | Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode (usage: /fast [normal\|fast\|status]) |
+| `/fast [normal\|fast\|status]` | Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode. Options: `normal`, `fast`, `status`. |
 | `/reasoning` | Manage reasoning effort and display (usage: /reasoning [level\|show\|hide]) |
-| `/fast [normal\|fast\|status]` | Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode. Options: `normal`, `fast`, `status`, `on`, `off`. |
 | `/skin` | Show or change the display skin/theme |
 | `/statusbar` (alias: `/sb`) | Toggle the context/model status bar on or off |
 | `/voice [on\|off\|tts\|status]` | Toggle CLI voice mode and spoken playback. Recording uses `voice.record_key` (default: `Ctrl+B`). |
@@ -80,6 +79,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
 | `/insights` | Show usage insights and analytics (last 30 days) |
 | `/platforms` (alias: `/gateway`) | Show gateway/messaging platform status |
 | `/paste` | Check clipboard for an image and attach it |
+| `/copy [number]` | Copy the last assistant response to clipboard (or the Nth-from-last with a number). CLI-only. |
 | `/image <path>` | Attach a local image file for your next prompt. |
 | `/debug` | Upload debug report (system info + logs) and get shareable links. Also available in messaging. |
 | `/profile` | Show active profile name and home directory |
@@ -151,8 +151,6 @@ The messaging gateway supports the following built-in commands inside Telegram,
 | `/deny` | Reject a pending dangerous command. |
 | `/update` | Update Hermes Agent to the latest version. |
 | `/restart` | Gracefully restart the gateway after draining active runs. When the gateway comes back online, it sends a confirmation to the requester's chat/thread. |
-| `/fast [normal\|fast\|status]` | Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode. |
-| `/debug` | Upload debug report (system info + logs) and get shareable links. |
 | `/debug` | Upload debug report (system info + logs) and get shareable links. |
 | `/help` | Show messaging help. |
 | `/<skill-name>` | Invoke any installed skill by name. |
diff --git a/website/docs/reference/tools-reference.md b/website/docs/reference/tools-reference.md
index e1138dc00a1..40d44627ec7 100644
--- a/website/docs/reference/tools-reference.md
+++ b/website/docs/reference/tools-reference.md
@@ -6,9 +6,9 @@ description: "Authoritative reference for Hermes built-in tools, grouped by tool
 
 # Built-in Tools Reference
 
-This page documents all 47 built-in tools in the Hermes tool registry, grouped by toolset. Availability varies by platform, credentials, and enabled toolsets.
+This page documents all 52 built-in tools in the Hermes tool registry, grouped by toolset. Availability varies by platform, credentials, and enabled toolsets.
 
-**Quick counts:** 10 browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, and 15 standalone tools across other toolsets.
+**Quick counts:** 10 browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, 5 Feishu tools, and 15 standalone tools across other toolsets.
 
 :::tip MCP Tools
 In addition to built-in tools, Hermes can load tools dynamically from MCP servers. MCP tools appear with a server-name prefix (e.g., `github_create_issue` for the `github` MCP server). See [MCP Integration](/docs/user-guide/features/mcp) for configuration.
@@ -53,6 +53,25 @@ In addition to built-in tools, Hermes can load tools dynamically from MCP server
 |------|-------------|----------------------|
 | `delegate_task` | Spawn one or more subagents to work on tasks in isolated contexts. Each subagent gets its own conversation, terminal session, and toolset. Only the final summary is returned -- intermediate tool results never enter your context window. TWO… | — |
 
+## `feishu_doc` toolset
+
+Scoped to the Feishu document-comment intelligent-reply handler (`gateway/platforms/feishu_comment.py`). Not exposed on `hermes-cli` or the regular Feishu chat adapter.
+
+| Tool | Description | Requires environment |
+|------|-------------|----------------------|
+| `feishu_doc_read` | Read the full text content of a Feishu/Lark document (Docx, Doc, or Sheet) given its file_type and token. | Feishu app credentials |
+
+## `feishu_drive` toolset
+
+Scoped to the Feishu document-comment handler. Provides comment read/write operations on Feishu/Lark drive files.
+
+| Tool | Description | Requires environment |
+|------|-------------|----------------------|
+| `feishu_drive_add_comment` | Add a top-level comment on a Feishu/Lark document or file. | Feishu app credentials |
+| `feishu_drive_list_comments` | List whole-document comments on a Feishu/Lark file, most recent first. | Feishu app credentials |
+| `feishu_drive_list_comment_replies` | List replies on a specific Feishu comment thread (whole-doc or local-selection). | Feishu app credentials |
+| `feishu_drive_reply_comment` | Post a reply on a Feishu comment thread, with optional `@`-mention. | Feishu app credentials |
+
 ## `file` toolset
 
 | Tool | Description | Requires environment |
diff --git a/website/docs/reference/toolsets-reference.md b/website/docs/reference/toolsets-reference.md
index e941015b6a9..7593a3fdcfd 100644
--- a/website/docs/reference/toolsets-reference.md
+++ b/website/docs/reference/toolsets-reference.md
@@ -57,6 +57,8 @@ Or in-session:
 | `code_execution` | `execute_code` | Run Python scripts that call Hermes tools programmatically. |
 | `cronjob` | `cronjob` | Schedule and manage recurring tasks. |
 | `delegation` | `delegate_task` | Spawn isolated subagent instances for parallel work. |
+| `feishu_doc` | `feishu_doc_read` | Read Feishu/Lark document content. Used by the Feishu document-comment intelligent-reply handler. |
+| `feishu_drive` | `feishu_drive_add_comment`, `feishu_drive_list_comments`, `feishu_drive_list_comment_replies`, `feishu_drive_reply_comment` | Feishu/Lark drive comment operations. Scoped to the comment agent; not exposed on `hermes-cli` or other messaging toolsets. |
 | `file` | `patch`, `read_file`, `search_files`, `write_file` | File reading, writing, searching, and editing. |
 | `homeassistant` | `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services` | Smart home control via Home Assistant. Only available when `HASS_TOKEN` is set. |
 | `image_gen` | `image_generate` | Text-to-image generation via FAL.ai. |
@@ -79,7 +81,7 @@ These expand to multiple core toolsets, providing a convenient shorthand for com
 
 | Toolset | Expands to | Use case |
 |---------|-----------|----------|
-| `debugging` | `patch`, `process`, `read_file`, `search_files`, `terminal`, `web_extract`, `web_search`, `write_file` | Debug sessions — file access, terminal, and web research without browser or delegation overhead. |
+| `debugging` | `process`, `terminal` plus the `web` and `file` toolsets (via `includes`) — effectively `patch`, `process`, `read_file`, `search_files`, `terminal`, `web_extract`, `web_search`, `write_file` | Debug sessions — file access, terminal, and web research without browser or delegation overhead. |
 | `safe` | `image_generate`, `vision_analyze`, `web_extract`, `web_search` | Read-only research and media generation. No file writes, no terminal access, no code execution. Good for untrusted or constrained environments. |
 
 ## Platform Toolsets
@@ -88,7 +90,7 @@ Platform toolsets define the complete tool configuration for a deployment target
 
 | Toolset | Differences from `hermes-cli` |
 |---------|-------------------------------|
-| `hermes-cli` | Full toolset — all 36 tools including `clarify`. The default for interactive CLI sessions. |
+| `hermes-cli` | Full toolset — all 36 core tools including `clarify`. The default for interactive CLI sessions. |
 | `hermes-acp` | Drops `clarify`, `cronjob`, `image_generate`, `send_message`, `text_to_speech`, homeassistant tools. Focused on coding tasks in IDE context. |
 | `hermes-api-server` | Drops `clarify`, `send_message`, and `text_to_speech`. Adds everything else — suitable for programmatic access where user interaction isn't possible. |
 | `hermes-telegram` | Same as `hermes-cli`. |
@@ -100,16 +102,16 @@ Platform toolsets define the complete tool configuration for a deployment target
 | `hermes-mattermost` | Same as `hermes-cli`. |
 | `hermes-email` | Same as `hermes-cli`. |
 | `hermes-sms` | Same as `hermes-cli`. |
-| `hermes-dingtalk` | Same as `hermes-cli`. |
-| `hermes-feishu` | Same as `hermes-cli`. |
-| `hermes-wecom` | Same as `hermes-cli`. |
-| `hermes-wecom-callback` | WeCom callback toolset — enterprise self-built app messaging (full access). |
-| `hermes-weixin` | Same as `hermes-cli`. |
 | `hermes-bluebubbles` | Same as `hermes-cli`. |
+| `hermes-dingtalk` | Same as `hermes-cli`. |
+| `hermes-feishu` | Same as `hermes-cli`. Note: the `feishu_doc` / `feishu_drive` toolsets are used only by the document-comment handler, not by the regular Feishu chat adapter. |
 | `hermes-qqbot` | Same as `hermes-cli`. |
-| `hermes-homeassistant` | Same as `hermes-cli`. |
+| `hermes-wecom` | Same as `hermes-cli`. |
+| `hermes-wecom-callback` | Same as `hermes-cli`. |
+| `hermes-weixin` | Same as `hermes-cli`. |
+| `hermes-homeassistant` | Same as `hermes-cli` plus the `homeassistant` toolset always on. |
 | `hermes-webhook` | Same as `hermes-cli`. |
-| `hermes-gateway` | Union of all messaging platform toolsets. Used internally when the gateway needs the broadest possible tool set. |
+| `hermes-gateway` | Internal gateway orchestrator toolset — a union toolset providing the broadest possible tool set, for when the gateway needs to accept any message source. |
 
 ## Dynamic Toolsets
 
@@ -119,11 +121,10 @@ Each configured MCP server generates a `mcp-<server>` toolset at runtime. For ex
 
 ```yaml
 # config.yaml
-mcp:
-  servers:
-    github:
-      command: npx
-      args: ["-y", "@modelcontextprotocol/server-github"]
+mcp_servers:
+  github:
+    command: npx
+    args: ["-y", "@modelcontextprotocol/server-github"]
 ```
 
 This creates a `mcp-github` toolset you can reference in `--toolsets` or platform configs.
diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md
index bef9b5cfd55..29d1665627e 100644
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@@ -601,7 +601,7 @@ Every model slot in Hermes — auxiliary tasks, compression, fallback — uses t
 
 When `base_url` is set, Hermes ignores the provider and calls that endpoint directly (using `api_key` or `OPENAI_API_KEY` for auth). When only `provider` is set, Hermes uses that provider's built-in auth and base URL.
 
-Available providers for auxiliary tasks: `auto`, `openrouter`, `nous`, `codex`, `copilot`, `anthropic`, `main`, `zai`, `kimi-coding`, `kimi-coding-cn`, `arcee`, `minimax`, any provider registered in the [provider registry](/docs/reference/environment-variables), or any named custom provider from your `custom_providers` list (e.g. `provider: "beans"`).
+Available providers for auxiliary tasks: `auto`, `main`, plus any provider in the [provider registry](/docs/reference/environment-variables) — `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `deepseek`, `nvidia`, `xai`, `ollama-cloud`, `alibaba`, `bedrock`, `huggingface`, `arcee`, `xiaomi`, `kilocode`, `opencode-zen`, `opencode-go`, `ai-gateway` — or any named custom provider from your `custom_providers` list (e.g. `provider: "beans"`).
 
 :::warning `"main"` is for auxiliary tasks only
 The `"main"` provider option means "use whatever provider my main agent uses" — it's only valid inside `auxiliary:`, `compression:`, and `fallback_model:` configs. It is **not** a valid value for your top-level `model.provider` setting. If you use a custom OpenAI-compatible endpoint, set `provider: custom` in your `model:` section. See [AI Providers](/docs/integrations/providers) for all main model provider options.
@@ -851,7 +851,7 @@ agent:
 
 ```yaml
 tts:
-  provider: "edge"              # "edge" | "elevenlabs" | "openai" | "minimax" | "mistral" | "neutts"
+  provider: "edge"              # "edge" | "elevenlabs" | "openai" | "minimax" | "mistral" | "gemini" | "xai" | "neutts"
   speed: 1.0                    # Global speed multiplier (fallback for all providers)
   edge:
     voice: "en-US-AriaNeural"   # 322 voices, 74 languages
@@ -867,6 +867,18 @@ tts:
   minimax:
     speed: 1.0                  # Speech speed multiplier
     # base_url: ""              # Optional: override for OpenAI-compatible TTS endpoints
+  mistral:
+    model: "voxtral-mini-tts-2603"
+    voice_id: "c69964a6-ab8b-4f8a-9465-ec0925096ec8"  # Paul - Neutral (default)
+  gemini:
+    model: "gemini-2.5-flash-preview-tts"   # or gemini-2.5-pro-preview-tts
+    voice: "Kore"               # 30 prebuilt voices: Zephyr, Puck, Kore, Enceladus, etc.
+  xai:
+    voice_id: "eve"             # xAI TTS voice
+    language: "en"              # ISO 639-1
+    sample_rate: 24000
+    bit_rate: 128000            # MP3 bitrate
+    # base_url: "https://api.x.ai/v1"
   neutts:
     ref_audio: ''
     ref_text: ''
diff --git a/website/docs/user-guide/features/api-server.md b/website/docs/user-guide/features/api-server.md
index ebcb4523e86..82c6db0b2c2 100644
--- a/website/docs/user-guide/features/api-server.md
+++ b/website/docs/user-guide/features/api-server.md
@@ -154,12 +154,64 @@ Delete a stored response.
 
 ### GET /v1/models
 
-Lists the agent as an available model. The advertised model name defaults to the [profile](/docs/user-guide/features/profiles) name (or `hermes-agent` for the default profile). Required by most frontends for model discovery.
+Lists the agent as an available model. The advertised model name defaults to the [profile](/docs/user-guide/profiles) name (or `hermes-agent` for the default profile). Required by most frontends for model discovery.
 
 ### GET /health
 
 Health check. Returns `{"status": "ok"}`. Also available at **GET /v1/health** for OpenAI-compatible clients that expect the `/v1/` prefix.
 
+### GET /health/detailed
+
+Extended health check that also reports active sessions, running agents, and resource usage. Useful for monitoring/observability tooling.
+
+## Runs API (streaming-friendly alternative)
+
+In addition to `/v1/chat/completions` and `/v1/responses`, the server exposes a **runs** API for long-form sessions where the client wants to subscribe to progress events instead of managing streaming itself.
+
+### POST /v1/runs
+
+Create a new agent run. Returns a `run_id` that can be used to subscribe to progress events.
+
+### GET /v1/runs/\{run_id\}/events
+
+Server-Sent Events stream of the run's tool-call progress, token deltas, and lifecycle events. Designed for dashboards and thick clients that want to attach/detach without losing state.
+
+## Jobs API (background scheduled work)
+
+The server exposes a lightweight jobs CRUD surface for managing scheduled / background agent runs from a remote client. All endpoints are gated behind the same bearer auth.
+
+### GET /api/jobs
+
+List all scheduled jobs.
+
+### POST /api/jobs
+
+Create a new scheduled job. Body accepts the same shape as `hermes cron` — prompt, schedule, skills, provider override, delivery target.
+
+### GET /api/jobs/\{job_id\}
+
+Fetch a single job's definition and last-run state.
+
+### PATCH /api/jobs/\{job_id\}
+
+Update fields on an existing job (prompt, schedule, etc.). Partial updates are merged.
+
+### DELETE /api/jobs/\{job_id\}
+
+Remove a job. Also cancels any in-flight run.
+
+### POST /api/jobs/\{job_id\}/pause
+
+Pause a job without deleting it. Next-scheduled-run timestamps are suspended until resumed.
+
+### POST /api/jobs/\{job_id\}/resume
+
+Resume a previously paused job.
+
+### POST /api/jobs/\{job_id\}/run
+
+Trigger the job to run immediately, out of schedule.
+
 ## System Prompt Handling
 
 When a frontend sends a `system` message (Chat Completions) or `instructions` field (Responses API), hermes-agent **layers it on top** of its core system prompt. Your agent keeps all its tools, memory, and skills — the frontend's system prompt adds extra instructions.
@@ -247,7 +299,7 @@ Any frontend that supports the OpenAI API format works. Tested/documented integr
 
 ## Multi-User Setup with Profiles
 
-To give multiple users their own isolated Hermes instance (separate config, memory, skills), use [profiles](/docs/user-guide/features/profiles):
+To give multiple users their own isolated Hermes instance (separate config, memory, skills), use [profiles](/docs/user-guide/profiles):
 
 ```bash
 # Create a profile per user
diff --git a/website/docs/user-guide/features/fallback-providers.md b/website/docs/user-guide/features/fallback-providers.md
index 8d16079c2e5..2e9bcad99b0 100644
--- a/website/docs/user-guide/features/fallback-providers.md
+++ b/website/docs/user-guide/features/fallback-providers.md
@@ -50,7 +50,10 @@ Both `provider` and `model` are **required**. If either is missing, the fallback
 | NVIDIA NIM | `nvidia` | `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) |
 | Ollama Cloud | `ollama-cloud` | `OLLAMA_API_KEY` |
 | Google Gemini (OAuth) | `google-gemini-cli` | `hermes model` (Google OAuth; optional: `HERMES_GEMINI_PROJECT_ID`) |
+| Google AI Studio | `gemini` | `GOOGLE_API_KEY` (alias: `GEMINI_API_KEY`) |
 | xAI (Grok) | `xai` (alias `grok`) | `XAI_API_KEY` (optional: `XAI_BASE_URL`) |
+| AWS Bedrock | `bedrock` | Standard boto3 auth (`AWS_REGION` + `AWS_PROFILE` or `AWS_ACCESS_KEY_ID`) |
+| Qwen Portal (OAuth) | `qwen-oauth` | `hermes model` (Qwen Portal OAuth; optional: `HERMES_QWEN_BASE_URL`) |
 | OpenCode Zen | `opencode-zen` | `OPENCODE_ZEN_API_KEY` |
 | OpenCode Go | `opencode-go` | `OPENCODE_GO_API_KEY` |
 | Kilo Code | `kilocode` | `KILOCODE_API_KEY` |
@@ -166,6 +169,8 @@ Hermes uses separate lightweight models for side tasks. Each task has its own pr
 | Skills Hub | Skill search and discovery | `auxiliary.skills_hub` |
 | MCP | MCP helper operations | `auxiliary.mcp` |
 | Memory Flush | Memory consolidation | `auxiliary.flush_memories` |
+| Approval | Smart command-approval classification | `auxiliary.approval` |
+| Title Generation | Session title summaries | `auxiliary.title_generation` |
 
 ### Auto-Detection Chain
 
@@ -339,5 +344,7 @@ See [Scheduled Tasks (Cron)](/docs/user-guide/features/cron) for full configurat
 | Skills hub | Auto-detection chain | `auxiliary.skills_hub` |
 | MCP helpers | Auto-detection chain | `auxiliary.mcp` |
 | Memory flush | Auto-detection chain | `auxiliary.flush_memories` |
+| Approval classification | Auto-detection chain | `auxiliary.approval` |
+| Title generation | Auto-detection chain | `auxiliary.title_generation` |
 | Delegation | Provider override only (no automatic fallback) | `delegation.provider` / `delegation.model` |
 | Cron jobs | Per-job provider override only (no automatic fallback) | Per-job `provider` / `model` |
diff --git a/website/docs/user-guide/features/hooks.md b/website/docs/user-guide/features/hooks.md
index c1c7ef05bf7..a64f3220956 100644
--- a/website/docs/user-guide/features/hooks.md
+++ b/website/docs/user-guide/features/hooks.md
@@ -243,6 +243,8 @@ def register(ctx):
 | [`post_llm_call`](#post_llm_call) | Once per turn, after the tool-calling loop | ignored |
 | [`on_session_start`](#on_session_start) | New session created (first turn only) | ignored |
 | [`on_session_end`](#on_session_end) | Session ends | ignored |
+| [`on_session_finalize`](#on_session_finalize) | CLI/gateway tears down an active session (flush, save, stats) | ignored |
+| [`on_session_reset`](#on_session_reset) | Gateway swaps in a fresh session key (e.g. `/new`, `/reset`) | ignored |
 
 ---
 
@@ -600,4 +602,50 @@ def register(ctx):
 
 ---
 
+### `on_session_finalize`
+
+Fires when the CLI or gateway **tears down** an active session — for example, when the user runs `/new`, the gateway garbage-collects an idle session, or the CLI quits with an active agent. This is the last chance to flush state tied to the outgoing session before its identity is gone.
+
+**Callback signature:**
+
+```python
+def my_callback(session_id: str | None, platform: str, **kwargs):
+```
+
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `session_id` | `str` or `None` | The outgoing session ID. May be `None` if no active session existed. |
+| `platform` | `str` | `"cli"` or the messaging platform name (`"telegram"`, `"discord"`, etc.). |
+
+**Fires:** In `cli.py` (on `/new` / CLI exit) and `gateway/run.py` (when a session is reset or GC'd). Always paired with `on_session_reset` on the gateway side.
+
+**Return value:** Ignored.
+
+**Use cases:** Persist final session metrics before the session ID is discarded, close per-session resources, emit a final telemetry event, drain queued writes.
+
+---
+
+### `on_session_reset`
+
+Fires when the gateway **swaps in a new session key** for an active chat — the user invokes `/new`, `/reset`, or `/clear`, or the adapter picks a fresh session after an idle window. This lets plugins react to the fact that conversation state has been wiped without waiting for the next `on_session_start`.
+
+**Callback signature:**
+
+```python
+def my_callback(session_id: str, platform: str, **kwargs):
+```
+
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `session_id` | `str` | The new session's ID (already rotated to the fresh value). |
+| `platform` | `str` | The messaging platform name. |
+
+**Fires:** In `gateway/run.py`, immediately after the new session key is allocated but before the next inbound message is processed. On the gateway, the order is: `on_session_finalize(old_id)` → swap → `on_session_reset(new_id)` → `on_session_start(new_id)` on the first inbound turn.
+
+**Return value:** Ignored.
+
+**Use cases:** Reset per-session caches keyed by `session_id`, emit "session rotated" analytics, prime a fresh state bucket.
+
+---
+
 See the **[Build a Plugin guide](/docs/guides/build-a-hermes-plugin)** for the full walkthrough including tool schemas, handlers, and advanced hook patterns.
diff --git a/website/docs/user-guide/features/tts.md b/website/docs/user-guide/features/tts.md
index 9f9d257fcc4..6f7fc895062 100644
--- a/website/docs/user-guide/features/tts.md
+++ b/website/docs/user-guide/features/tts.md
@@ -14,7 +14,7 @@ If you have a paid [Nous Portal](https://portal.nousresearch.com) subscription,
 
 ## Text-to-Speech
 
-Convert text to speech with seven providers:
+Convert text to speech with eight providers:
 
 | Provider | Quality | Cost | API Key |
 |----------|---------|------|---------|
diff --git a/website/docs/user-guide/messaging/discord.md b/website/docs/user-guide/messaging/discord.md
index 44e08330dfa..0efe909b0d1 100644
--- a/website/docs/user-guide/messaging/discord.md
+++ b/website/docs/user-guide/messaging/discord.md
@@ -282,12 +282,16 @@ Discord behavior is controlled through two files: **`~/.hermes/.env`** for crede
 | `DISCORD_ALLOW_BOTS` | No | `"none"` | Controls how the bot handles messages from other Discord bots. `"none"` — ignore all other bots. `"mentions"` — only accept bot messages that `@mention` Hermes. `"all"` — accept all bot messages. |
 | `DISCORD_REACTIONS` | No | `true` | When `true`, the bot adds emoji reactions to messages during processing (👀 when starting, ✅ on success, ❌ on error). Set to `false` to disable reactions entirely. |
 | `DISCORD_IGNORED_CHANNELS` | No | — | Comma-separated channel IDs where the bot **never** responds, even when `@mentioned`. Takes priority over all other channel settings. |
+| `DISCORD_ALLOWED_CHANNELS` | No | — | Comma-separated channel IDs. When set, the bot **only** responds in these channels (plus DMs if allowed). Overrides `config.yaml` `discord.allowed_channels`. Combine with `DISCORD_IGNORED_CHANNELS` to express allow/deny rules. |
 | `DISCORD_NO_THREAD_CHANNELS` | No | — | Comma-separated channel IDs where the bot responds directly in the channel instead of creating a thread. Only relevant when `DISCORD_AUTO_THREAD` is `true`. |
 | `DISCORD_REPLY_TO_MODE` | No | `"first"` | Controls reply-reference behavior: `"off"` — never reply to the original message, `"first"` — reply-reference on the first message chunk only (default), `"all"` — reply-reference on every chunk. |
 | `DISCORD_ALLOW_MENTION_EVERYONE` | No | `false` | When `false` (default), the bot cannot ping `@everyone` or `@here` even if its response contains those tokens. Set to `true` to opt back in. See [Mention Control](#mention-control) below. |
 | `DISCORD_ALLOW_MENTION_ROLES` | No | `false` | When `false` (default), the bot cannot ping `@role` mentions. Set to `true` to allow. |
 | `DISCORD_ALLOW_MENTION_USERS` | No | `true` | When `true` (default), the bot can ping individual users by ID. |
 | `DISCORD_ALLOW_MENTION_REPLIED_USER` | No | `true` | When `true` (default), replying to a message pings the original author. |
+| `DISCORD_PROXY` | No | — | Proxy URL for Discord connections (HTTP, WebSocket, REST). Overrides `HTTPS_PROXY`/`ALL_PROXY`. Supports `http://`, `https://`, and `socks5://` schemes. |
+| `HERMES_DISCORD_TEXT_BATCH_DELAY_SECONDS` | No | `0.6` | Grace window, in seconds, that the adapter waits before flushing a queued text chunk. Useful for smoothing streamed output. |
+| `HERMES_DISCORD_TEXT_BATCH_SPLIT_DELAY_SECONDS` | No | `0.1` | Delay, in seconds, between split chunks when a single message exceeds Discord's length limit. |
 
 ### Config File (`config.yaml`)
 
diff --git a/website/docs/user-guide/messaging/matrix.md b/website/docs/user-guide/messaging/matrix.md
index ec77b5bc33e..255806c01ba 100644
--- a/website/docs/user-guide/messaging/matrix.md
+++ b/website/docs/user-guide/messaging/matrix.md
@@ -72,8 +72,13 @@ MATRIX_REQUIRE_MENTION=true
 MATRIX_FREE_RESPONSE_ROOMS=!abc123:matrix.org,!def456:matrix.org
 MATRIX_AUTO_THREAD=true
 MATRIX_DM_MENTION_THREADS=false
+MATRIX_REACTIONS=true          # default: true — emoji reactions during processing
 ```
 
+:::tip Disabling reactions
+`MATRIX_REACTIONS=false` turns off the processing-lifecycle emoji reactions (👀/✅/❌) the bot posts on inbound messages. Useful for rooms where reaction events are noisy or aren't supported by all participating clients.
+:::
+
 :::note
 If you are upgrading from a version that did not have `MATRIX_REQUIRE_MENTION`, the bot previously responded to all messages in rooms. To preserve that behavior, set `MATRIX_REQUIRE_MENTION=false`.
 :::
diff --git a/website/docs/user-guide/messaging/open-webui.md b/website/docs/user-guide/messaging/open-webui.md
index b26d23eddfd..efdf901371b 100644
--- a/website/docs/user-guide/messaging/open-webui.md
+++ b/website/docs/user-guide/messaging/open-webui.md
@@ -198,7 +198,7 @@ Make sure your `OPENAI_API_KEY` in Open WebUI matches the `API_SERVER_KEY` in He
 
 ## Multi-User Setup with Profiles
 
-To run separate Hermes instances per user — each with their own config, memory, and skills — use [profiles](/docs/user-guide/features/profiles). Each profile runs its own API server on a different port and automatically advertises the profile name as the model in Open WebUI.
+To run separate Hermes instances per user — each with their own config, memory, and skills — use [profiles](/docs/user-guide/profiles). Each profile runs its own API server on a different port and automatically advertises the profile name as the model in Open WebUI.
 
 ### 1. Create profiles and configure API servers
 
diff --git a/website/docs/user-guide/messaging/qqbot.md b/website/docs/user-guide/messaging/qqbot.md
index d9da90d5868..8da6f92def5 100644
--- a/website/docs/user-guide/messaging/qqbot.md
+++ b/website/docs/user-guide/messaging/qqbot.md
@@ -28,7 +28,7 @@ The QQ Bot adapter uses the [Official QQ Bot API](https://bot.q.qq.com/wiki/deve
 ### Interactive setup
 
 ```bash
-hermes setup gateway
+hermes gateway setup
 ```
 
 Select **QQ Bot** from the platform list and follow the prompts.
@@ -52,7 +52,7 @@ QQ_CLIENT_SECRET=your-app-secret
 | `QQBOT_HOME_CHANNEL_NAME` | Display name for home channel | `Home` |
 | `QQ_ALLOWED_USERS` | Comma-separated user OpenIDs for DM access | open (all users) |
 | `QQ_ALLOW_ALL_USERS` | Set to `true` to allow all DMs | `false` |
-| `QQ_MARKDOWN_SUPPORT` | Enable QQ markdown (msg_type 2) | `true` |
+| `QQ_SANDBOX` | Route requests to the QQ sandbox gateway for development testing | `false` |
 | `QQ_STT_API_KEY` | API key for voice-to-text provider | — |
 | `QQ_STT_BASE_URL` | Base URL for STT provider | `https://open.bigmodel.cn/api/coding/paas/v4` |
 | `QQ_STT_MODEL` | STT model name | `glm-asr` |
@@ -68,7 +68,7 @@ platforms:
     extra:
       app_id: "your-app-id"
       client_secret: "your-secret"
-      markdown_support: true
+      markdown_support: true       # enable QQ markdown (msg_type 2). Config-only; no env-var equivalent.
       dm_policy: "open"          # open | allowlist | disabled
       allow_from:
         - "user_openid_1"
diff --git a/website/docs/user-guide/messaging/slack.md b/website/docs/user-guide/messaging/slack.md
index 5f6492216a9..a7eff683da8 100644
--- a/website/docs/user-guide/messaging/slack.md
+++ b/website/docs/user-guide/messaging/slack.md
@@ -283,7 +283,7 @@ slack:
 ```
 
 :::info
-Unlike Discord and Telegram, Slack does not have a `free_response_channels` equivalent. The Slack adapter requires `@mention` to start a conversation in channels. However, once the bot has an active session in a thread, subsequent thread replies do not require a mention. In DMs, the bot always responds without needing a mention.
+Slack supports both patterns. By default, an `@mention` is required to start a conversation in a channel, but you can exempt specific channels from that requirement via `SLACK_FREE_RESPONSE_CHANNELS` (comma-separated channel IDs) or `slack.free_response_channels` in `config.yaml`. Once the bot has an active session in a thread, subsequent thread replies do not require a mention. In DMs, the bot always responds without needing a mention.
 :::
 
 ### Unauthorized User Handling
diff --git a/website/docs/user-guide/messaging/telegram.md b/website/docs/user-guide/messaging/telegram.md
index 0fa2e830b9d..6dbf9e61dff 100644
--- a/website/docs/user-guide/messaging/telegram.md
+++ b/website/docs/user-guide/messaging/telegram.md
@@ -422,40 +422,6 @@ The current model and provider are displayed at the top. All navigation happens
 If you know the exact model name, type `/model <name>` directly to skip the picker. You can also type `/model <name> --global` to persist the change across sessions.
 :::
 
-## Webhook Mode
-
-By default, the Telegram adapter connects via **long polling** — the gateway makes outbound connections to Telegram's servers. This works everywhere but keeps a persistent connection open.
-
-**Webhook mode** is an alternative where Telegram pushes updates to your server over HTTPS. This is ideal for **serverless and cloud deployments** (Fly.io, Railway, etc.) where inbound HTTP can wake a suspended machine.
-
-### Configuration
-
-Set the `TELEGRAM_WEBHOOK_URL` environment variable to enable webhook mode:
-
-```bash
-# Required — your public HTTPS endpoint
-TELEGRAM_WEBHOOK_URL=https://app.fly.dev/telegram
-
-# Optional — local listen port (default: 8443)
-TELEGRAM_WEBHOOK_PORT=8443
-
-# Optional — secret token for update verification (auto-generated if not set)
-TELEGRAM_WEBHOOK_SECRET=my-secret-token
-```
-
-Or in `~/.hermes/config.yaml`:
-
-```yaml
-telegram:
-  webhook_mode: true
-```
-
-When `TELEGRAM_WEBHOOK_URL` is set, the gateway starts an HTTP server listening on `0.0.0.0:<port>` and registers the webhook URL with Telegram. The URL path is extracted from the webhook URL (defaults to `/telegram`).
-
-:::warning
-Telegram requires a **valid TLS certificate** on the webhook endpoint. Self-signed certificates will be rejected. Use a reverse proxy (nginx, Caddy) or a platform that provides TLS termination (Fly.io, Railway, Cloudflare Tunnel).
-:::
-
 ## DNS-over-HTTPS Fallback IPs
 
 In some restricted networks, `api.telegram.org` may resolve to an IP that is unreachable. The Telegram adapter includes a **fallback IP** mechanism that transparently retries connections against alternative IPs while preserving the correct TLS hostname and SNI.

From 285bb2b9150b93445e5eded9bc897a4001b66e55 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 18 Apr 2026 01:46:25 -0700
Subject: [PATCH 013/143] feat(execute_code): add project/strict execution
 modes, default to project (#11971)

Weaker models (Gemma-class) repeatedly rediscover and forget that
execute_code uses a different CWD and Python interpreter than terminal(),
causing them to flip-flop on whether user files exist and to hit import
errors on project dependencies like pandas.

Adds a new 'code_execution.mode' config key (default 'project') that
brings execute_code into line with terminal()'s filesystem/interpreter:

  project (new default):
    - cwd       = session's TERMINAL_CWD (falls back to os.getcwd())
    - python    = active VIRTUAL_ENV/bin/python or CONDA_PREFIX/bin/python
                  with a Python 3.8+ version check; falls back cleanly to
                  sys.executable if no venv or the candidate fails
    - result    : 'import pandas' works, '.env' resolves, matches terminal()

  strict (opt-in):
    - cwd       = staging tmpdir (today's behavior)
    - python    = sys.executable (today's behavior)
    - result    : maximum reproducibility and isolation; project deps
                  won't resolve

Security-critical invariants are identical across both modes and covered by
explicit regression tests:

  - env scrubbing (strips *_API_KEY, *_TOKEN, *_SECRET, *_PASSWORD,
    *_CREDENTIAL, *_PASSWD, *_AUTH substrings)
  - SANDBOX_ALLOWED_TOOLS whitelist (no execute_code recursion, no
    delegate_task, no MCP from inside scripts)
  - resource caps (5-min timeout, 50KB stdout, 50 tool calls)

Deliberately avoids 'sandbox'/'isolated'/'cloud' language in tool
descriptions (regression from commit 39b83f34 where agents on local
backends falsely believed they were sandboxed and refused networking).

Configure via config.yaml: code_execution.mode = strict|project (no env-var override)
---
 hermes_cli/config.py                     |  16 +-
 model_tools.py                           |   4 +-
 tests/hermes_cli/test_config.py          |   6 +-
 tests/tools/test_code_execution_modes.py | 455 +++++++++++++++++++++++
 tools/code_execution_tool.py             | 176 ++++++++-
 5 files changed, 643 insertions(+), 14 deletions(-)
 create mode 100644 tests/tools/test_code_execution_modes.py

diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index c9e05e3e882..dfb6b7210a4 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -771,6 +771,20 @@ DEFAULT_CONFIG = {
         "wrap_response": True,
     },
 
+    # execute_code settings — controls the tool used for programmatic tool calls.
+    "code_execution": {
+        # Execution mode:
+        #   project (default) — scripts run in the session's working directory
+        #     with the active virtualenv/conda env's python, so project deps
+        #     (pandas, torch, project packages) and relative paths resolve.
+        #   strict            — scripts run in an isolated temp directory with
+        #     hermes-agent's own python (sys.executable). Maximum isolation
+        #     and reproducibility; project deps and relative paths won't work.
+        # Env scrubbing (strips *_API_KEY, *_TOKEN, *_SECRET, ...) and the
+        # tool whitelist apply identically in both modes.
+        "mode": "project",
+    },
+
     # Logging — controls file logging to ~/.hermes/logs/.
     # agent.log captures INFO+ (all agent activity); errors.log captures WARNING+.
     "logging": {
@@ -788,7 +802,7 @@ DEFAULT_CONFIG = {
     },
 
     # Config schema version - bump this when adding new required fields
-    "_config_version": 18,
+    "_config_version": 19,
 }
 
 # =============================================================================
diff --git a/model_tools.py b/model_tools.py
index 801255b7978..5ec806e78bf 100644
--- a/model_tools.py
+++ b/model_tools.py
@@ -274,9 +274,9 @@ def get_tool_definitions(
     # execute_code" even when the API key isn't configured or the toolset is
     # disabled (#560-discord).
     if "execute_code" in available_tool_names:
-        from tools.code_execution_tool import SANDBOX_ALLOWED_TOOLS, build_execute_code_schema
+        from tools.code_execution_tool import SANDBOX_ALLOWED_TOOLS, build_execute_code_schema, _get_execution_mode
         sandbox_enabled = SANDBOX_ALLOWED_TOOLS & available_tool_names
-        dynamic_schema = build_execute_code_schema(sandbox_enabled)
+        dynamic_schema = build_execute_code_schema(sandbox_enabled, mode=_get_execution_mode())
         for i, td in enumerate(filtered_tools):
             if td.get("function", {}).get("name") == "execute_code":
                 filtered_tools[i] = {"type": "function", "function": dynamic_schema}
diff --git a/tests/hermes_cli/test_config.py b/tests/hermes_cli/test_config.py
index f31ac045c4f..4330424b9a2 100644
--- a/tests/hermes_cli/test_config.py
+++ b/tests/hermes_cli/test_config.py
@@ -459,7 +459,7 @@ class TestCustomProviderCompatibility:
             migrate_config(interactive=False, quiet=True)
             raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
 
-        assert raw["_config_version"] == 18
+        assert raw["_config_version"] == 19
         assert raw["providers"]["openai-direct"] == {
             "api": "https://api.openai.com/v1",
             "api_key": "test-key",
@@ -606,7 +606,7 @@ class TestInterimAssistantMessageConfig:
             migrate_config(interactive=False, quiet=True)
             raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
 
-        assert raw["_config_version"] == 18
+        assert raw["_config_version"] == 19
         assert raw["display"]["tool_progress"] == "off"
         assert raw["display"]["interim_assistant_messages"] is True
 
@@ -626,6 +626,6 @@ class TestDiscordChannelPromptsConfig:
             migrate_config(interactive=False, quiet=True)
             raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
 
-        assert raw["_config_version"] == 18
+        assert raw["_config_version"] == 19
         assert raw["discord"]["auto_thread"] is True
         assert raw["discord"]["channel_prompts"] == {}
diff --git a/tests/tools/test_code_execution_modes.py b/tests/tools/test_code_execution_modes.py
new file mode 100644
index 00000000000..875eaf7aeda
--- /dev/null
+++ b/tests/tools/test_code_execution_modes.py
@@ -0,0 +1,455 @@
+#!/usr/bin/env python3
+"""Tests for execute_code's strict / project execution modes.
+
+The mode switch controls two things:
+  - working directory: staging tmpdir (strict) vs session CWD (project)
+  - interpreter:       sys.executable (strict) vs active venv's python (project)
+
+Security-critical invariants — env scrubbing, tool whitelist, resource caps —
+must apply identically in both modes. These tests guard all three layers.
+
+Mode is sourced exclusively from ``code_execution.mode`` in config.yaml —
+there is no env-var override. Tests patch ``_load_config`` directly.
+"""
+
+import json
+import os
+import sys
+import unittest
+from contextlib import contextmanager
+from unittest.mock import patch
+
+import pytest
+
+os.environ["TERMINAL_ENV"] = "local"
+
+
+@pytest.fixture(autouse=True)
+def _force_local_terminal(monkeypatch):
+    """Mirror test_code_execution.py — guarantee local backend under xdist."""
+    monkeypatch.setenv("TERMINAL_ENV", "local")
+
+
+from tools.code_execution_tool import (
+    SANDBOX_ALLOWED_TOOLS,
+    DEFAULT_EXECUTION_MODE,
+    EXECUTION_MODES,
+    _get_execution_mode,
+    _is_usable_python,
+    _resolve_child_cwd,
+    _resolve_child_python,
+    build_execute_code_schema,
+    execute_code,
+)
+
+
+@contextmanager
+def _mock_mode(mode):
+    """Context manager that pins code_execution.mode to the given value."""
+    with patch("tools.code_execution_tool._load_config",
+               return_value={"mode": mode}):
+        yield
+
+
+def _mock_handle_function_call(function_name, function_args, task_id=None, user_task=None):
+    """Minimal mock dispatcher reused across tests."""
+    if function_name == "terminal":
+        return json.dumps({"output": "mock", "exit_code": 0})
+    if function_name == "read_file":
+        return json.dumps({"content": "line1\n", "total_lines": 1})
+    return json.dumps({"error": f"Unknown tool: {function_name}"})
+
+
+# ---------------------------------------------------------------------------
+# Mode resolution
+# ---------------------------------------------------------------------------
+
+class TestGetExecutionMode(unittest.TestCase):
+    """_get_execution_mode reads config.yaml only (no env var surface)."""
+
+    def test_default_is_project(self):
+        self.assertEqual(DEFAULT_EXECUTION_MODE, "project")
+
+    def test_config_project(self):
+        with patch("tools.code_execution_tool._load_config",
+                   return_value={"mode": "project"}):
+            self.assertEqual(_get_execution_mode(), "project")
+
+    def test_config_strict(self):
+        with patch("tools.code_execution_tool._load_config",
+                   return_value={"mode": "strict"}):
+            self.assertEqual(_get_execution_mode(), "strict")
+
+    def test_config_case_insensitive(self):
+        with patch("tools.code_execution_tool._load_config",
+                   return_value={"mode": "STRICT"}):
+            self.assertEqual(_get_execution_mode(), "strict")
+
+    def test_config_strips_whitespace(self):
+        with patch("tools.code_execution_tool._load_config",
+                   return_value={"mode": "  project  "}):
+            self.assertEqual(_get_execution_mode(), "project")
+
+    def test_empty_config_falls_back_to_default(self):
+        with patch("tools.code_execution_tool._load_config", return_value={}):
+            self.assertEqual(_get_execution_mode(), DEFAULT_EXECUTION_MODE)
+
+    def test_bogus_config_falls_back_to_default(self):
+        with patch("tools.code_execution_tool._load_config",
+                   return_value={"mode": "banana"}):
+            self.assertEqual(_get_execution_mode(), DEFAULT_EXECUTION_MODE)
+
+    def test_none_config_falls_back_to_default(self):
+        with patch("tools.code_execution_tool._load_config",
+                   return_value={"mode": None}):
+            # str(None).lower() = "none" → not in EXECUTION_MODES → default
+            self.assertEqual(_get_execution_mode(), DEFAULT_EXECUTION_MODE)
+
+    def test_execution_modes_tuple(self):
+        """Canonical set of modes — tests + config layer rely on this shape."""
+        self.assertEqual(set(EXECUTION_MODES), {"project", "strict"})
+
+
+# ---------------------------------------------------------------------------
+# Interpreter resolver
+# ---------------------------------------------------------------------------
+
+class TestResolveChildPython(unittest.TestCase):
+    """_resolve_child_python — picks the right interpreter per mode."""
+
+    def test_strict_always_sys_executable(self):
+        """Strict mode never leaves sys.executable, even if venv is set."""
+        with patch.dict(os.environ, {"VIRTUAL_ENV": "/some/venv"}):
+            self.assertEqual(_resolve_child_python("strict"), sys.executable)
+
+    def test_project_with_no_venv_falls_back(self):
+        """Project mode without VIRTUAL_ENV or CONDA_PREFIX → sys.executable."""
+        env = {k: v for k, v in os.environ.items()
+               if k not in ("VIRTUAL_ENV", "CONDA_PREFIX")}
+        with patch.dict(os.environ, env, clear=True):
+            self.assertEqual(_resolve_child_python("project"), sys.executable)
+
+    def test_project_with_virtualenv_picks_venv_python(self):
+        """Project mode + VIRTUAL_ENV pointing at a real venv → that python."""
+        import tempfile, pathlib
+        with tempfile.TemporaryDirectory() as td:
+            fake_venv = pathlib.Path(td)
+            (fake_venv / "bin").mkdir()
+            # Symlink to real python so the version check actually passes
+            (fake_venv / "bin" / "python").symlink_to(sys.executable)
+            with patch.dict(os.environ, {"VIRTUAL_ENV": str(fake_venv)}):
+                # Clear cache — _is_usable_python memoizes on path
+                _is_usable_python.cache_clear()
+                result = _resolve_child_python("project")
+                self.assertEqual(result, str(fake_venv / "bin" / "python"))
+
+    def test_project_with_broken_venv_falls_back(self):
+        """VIRTUAL_ENV set but bin/python missing → sys.executable."""
+        import tempfile
+        with tempfile.TemporaryDirectory() as td:
+            # No bin/python inside — broken venv
+            with patch.dict(os.environ, {"VIRTUAL_ENV": td}):
+                _is_usable_python.cache_clear()
+                self.assertEqual(_resolve_child_python("project"), sys.executable)
+
+    def test_project_prefers_virtualenv_over_conda(self):
+        """If both VIRTUAL_ENV and CONDA_PREFIX are set, VIRTUAL_ENV wins."""
+        import tempfile, pathlib
+        with tempfile.TemporaryDirectory() as ve_td, tempfile.TemporaryDirectory() as conda_td:
+            ve = pathlib.Path(ve_td)
+            (ve / "bin").mkdir()
+            (ve / "bin" / "python").symlink_to(sys.executable)
+
+            conda = pathlib.Path(conda_td)
+            (conda / "bin").mkdir()
+            (conda / "bin" / "python").symlink_to(sys.executable)
+
+            with patch.dict(os.environ, {"VIRTUAL_ENV": str(ve), "CONDA_PREFIX": str(conda)}):
+                _is_usable_python.cache_clear()
+                result = _resolve_child_python("project")
+                self.assertEqual(result, str(ve / "bin" / "python"))
+
+    def test_is_usable_python_rejects_nonexistent(self):
+        _is_usable_python.cache_clear()
+        self.assertFalse(_is_usable_python("/does/not/exist/python"))
+
+    def test_is_usable_python_accepts_real_python(self):
+        _is_usable_python.cache_clear()
+        self.assertTrue(_is_usable_python(sys.executable))
+
+
+# ---------------------------------------------------------------------------
+# CWD resolver
+# ---------------------------------------------------------------------------
+
+class TestResolveChildCwd(unittest.TestCase):
+
+    def test_strict_uses_staging_dir(self):
+        self.assertEqual(_resolve_child_cwd("strict", "/tmp/staging"), "/tmp/staging")
+
+    def test_project_without_terminal_cwd_uses_getcwd(self):
+        env = {k: v for k, v in os.environ.items() if k != "TERMINAL_CWD"}
+        with patch.dict(os.environ, env, clear=True):
+            self.assertEqual(_resolve_child_cwd("project", "/tmp/staging"), os.getcwd())
+
+    def test_project_uses_terminal_cwd_when_set(self):
+        import tempfile
+        with tempfile.TemporaryDirectory() as td:
+            with patch.dict(os.environ, {"TERMINAL_CWD": td}):
+                self.assertEqual(_resolve_child_cwd("project", "/tmp/staging"), td)
+
+    def test_project_bogus_terminal_cwd_falls_back_to_getcwd(self):
+        with patch.dict(os.environ, {"TERMINAL_CWD": "/does/not/exist/anywhere"}):
+            self.assertEqual(_resolve_child_cwd("project", "/tmp/staging"), os.getcwd())
+
+    def test_project_expands_tilde(self):
+        import pathlib
+        home = str(pathlib.Path.home())
+        with patch.dict(os.environ, {"TERMINAL_CWD": "~"}):
+            self.assertEqual(_resolve_child_cwd("project", "/tmp/staging"), home)
+
+
+# ---------------------------------------------------------------------------
+# Schema description
+# ---------------------------------------------------------------------------
+
+class TestModeAwareSchema(unittest.TestCase):
+
+    def test_strict_description_mentions_temp_dir(self):
+        desc = build_execute_code_schema(mode="strict")["description"]
+        self.assertIn("temp dir", desc)
+
+    def test_project_description_mentions_session_and_venv(self):
+        desc = build_execute_code_schema(mode="project")["description"]
+        self.assertIn("session", desc)
+        self.assertIn("venv", desc)
+
+    def test_neither_description_uses_sandbox_language(self):
+        """REGRESSION GUARD for commit 39b83f34.
+
+        Agents on local backends falsely believed they were sandboxed and
+        refused networking tasks. Do not reintroduce any 'sandbox' /
+        'isolated' / 'cloud' language in the tool description.
+        """
+        for mode in EXECUTION_MODES:
+            desc = build_execute_code_schema(mode=mode)["description"].lower()
+            for forbidden in ("sandbox", "isolated", "cloud"):
+                self.assertNotIn(forbidden, desc,
+                                 f"mode={mode}: '{forbidden}' leaked into description")
+
+    def test_descriptions_are_similar_length(self):
+        """Both modes should have roughly the same-size description."""
+        strict = len(build_execute_code_schema(mode="strict")["description"])
+        project = len(build_execute_code_schema(mode="project")["description"])
+        self.assertLess(abs(strict - project), 200)
+
+    def test_default_mode_reads_config(self):
+        """build_execute_code_schema() with mode=None reads config.yaml."""
+        with _mock_mode("strict"):
+            desc = build_execute_code_schema()["description"]
+            self.assertIn("temp dir", desc)
+        with _mock_mode("project"):
+            desc = build_execute_code_schema()["description"]
+            self.assertIn("session", desc)
+
+
+# ---------------------------------------------------------------------------
+# Integration: what actually happens when execute_code runs per mode
+# ---------------------------------------------------------------------------
+
+@pytest.mark.skipif(sys.platform == "win32", reason="execute_code is POSIX-only")
+class TestExecuteCodeModeIntegration(unittest.TestCase):
+    """End-to-end: verify the subprocess actually runs where we expect."""
+
+    def _run(self, code, mode, enabled_tools=None, extra_env=None):
+        env_overrides = extra_env or {}
+        with _mock_mode(mode):
+            with patch.dict(os.environ, env_overrides):
+                with patch("model_tools.handle_function_call",
+                           side_effect=_mock_handle_function_call):
+                    raw = execute_code(
+                        code=code,
+                        task_id=f"test-{mode}",
+                        enabled_tools=enabled_tools or list(SANDBOX_ALLOWED_TOOLS),
+                    )
+        return json.loads(raw)
+
+    def test_strict_mode_runs_in_tmpdir(self):
+        """Strict mode: script's os.getcwd() is the staging tmpdir."""
+        result = self._run("import os; print(os.getcwd())", mode="strict")
+        self.assertEqual(result["status"], "success")
+        self.assertIn("hermes_sandbox_", result["output"])
+
+    def test_project_mode_runs_in_session_cwd(self):
+        """Project mode: script's os.getcwd() is the session's working dir."""
+        import tempfile
+        with tempfile.TemporaryDirectory() as td:
+            result = self._run(
+                "import os; print(os.getcwd())",
+                mode="project",
+                extra_env={"TERMINAL_CWD": td},
+            )
+            self.assertEqual(result["status"], "success")
+            # Resolve symlinks (macOS /tmp → /private/tmp) on both sides
+            self.assertEqual(
+                os.path.realpath(result["output"].strip()),
+                os.path.realpath(td),
+            )
+
+    def test_project_mode_interpreter_is_venv_python(self):
+        """Project mode: sys.executable inside the child is the venv's python
+        when VIRTUAL_ENV is set to a real venv."""
+        # The hermes-agent venv is always active during tests, so this also
+        # happens to equal sys.executable of the parent. What we're asserting
+        # is: resolver picked a venv-bin/python path, not that it differs
+        # from sys.executable.
+        result = self._run("import sys; print(sys.executable)", mode="project")
+        self.assertEqual(result["status"], "success")
+        # Either VIRTUAL_ENV-bin/python or sys.executable fallback, both OK.
+        output = result["output"].strip()
+        ve = os.environ.get("VIRTUAL_ENV", "").strip()
+        if ve:
+            self.assertTrue(
+                output.startswith(ve) or output == sys.executable,
+                f"project-mode python should be under VIRTUAL_ENV={ve} or sys.executable={sys.executable}, got {output}",
+            )
+
+    def test_project_mode_can_still_import_hermes_tools(self):
+        """Regression: hermes_tools still importable from non-tmpdir CWD.
+
+        This is the PYTHONPATH fix — without it, switching to session CWD
+        breaks `from hermes_tools import terminal`.
+        """
+        import tempfile
+        with tempfile.TemporaryDirectory() as td:
+            code = (
+                "from hermes_tools import terminal\n"
+                "r = terminal('echo x')\n"
+                "print(r.get('output', 'MISSING'))\n"
+            )
+            result = self._run(code, mode="project", extra_env={"TERMINAL_CWD": td})
+            self.assertEqual(result["status"], "success")
+            self.assertIn("mock", result["output"])
+
+    def test_strict_mode_can_still_import_hermes_tools(self):
+        """Regression: strict mode's tmpdir CWD still works for imports."""
+        code = (
+            "from hermes_tools import terminal\n"
+            "r = terminal('echo x')\n"
+            "print(r.get('output', 'MISSING'))\n"
+        )
+        result = self._run(code, mode="strict")
+        self.assertEqual(result["status"], "success")
+        self.assertIn("mock", result["output"])
+
+
+# ---------------------------------------------------------------------------
+# SECURITY-CRITICAL regression guards
+#
+# These MUST pass in both strict and project mode. The whole tiered-mode
+# proposition rests on the claim that switching from strict to project only
+# changes CWD + interpreter, not the security posture.
+# ---------------------------------------------------------------------------
+
+@pytest.mark.skipif(sys.platform == "win32", reason="execute_code is POSIX-only")
+class TestSecurityInvariantsAcrossModes(unittest.TestCase):
+
+    def _run(self, code, mode):
+        with _mock_mode(mode):
+            with patch("model_tools.handle_function_call",
+                       side_effect=_mock_handle_function_call):
+                raw = execute_code(
+                    code=code,
+                    task_id=f"test-sec-{mode}",
+                    enabled_tools=list(SANDBOX_ALLOWED_TOOLS),
+                )
+        return json.loads(raw)
+
+    def test_api_keys_scrubbed_in_strict_mode(self):
+        code = (
+            "import os\n"
+            "print('KEY=' + os.environ.get('OPENAI_API_KEY', 'MISSING'))\n"
+            "print('TOK=' + os.environ.get('ANTHROPIC_API_KEY', 'MISSING'))\n"
+        )
+        with patch.dict(os.environ, {
+            "OPENAI_API_KEY": "sk-should-not-leak",
+            "ANTHROPIC_API_KEY": "ant-should-not-leak",
+        }):
+            result = self._run(code, mode="strict")
+        self.assertEqual(result["status"], "success")
+        self.assertIn("KEY=MISSING", result["output"])
+        self.assertIn("TOK=MISSING", result["output"])
+        self.assertNotIn("sk-should-not-leak", result["output"])
+        self.assertNotIn("ant-should-not-leak", result["output"])
+
+    def test_api_keys_scrubbed_in_project_mode(self):
+        """CRITICAL: the project-mode default does NOT leak user credentials."""
+        code = (
+            "import os\n"
+            "print('KEY=' + os.environ.get('OPENAI_API_KEY', 'MISSING'))\n"
+            "print('TOK=' + os.environ.get('ANTHROPIC_API_KEY', 'MISSING'))\n"
+            "print('SEC=' + os.environ.get('GITHUB_TOKEN', 'MISSING'))\n"
+        )
+        with patch.dict(os.environ, {
+            "OPENAI_API_KEY": "sk-should-not-leak",
+            "ANTHROPIC_API_KEY": "ant-should-not-leak",
+            "GITHUB_TOKEN": "ghp-should-not-leak",
+        }):
+            result = self._run(code, mode="project")
+        self.assertEqual(result["status"], "success")
+        for needle in ("KEY=MISSING", "TOK=MISSING", "SEC=MISSING"):
+            self.assertIn(needle, result["output"])
+        for leaked in ("sk-should-not-leak", "ant-should-not-leak", "ghp-should-not-leak"):
+            self.assertNotIn(leaked, result["output"])
+
+    def test_secret_substrings_scrubbed_in_project_mode(self):
+        """SECRET/PASSWORD/CREDENTIAL/PASSWD/AUTH filters still apply."""
+        code = (
+            "import os\n"
+            "for k in ('MY_SECRET', 'DB_PASSWORD', 'VAULT_CREDENTIAL', "
+            "'LDAP_PASSWD', 'AUTH_TOKEN'):\n"
+            "    print(f'{k}=' + os.environ.get(k, 'MISSING'))\n"
+        )
+        with patch.dict(os.environ, {
+            "MY_SECRET": "secret-should-not-leak",
+            "DB_PASSWORD": "password-should-not-leak",
+            "VAULT_CREDENTIAL": "cred-should-not-leak",
+            "LDAP_PASSWD": "passwd-should-not-leak",
+            "AUTH_TOKEN": "auth-should-not-leak",
+        }):
+            result = self._run(code, mode="project")
+        self.assertEqual(result["status"], "success")
+        for leaked in ("secret-should-not-leak", "password-should-not-leak",
+                       "cred-should-not-leak", "passwd-should-not-leak",
+                       "auth-should-not-leak"):
+            self.assertNotIn(leaked, result["output"])
+
+    def test_tool_whitelist_enforced_in_strict_mode(self):
+        """A script cannot RPC-call tools outside SANDBOX_ALLOWED_TOOLS."""
+        # execute_code is NOT in SANDBOX_ALLOWED_TOOLS (no recursion)
+        self.assertNotIn("execute_code", SANDBOX_ALLOWED_TOOLS)
+        code = (
+            "import hermes_tools as ht\n"
+            "print('execute_code_available:', hasattr(ht, 'execute_code'))\n"
+            "print('delegate_task_available:', hasattr(ht, 'delegate_task'))\n"
+        )
+        result = self._run(code, mode="strict")
+        self.assertEqual(result["status"], "success")
+        self.assertIn("execute_code_available: False", result["output"])
+        self.assertIn("delegate_task_available: False", result["output"])
+
+    def test_tool_whitelist_enforced_in_project_mode(self):
+        """CRITICAL: project mode does NOT widen the tool whitelist."""
+        code = (
+            "import hermes_tools as ht\n"
+            "print('execute_code_available:', hasattr(ht, 'execute_code'))\n"
+            "print('delegate_task_available:', hasattr(ht, 'delegate_task'))\n"
+        )
+        result = self._run(code, mode="project")
+        self.assertEqual(result["status"], "success")
+        self.assertIn("execute_code_available: False", result["output"])
+        self.assertIn("delegate_task_available: False", result["output"])
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py
index 8268024fc72..c5a89488a08 100644
--- a/tools/code_execution_tool.py
+++ b/tools/code_execution_tool.py
@@ -29,6 +29,7 @@ Remote execution additionally requires Python 3 in the terminal backend.
 """
 
 import base64
+import functools
 import json
 import logging
 import os
@@ -1022,10 +1023,15 @@ def execute_code(
         child_env["HERMES_RPC_SOCKET"] = sock_path
         child_env["PYTHONDONTWRITEBYTECODE"] = "1"
         # Ensure the hermes-agent root is importable in the sandbox so
-        # repo-root modules are available to child scripts.
+        # repo-root modules are available to child scripts.  We also prepend
+        # the staging tmpdir so ``from hermes_tools import ...`` resolves even
+        # when the subprocess CWD is not tmpdir (project mode).
         _hermes_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
         _existing_pp = child_env.get("PYTHONPATH", "")
-        child_env["PYTHONPATH"] = _hermes_root + (os.pathsep + _existing_pp if _existing_pp else "")
+        _pp_parts = [tmpdir, _hermes_root]
+        if _existing_pp:
+            _pp_parts.append(_existing_pp)
+        child_env["PYTHONPATH"] = os.pathsep.join(_pp_parts)
         # Inject user's configured timezone so datetime.now() in sandboxed
         # code reflects the correct wall-clock time.  Only TZ is set —
         # HERMES_TIMEZONE is an internal Hermes setting and must not leak
@@ -1042,9 +1048,19 @@ def execute_code(
         if _profile_home:
             child_env["HOME"] = _profile_home
 
+        # Resolve interpreter + CWD based on execute_code mode.
+        #   - strict : today's behavior (sys.executable + tmpdir CWD).
+        #   - project: user's venv python + session's working directory, so
+        #              project deps like pandas and user files resolve.
+        # Env scrubbing and tool whitelist apply identically in both modes.
+        _mode = _get_execution_mode()
+        _child_python = _resolve_child_python(_mode)
+        _child_cwd = _resolve_child_cwd(_mode, tmpdir)
+        _script_path = os.path.join(tmpdir, "script.py")
+
         proc = subprocess.Popen(
-            [sys.executable, "script.py"],
-            cwd=tmpdir,
+            [_child_python, _script_path],
+            cwd=_child_cwd,
             env=child_env,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
@@ -1299,6 +1315,127 @@ def _load_config() -> dict:
         return {}
 
 
+# ---------------------------------------------------------------------------
+# Execution mode resolution (strict vs project)
+# ---------------------------------------------------------------------------
+
+# Valid values for code_execution.mode. Kept as a module constant so tests
+# and the config layer can reference the canonical set.
+EXECUTION_MODES = ("project", "strict")
+DEFAULT_EXECUTION_MODE = "project"
+
+
+def _get_execution_mode() -> str:
+    """Return the active execute_code mode — 'project' or 'strict'.
+
+    Reads ``code_execution.mode`` from config.yaml; invalid values fall back
+    to ``DEFAULT_EXECUTION_MODE`` ('project') with a log warning.
+
+    Mode semantics:
+      - ``project`` (default): scripts run in the session's working directory
+        with the active virtual environment's python, so project dependencies
+        (pandas, torch, project packages) and files resolve naturally.
+      - ``strict``: scripts run in an isolated temp directory with
+        ``sys.executable`` (hermes-agent's python). Reproducible and the
+        interpreter is guaranteed to work, but project deps and relative paths
+        won't resolve.
+
+    Env scrubbing and tool whitelist apply identically in both modes.
+    """
+    cfg_value = str(_load_config().get("mode", DEFAULT_EXECUTION_MODE)).strip().lower()
+    if cfg_value in EXECUTION_MODES:
+        return cfg_value
+    logger.warning(
+        "Ignoring code_execution.mode=%r (expected one of %s), falling back to %r",
+        cfg_value, EXECUTION_MODES, DEFAULT_EXECUTION_MODE,
+    )
+    return DEFAULT_EXECUTION_MODE
+
+
+@functools.lru_cache(maxsize=32)
+def _is_usable_python(python_path: str) -> bool:
+    """Check whether a candidate Python interpreter is usable for execute_code.
+
+    Requires Python 3.8+ (f-strings and stdlib modules the RPC stubs need).
+    Cached so we don't fork a subprocess on every execute_code call.
+    """
+    try:
+        result = subprocess.run(
+            [python_path, "-c",
+             "import sys; sys.exit(0 if sys.version_info >= (3, 8) else 1)"],
+            timeout=5,
+            capture_output=True,
+        )
+        return result.returncode == 0
+    except (OSError, subprocess.TimeoutExpired, subprocess.SubprocessError):
+        return False
+
+
+def _resolve_child_python(mode: str) -> str:
+    """Pick the Python interpreter for the execute_code subprocess.
+
+    In ``strict`` mode, always ``sys.executable`` — guaranteed to work and
+    keeps behavior fully reproducible across sessions.
+
+    In ``project`` mode, prefer the user's active virtualenv/conda env's
+    python so ``import pandas`` etc. work. Falls back to ``sys.executable``
+    if no venv is detected, the candidate binary is missing/not executable,
+    or it fails a Python 3.8+ version check.
+    """
+    if mode != "project":
+        return sys.executable
+
+    if _IS_WINDOWS:
+        exe_names = ("python.exe", "python3.exe")
+        subdirs = ("Scripts",)
+    else:
+        exe_names = ("python", "python3")
+        subdirs = ("bin",)
+
+    for var in ("VIRTUAL_ENV", "CONDA_PREFIX"):
+        root = os.environ.get(var, "").strip()
+        if not root:
+            continue
+        for subdir in subdirs:
+            for exe in exe_names:
+                candidate = os.path.join(root, subdir, exe)
+                if not (os.path.isfile(candidate) and os.access(candidate, os.X_OK)):
+                    continue
+                if _is_usable_python(candidate):
+                    return candidate
+                # Found the interpreter but it failed the usability check —
+                # log and return sys.executable; no other candidate is tried.
+                logger.info(
+                    "execute_code: skipping %s=%s (Python version < 3.8 or broken). "
+                    "Using sys.executable instead.", var, candidate,
+                )
+                return sys.executable
+
+    return sys.executable
+
+
+def _resolve_child_cwd(mode: str, staging_dir: str) -> str:
+    """Resolve the working directory for the execute_code subprocess.
+
+    - ``strict``: the staging tmpdir (today's behavior).
+    - ``project``: the session's TERMINAL_CWD (same as the terminal tool), or
+      ``os.getcwd()`` if TERMINAL_CWD is unset or doesn't point at a real dir.
+      Falls back to the staging tmpdir as a last resort so we never invoke
+      Popen with a nonexistent cwd.
+    """
+    if mode != "project":
+        return staging_dir
+    raw = os.environ.get("TERMINAL_CWD", "").strip()
+    if raw:
+        expanded = os.path.expanduser(raw)
+        if os.path.isdir(expanded):
+            return expanded
+    here = os.getcwd()
+    if os.path.isdir(here):
+        return here
+    return staging_dir
+
+
 # ---------------------------------------------------------------------------
 # OpenAI Function-Calling Schema
 # ---------------------------------------------------------------------------
@@ -1330,15 +1467,24 @@ _TOOL_DOC_LINES = [
 ]
 
 
-def build_execute_code_schema(enabled_sandbox_tools: set = None) -> dict:
+def build_execute_code_schema(enabled_sandbox_tools: set = None,
+                              mode: str = None) -> dict:
     """Build the execute_code schema with description listing only enabled tools.
 
     When tools are disabled via ``hermes tools`` (e.g. web is turned off),
     the schema description should NOT mention web_search / web_extract —
     otherwise the model thinks they are available and keeps trying to use them.
+
+    ``mode`` controls the working-directory sentence in the description:
+      - ``'strict'``: scripts run in a temp dir (not the session's CWD)
+      - ``'project'`` (default): scripts run in the session's CWD with the
+        active venv's python
+    If ``mode`` is None, the current ``code_execution.mode`` config is read.
     """
     if enabled_sandbox_tools is None:
         enabled_sandbox_tools = SANDBOX_ALLOWED_TOOLS
+    if mode is None:
+        mode = _get_execution_mode()
 
     # Build tool documentation lines for only the enabled tools
     tool_lines = "\n".join(
@@ -1354,6 +1500,20 @@ def build_execute_code_schema(enabled_sandbox_tools: set = None) -> dict:
     else:
         import_str = "..."
 
+    # Mode-specific CWD guidance. Project mode is the default and matches
+    # terminal()'s filesystem/interpreter; strict mode retains the isolated
+    # temp-dir staging and hermes-agent's own python.
+    if mode == "strict":
+        cwd_note = (
+            "Scripts run in their own temp dir, not the session's CWD — use absolute paths "
+            "(os.path.expanduser('~/.hermes/.env')) or terminal()/read_file() for user files."
+        )
+    else:
+        cwd_note = (
+            "Scripts run in the session's working directory with the active venv's python, "
+            "so project deps (pandas, etc.) and relative paths work like in terminal()."
+        )
+
     description = (
         "Run a Python script that can call Hermes tools programmatically. "
         "Use this when you need 3+ tool calls with processing logic between them, "
@@ -1367,8 +1527,7 @@ def build_execute_code_schema(enabled_sandbox_tools: set = None) -> dict:
         f"{tool_lines}\n\n"
         "Limits: 5-minute timeout, 50KB stdout cap, max 50 tool calls per script. "
         "terminal() is foreground-only (no background or pty).\n\n"
-        "Scripts run in their own temp dir, not the session's CWD — use absolute paths "
-        "(os.path.expanduser('~/.hermes/.env')) or terminal()/read_file() for user files.\n\n"
+        f"{cwd_note}\n\n"
         "Print your final result to stdout. Use Python stdlib (json, re, math, csv, "
         "datetime, collections, etc.) for processing between tool calls.\n\n"
         "Also available (no import needed — built into hermes_tools):\n"
@@ -1397,7 +1556,8 @@ def build_execute_code_schema(enabled_sandbox_tools: set = None) -> dict:
     }
 
 
-# Default schema used at registration time (all sandbox tools listed)
+# Default schema used at registration time (all sandbox tools listed,
+# current configured mode).  model_tools.py rebuilds per-session anyway.
 EXECUTE_CODE_SCHEMA = build_execute_code_schema()
 
 

From 8322b42c6cd0f6ae9bc6721e8b0d8cbff4a856f2 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 18 Apr 2026 01:52:06 -0700
Subject: [PATCH 014/143] fix(streaming): surface dropped tool-call on
 mid-stream stall (#12072)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When streaming died after text was already delivered to the user but
before a tool-call's arguments finished streaming, the partial-stream
stub at the end of _interruptible_streaming_api_call silently set
`tool_calls=None` on the returned message and kept `finish_reason=stop`.
The agent treated the turn as complete, the session exited cleanly with
code 0, and the attempted action was lost with zero user-facing signal.

Live-observed Apr 2026 with MiniMax M2.7 on a ~6-minute audit task:
agent streamed 'Let me write the audit:', started emitting a write_file
tool call, MiniMax stalled for 240s mid-arguments, the stale-stream
detector killed the connection, the stub fired, session ended, no file
written, no error shown.

Fix: the streaming accumulator now records each tool-call's name into
`result['partial_tool_names']` as soon as the name is known. When the
stub builder fires after a partial delivery and finds any recorded tool
names, it appends a human-visible warning to the stub's content — and
also fires it as a live stream delta so the user sees it immediately,
not only in the persisted transcript. The next turn's model also sees
the warning in conversation history and can retry on its own. Text-only
partial streams keep the original bare-recovery behaviour (no warning).

Validation:
| Scenario                                    | Before                    | After                                       |
|---------------------------------------------|---------------------------|---------------------------------------------|
| Stream dies mid tool-call, text already sent | Silent exit, no indication | User sees ⚠ warning naming the dropped tool |
| Text-only partial stream                     | Bare recovered text       | Unchanged                                   |
| tests/run_agent/test_streaming.py            | 24 passed                 | 26 passed (2 new)                           |
---
 run_agent.py                      |  55 ++++++++++--
 tests/run_agent/test_streaming.py | 135 ++++++++++++++++++++++++++++++
 2 files changed, 182 insertions(+), 8 deletions(-)

diff --git a/run_agent.py b/run_agent.py
index e8d23d39cac..d5ff125e33b 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -5579,7 +5579,7 @@ class AIAgent:
                 raise result["error"]
             return result["response"]
 
-        result = {"response": None, "error": None}
+        result = {"response": None, "error": None, "partial_tool_names": []}
         request_client_holder = {"client": None}
         first_delta_fired = {"done": False}
         deltas_were_sent = {"yes": False}  # Track if any deltas were fired (for fallback)
@@ -5751,6 +5751,14 @@ class AIAgent:
                             tool_gen_notified.add(idx)
                             _fire_first_delta()
                             self._fire_tool_gen_started(name)
+                            # Record the partial tool-call name so the outer
+                            # stub-builder can surface a user-visible warning
+                            # if streaming dies before this tool's arguments
+                            # are fully delivered.  Without this, a stall
+                            # during tool-call JSON generation lets the outer
+                            # partial-stream stub return `tool_calls=None`,
+                            # silently discarding the attempted action.
+                            result["partial_tool_names"].append(name)
 
                 if chunk.choices[0].finish_reason:
                     finish_reason = chunk.choices[0].finish_reason
@@ -6117,13 +6125,44 @@ class AIAgent:
                 _partial_text = (
                     getattr(self, "_current_streamed_assistant_text", "") or ""
                 ).strip() or None
-                logger.warning(
-                    "Partial stream delivered before error; returning stub "
-                    "response with %s chars of recovered content to prevent "
-                    "duplicate messages: %s",
-                    len(_partial_text or ""),
-                    result["error"],
-                )
+
+                # If the stream died while the model was emitting a tool call,
+                # the stub below will silently set `tool_calls=None` and the
+                # agent loop will treat the turn as complete — the attempted
+                # action is lost with no user-facing signal.  Append a
+                # human-visible warning to the stub content so (a) the user
+                # knows something failed, and (b) the next turn's model sees
+                # in conversation history what was attempted and can retry.
+                _partial_names = list(result.get("partial_tool_names") or [])
+                if _partial_names:
+                    _name_str = ", ".join(_partial_names[:3])
+                    if len(_partial_names) > 3:
+                        _name_str += f", +{len(_partial_names) - 3} more"
+                    _warn = (
+                        f"\n\n⚠ Stream stalled mid tool-call "
+                        f"({_name_str}); the action was not executed. "
+                        f"Ask me to retry if you want to continue."
+                    )
+                    _partial_text = (_partial_text or "") + _warn
+                    # Also fire as a streaming delta so the user sees it now
+                    # instead of only in the persisted transcript.
+                    try:
+                        self._fire_stream_delta(_warn)
+                    except Exception:
+                        pass
+                    logger.warning(
+                        "Partial stream dropped tool call(s) %s after %s chars "
+                        "of text; surfaced warning to user: %s",
+                        _partial_names, len(_partial_text or ""), result["error"],
+                    )
+                else:
+                    logger.warning(
+                        "Partial stream delivered before error; returning stub "
+                        "response with %s chars of recovered content to prevent "
+                        "duplicate messages: %s",
+                        len(_partial_text or ""),
+                        result["error"],
+                    )
                 _stub_msg = SimpleNamespace(
                     role="assistant", content=_partial_text, tool_calls=None,
                     reasoning_content=None,
diff --git a/tests/run_agent/test_streaming.py b/tests/run_agent/test_streaming.py
index 73a9872020e..6afe36ee3ad 100644
--- a/tests/run_agent/test_streaming.py
+++ b/tests/run_agent/test_streaming.py
@@ -952,3 +952,138 @@ class TestAnthropicStreamCallbacks:
         agent._interruptible_streaming_api_call({})
 
         assert touch_calls.count("receiving stream response") == len(events)
+
+
+class TestPartialToolCallWarning:
+    """Regression: when a stream dies mid tool-call argument generation after
+    text was already delivered, the partial-stream stub at the end of
+    ``_interruptible_streaming_api_call`` silently set ``tool_calls=None`` and returned
+    ``finish_reason=stop``, losing the attempted action with zero user-facing
+    signal.  Live-observed Apr 2026 with MiniMax M2.7 on a 6-minute audit
+    task — agent streamed commentary, emitted a write_file tool call,
+    MiniMax stalled for 240 s mid-arguments, stale-stream detector killed
+    the connection, the stub returned, session ended with no file written
+    and no error shown.
+
+    Fix: when the stream accumulator captured any tool-call names before the
+    error, the stub now appends a user-visible warning to content AND fires
+    it as a stream delta so the user sees it immediately.
+    """
+
+    @patch("run_agent.AIAgent._create_request_openai_client")
+    @patch("run_agent.AIAgent._close_request_openai_client")
+    def test_partial_tool_call_surfaces_warning(self, mock_close, mock_create):
+        """Stream with text + partial tool-call name + mid-stream error
+        produces a stub whose content contains the user-visible warning
+        and whose tool_calls is None."""
+        from run_agent import AIAgent
+
+        class _StallError(RuntimeError):
+            pass
+
+        def _stalling_stream():
+            yield _make_stream_chunk(content="Let me write the audit: ")
+            yield _make_stream_chunk(tool_calls=[
+                _make_tool_call_delta(index=0, tc_id="call_1", name="write_file"),
+            ])
+            yield _make_stream_chunk(tool_calls=[
+                _make_tool_call_delta(index=0, arguments='{"path": "/tmp/x", '),
+            ])
+            raise _StallError("simulated upstream stall")
+
+        mock_client = MagicMock()
+        mock_client.chat.completions.create.side_effect = lambda *a, **kw: _stalling_stream()
+        mock_create.return_value = mock_client
+
+        agent = AIAgent(
+            api_key="test-key",
+            base_url="https://openrouter.ai/api/v1",
+            model="test/model",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+        agent.api_mode = "chat_completions"
+        agent._interrupt_requested = False
+
+        fired_deltas: list = []
+        agent._fire_stream_delta = lambda text: fired_deltas.append(text)
+        agent._current_streamed_assistant_text = "Let me write the audit: "
+
+        import os as _os
+        _prev = _os.environ.get("HERMES_STREAM_RETRIES")
+        _os.environ["HERMES_STREAM_RETRIES"] = "0"
+        try:
+            response = agent._interruptible_streaming_api_call({})
+        finally:
+            if _prev is None:
+                _os.environ.pop("HERMES_STREAM_RETRIES", None)
+            else:
+                _os.environ["HERMES_STREAM_RETRIES"] = _prev
+
+        content = response.choices[0].message.content or ""
+        assert "Let me write the audit:" in content, (
+            f"Partial text not preserved in stub: {content!r}"
+        )
+        assert "Stream stalled mid tool-call" in content, (
+            f"Stub content is missing the dropped-tool-call warning; users "
+            f"get silent failure.  Got content={content!r}"
+        )
+        assert "write_file" in content, (
+            f"Warning should name the dropped tool. Got: {content!r}"
+        )
+        assert response.choices[0].message.tool_calls is None
+        assert any("Stream stalled mid tool-call" in d for d in fired_deltas), (
+            f"Warning was not surfaced as a live stream delta. "
+            f"fired_deltas={fired_deltas}"
+        )
+
+    @patch("run_agent.AIAgent._create_request_openai_client")
+    @patch("run_agent.AIAgent._close_request_openai_client")
+    def test_partial_text_only_no_warning(self, mock_close, mock_create):
+        """Text-only partial stream (no tool call mid-flight) keeps the
+        pre-fix behaviour: bare recovered text, no warning noise."""
+        from run_agent import AIAgent
+
+        class _StallError(RuntimeError):
+            pass
+
+        def _stalling_stream():
+            yield _make_stream_chunk(content="Here's my answer so far")
+            raise _StallError("simulated upstream stall")
+
+        mock_client = MagicMock()
+        mock_client.chat.completions.create.side_effect = lambda *a, **kw: _stalling_stream()
+        mock_create.return_value = mock_client
+
+        agent = AIAgent(
+            api_key="test-key",
+            base_url="https://openrouter.ai/api/v1",
+            model="test/model",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+        agent.api_mode = "chat_completions"
+        agent._interrupt_requested = False
+        agent._current_streamed_assistant_text = "Here's my answer so far"
+
+        import os as _os
+        _prev = _os.environ.get("HERMES_STREAM_RETRIES")
+        _os.environ["HERMES_STREAM_RETRIES"] = "0"
+        try:
+            response = agent._interruptible_streaming_api_call({})
+        finally:
+            if _prev is None:
+                _os.environ.pop("HERMES_STREAM_RETRIES", None)
+            else:
+                _os.environ["HERMES_STREAM_RETRIES"] = _prev
+
+        content = response.choices[0].message.content or ""
+        assert content == "Here's my answer so far", (
+            f"Pre-fix behaviour regressed for text-only partial streams: {content!r}"
+        )
+        assert "Stream stalled" not in content, (
+            f"Unexpected warning on text-only partial stream: {content!r}"
+        )
+

From a2c9f5d0a79d7d7fb4ff7bffc44cc9dd1c8f2259 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 18 Apr 2026 01:53:09 -0700
Subject: [PATCH 015/143] docs(execute_code): document project/strict execution
 modes (#12073)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow-up to PR #11971. Documents the new code_execution.mode config
key and what each mode actually does.

- user-guide/configuration.md: add mode: project to the yaml example,
  explain project vs strict and call out that security invariants are
  identical across modes.
- user-guide/features/code-execution.md: new 'Execution Mode' section
  with a comparison table and usage guidance; update the 'temporary
  directory' note so it reflects that script.py runs in the session
  CWD in project mode (staging dir stays on PYTHONPATH for imports);
  drop stale 'sandboxed' framing from the intro and skill-passthrough
  paragraph.
- getting-started/learning-path.md: update the one-line Code Execution
  summary to match (no longer 'sandboxed environments' — the default
  runs in the session's real working directory).

No code changes.
---
 website/docs/getting-started/learning-path.md |  2 +-
 website/docs/user-guide/configuration.md      | 10 ++++-
 .../user-guide/features/code-execution.md     | 40 ++++++++++++++++---
 3 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/website/docs/getting-started/learning-path.md b/website/docs/getting-started/learning-path.md
index bcdbb44d420..41170ccccdb 100644
--- a/website/docs/getting-started/learning-path.md
+++ b/website/docs/getting-started/learning-path.md
@@ -129,7 +129,7 @@ Not sure what's available? Here's a quick directory of major features:
 | **MCP** | Connect to external tool servers via Model Context Protocol | [MCP](/docs/user-guide/features/mcp) |
 | **Cron** | Schedule recurring agent tasks | [Cron](/docs/user-guide/features/cron) |
 | **Delegation** | Spawn sub-agents for parallel work | [Delegation](/docs/user-guide/features/delegation) |
-| **Code Execution** | Run code in sandboxed environments | [Code Execution](/docs/user-guide/features/code-execution) |
+| **Code Execution** | Run Python scripts that call Hermes tools programmatically | [Code Execution](/docs/user-guide/features/code-execution) |
 | **Browser** | Web browsing and scraping | [Browser](/docs/user-guide/features/browser) |
 | **Hooks** | Event-driven callbacks and middleware | [Hooks](/docs/user-guide/features/hooks) |
 | **Batch Processing** | Process multiple inputs in bulk | [Batch Processing](/docs/user-guide/features/batch-processing) |
diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md
index 29d1665627e..dbc6b0e47e6 100644
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@@ -1104,14 +1104,22 @@ human_delay:
 
 ## Code Execution
 
-Configure the sandboxed Python code execution tool:
+Configure the `execute_code` tool:
 
 ```yaml
 code_execution:
+  mode: project                # project (default) | strict
   timeout: 300                 # Max execution time in seconds
   max_tool_calls: 50           # Max tool calls within code execution
 ```
 
+**`mode`** controls the working directory and Python interpreter for scripts:
+
+- **`project`** (default) — scripts run in the session's working directory with the active virtualenv/conda env's python. Project deps (`pandas`, `torch`, project packages) and relative paths (`.env`, `./data.csv`) resolve naturally, matching what `terminal()` sees.
+- **`strict`** — scripts run in a temp staging directory with `sys.executable` (Hermes's own python). Maximum reproducibility, but project deps and relative paths won't resolve.
+
+Environment scrubbing (strips `*_API_KEY`, `*_TOKEN`, `*_SECRET`, `*_PASSWORD`, `*_CREDENTIAL`, `*_PASSWD`, `*_AUTH`) and the tool whitelist apply identically in both modes — switching mode does not change the security posture.
+
 ## Web Search Backends
 
 The `web_search`, `web_extract`, and `web_crawl` tools support four backend providers. Configure the backend in `config.yaml` or via `hermes tools`:
diff --git a/website/docs/user-guide/features/code-execution.md b/website/docs/user-guide/features/code-execution.md
index 53668da9010..4deae296220 100644
--- a/website/docs/user-guide/features/code-execution.md
+++ b/website/docs/user-guide/features/code-execution.md
@@ -1,12 +1,12 @@
 ---
 sidebar_position: 8
 title: "Code Execution"
-description: "Sandboxed Python execution with RPC tool access — collapse multi-step workflows into a single turn"
+description: "Programmatic Python execution with RPC tool access — collapse multi-step workflows into a single turn"
 ---
 
 # Code Execution (Programmatic Tool Calling)
 
-The `execute_code` tool lets the agent write Python scripts that call Hermes tools programmatically, collapsing multi-step workflows into a single LLM turn. The script runs in a sandboxed child process on the agent host, communicating via Unix domain socket RPC.
+The `execute_code` tool lets the agent write Python scripts that call Hermes tools programmatically, collapsing multi-step workflows into a single LLM turn. The script runs in a child process on the agent host, communicating with Hermes over a Unix domain socket RPC.
 
 ## How It Works
 
@@ -27,7 +27,7 @@ for r in results["data"]["web"]:
 print(summary)
 ```
 
-**Available tools in sandbox:** `web_search`, `web_extract`, `read_file`, `write_file`, `search_files`, `patch`, `terminal` (foreground only).
+**Available tools inside scripts:** `web_search`, `web_extract`, `read_file`, `write_file`, `search_files`, `patch`, `terminal` (foreground only).
 
 ## When the Agent Uses This
 
@@ -126,6 +126,35 @@ report = {
 print(json.dumps(report, indent=2))
 ```
 
+## Execution Mode
+
+`execute_code` has two execution modes controlled by `code_execution.mode` in `~/.hermes/config.yaml`:
+
+| Mode | Working directory | Python interpreter |
+|------|-------------------|--------------------|
+| **`project`** (default) | The session's working directory (same as `terminal()`) | Active `VIRTUAL_ENV` / `CONDA_PREFIX` python, falling back to Hermes's own python |
+| `strict` | A temp staging directory isolated from the user's project | `sys.executable` (Hermes's own python) |
+
+**When to leave it on `project`:** you want `import pandas`, `from my_project import foo`, or relative paths like `open(".env")` to work the same way they do in `terminal()`. This is almost always what you want.
+
+**When to flip to `strict`:** you need maximum reproducibility — you want the same interpreter every session regardless of which venv the user activated, and you want scripts quarantined from the project tree (no risk of accidentally reading project files through a relative path).
+
+```yaml
+# ~/.hermes/config.yaml
+code_execution:
+  mode: project   # or "strict"
+```
+
+Fallback behavior in `project` mode: if `VIRTUAL_ENV` / `CONDA_PREFIX` is unset, broken, or points at a Python older than 3.8, the resolver falls back cleanly to `sys.executable` — it never leaves the agent without a working interpreter.
+
+Security-critical invariants are identical across both modes:
+
+- environment scrubbing (API keys, tokens, credentials stripped)
+- tool whitelist (scripts cannot call `execute_code` recursively, `delegate_task`, or MCP tools)
+- resource limits (timeout, stdout cap, tool-call cap)
+
+Switching mode changes where scripts run and which interpreter runs them, not what credentials they can see or which tools they can call.
+
 ## Resource Limits
 
 | Resource | Limit | Notes |
@@ -140,6 +169,7 @@ All limits are configurable via `config.yaml`:
 ```yaml
 # In ~/.hermes/config.yaml
 code_execution:
+  mode: project      # project (default) | strict
   timeout: 300       # Max seconds per script (default: 300)
   max_tool_calls: 50 # Max tool calls per execution (default: 50)
 ```
@@ -176,7 +206,7 @@ Environment variables containing `KEY`, `TOKEN`, `SECRET`, `PASSWORD`, `CREDENTI
 
 ### Skill Environment Variable Passthrough
 
-When a skill declares `required_environment_variables` in its frontmatter, those variables are **automatically passed through** to both `execute_code` and `terminal` sandboxes after the skill is loaded. This lets skills use their declared API keys without weakening the security posture for arbitrary code.
+When a skill declares `required_environment_variables` in its frontmatter, those variables are **automatically passed through** to both `execute_code` and `terminal` child processes after the skill is loaded. This lets skills use their declared API keys without weakening the security posture for arbitrary code.
 
 For non-skill use cases, you can explicitly allowlist variables in `config.yaml`:
 
@@ -189,7 +219,7 @@ terminal:
 
 See the [Security guide](/docs/user-guide/security#environment-variable-passthrough) for full details.
 
-The script runs in a temporary directory that is cleaned up after execution. The child process runs in its own process group so it can be cleanly killed on timeout or interruption.
+Hermes always writes the script and the auto-generated `hermes_tools.py` RPC stub into a temp staging directory that is cleaned up after execution. In `strict` mode the script also *runs* there; in `project` mode it runs in the session's working directory (the staging directory stays on `PYTHONPATH` so imports still resolve). The child process runs in its own process group so it can be cleanly killed on timeout or interruption.
 
 ## execute_code vs terminal
 

From 8826d9c19796da80bd4d5cc6a3e61a6f45a09775 Mon Sep 17 00:00:00 2001
From: vominh1919 <92574218+vominh1919@users.noreply.github.com>
Date: Fri, 17 Apr 2026 16:35:02 +0700
Subject: [PATCH 016/143] fix: FTS5 LIKE fallback for CJK
 (Chinese/Japanese/Korean) queries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

FTS5's default tokenizer does not segment CJK text: a contiguous CJK run
is indexed as one token, so substring queries like '记忆断裂' return 0 results.

This fix adds a LIKE fallback: when FTS5 returns no results and the
query contains CJK characters, retry with WHERE content LIKE '%query%'.
Preserves FTS5 performance for English queries.

Fixes #11511
---
 hermes_state.py | 54 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 52 insertions(+), 2 deletions(-)

diff --git a/hermes_state.py b/hermes_state.py
index 5e563666e83..0a8b000ab47 100644
--- a/hermes_state.py
+++ b/hermes_state.py
@@ -987,6 +987,22 @@ class SessionDB:
 
         return sanitized.strip()
 
+
+    @staticmethod
+    def _contains_cjk(text: str) -> bool:
+        """Check if text contains CJK (Chinese, Japanese, Korean) characters."""
+        for ch in text:
+            cp = ord(ch)
+            if (0x4E00 <= cp <= 0x9FFF or    # CJK Unified Ideographs
+                0x3400 <= cp <= 0x4DBF or    # CJK Extension A
+                0x20000 <= cp <= 0x2A6DF or  # CJK Extension B
+                0x3000 <= cp <= 0x303F or    # CJK Symbols
+                0x3040 <= cp <= 0x309F or    # Hiragana
+                0x30A0 <= cp <= 0x30FF or    # Katakana
+                0xAC00 <= cp <= 0xD7AF):     # Hangul Syllables
+                return True
+        return False
+
     def search_messages(
         self,
         query: str,
@@ -1062,8 +1078,42 @@ class SessionDB:
                 cursor = self._conn.execute(sql, params)
             except sqlite3.OperationalError:
                 # FTS5 query syntax error despite sanitization — return empty
-                return []
-            matches = [dict(row) for row in cursor.fetchall()]
+                # unless query contains CJK (fall back to LIKE below)
+                if not self._contains_cjk(query):
+                    return []
+                matches = []
+            else:
+                matches = [dict(row) for row in cursor.fetchall()]
+
+        # LIKE fallback for CJK queries: FTS5's default tokenizer indexes a
+        # contiguous CJK run as one token, so substring queries match nothing.
+        if not matches and self._contains_cjk(query):
+            raw_query = query.strip('"').strip()
+            like_where = ["m.content LIKE ?"]
+            like_params: list = [f"%{raw_query}%"]
+            if source_filter is not None:
+                like_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
+                like_params.extend(source_filter)
+            if exclude_sources is not None:
+                like_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})")
+                like_params.extend(exclude_sources)
+            if role_filter:
+                like_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
+                like_params.extend(role_filter)
+            like_sql = f"""
+                SELECT m.id, m.session_id, m.role, m.content AS snippet,
+                       m.content, m.timestamp, m.tool_name,
+                       s.source, s.model, s.started_at AS session_started
+                FROM messages m
+                JOIN sessions s ON s.id = m.session_id
+                WHERE {' AND '.join(like_where)}
+                ORDER BY m.timestamp DESC
+                LIMIT ? OFFSET ?
+            """
+            like_params.extend([limit, offset])
+            with self._lock:
+                like_cursor = self._conn.execute(like_sql, like_params)
+                matches = [dict(row) for row in like_cursor.fetchall()]
 
         # Add surrounding context (1 message before + after each match).
         # Done outside the lock so we don't hold it across N sequential queries.

From 3b69b2fd615c4679c647946d597ef6c84763f370 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium@users.noreply.github.com>
Date: Sat, 18 Apr 2026 01:56:22 -0700
Subject: [PATCH 017/143] test(session-search): regression coverage for CJK
 LIKE fallback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Twelve tests under TestCJKSearchFallback guarding:
 - CJK detection across Chinese/Japanese/Korean/Hiragana/Katakana ranges
   (including the full Hangul syllables block \uac00-\ud7af, to catch
   the shorter-range typo from one of the duplicate PRs)
 - Substring match for multi-char Chinese, Japanese, Korean queries
 - Filter preservation (source_filter, exclude_sources, role_filter)
   in the LIKE path — guards against the SQL-builder bug from another
   duplicate PR where filter clauses landed after LIMIT/OFFSET
 - Snippet centered on the matched term (instr-based substr window),
   not the leading 200 chars of content
 - English fast-path untouched
 - Empty/no-match cases
 - Mixed CJK+English queries

Also:
 - hermes_state.py: LIKE-fallback snippet is now
   `substr(content, max(1, instr(content, ?) - 40), 120)`, centered on
   the match instead of the whole-content default. Credit goes to
   @iamagenius00 for the snippet idea in PR #11517.
 - scripts/release.py: add @iamagenius00 to AUTHOR_MAP so future
   release attribution resolves cleanly.

Refs #11511, #11516, #11517, #11541.

Co-authored-by: iamagenius00 <iamagenius00@users.noreply.github.com>
---
 hermes_state.py            |   7 +-
 scripts/release.py         |   1 +
 tests/test_hermes_state.py | 135 +++++++++++++++++++++++++++++++++++++
 3 files changed, 142 insertions(+), 1 deletion(-)

diff --git a/hermes_state.py b/hermes_state.py
index 0a8b000ab47..af97f7fbd89 100644
--- a/hermes_state.py
+++ b/hermes_state.py
@@ -1101,7 +1101,10 @@ class SessionDB:
                 like_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
                 like_params.extend(role_filter)
             like_sql = f"""
-                SELECT m.id, m.session_id, m.role, m.content AS snippet,
+                SELECT m.id, m.session_id, m.role,
+                       substr(m.content,
+                              max(1, instr(m.content, ?) - 40),
+                              120) AS snippet,
                        m.content, m.timestamp, m.tool_name,
                        s.source, s.model, s.started_at AS session_started
                 FROM messages m
@@ -1111,6 +1114,8 @@ class SessionDB:
                 LIMIT ? OFFSET ?
             """
             like_params.extend([limit, offset])
+            # instr() parameter goes first in the bound list
+            like_params = [raw_query] + like_params
             with self._lock:
                 like_cursor = self._conn.execute(like_sql, like_params)
                 matches = [dict(row) for row in like_cursor.fetchall()]
diff --git a/scripts/release.py b/scripts/release.py
index 5e909de76ec..372a4802ba7 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -207,6 +207,7 @@ AUTHOR_MAP = {
     "cola-runner@users.noreply.github.com": "cola-runner",
     "ygd58@users.noreply.github.com": "ygd58",
     "vominh1919@users.noreply.github.com": "vominh1919",
+    "iamagenius00@users.noreply.github.com": "iamagenius00",
     "trevmanthony@gmail.com": "trevthefoolish",
     "ziliangpeng@users.noreply.github.com": "ziliangpeng",
     "centripetal-star@users.noreply.github.com": "centripetal-star",
diff --git a/tests/test_hermes_state.py b/tests/test_hermes_state.py
index 5f9a16a529c..d54d7b9fb0f 100644
--- a/tests/test_hermes_state.py
+++ b/tests/test_hermes_state.py
@@ -479,6 +479,141 @@ class TestFTS5Search:
         assert s('my-app.config.ts') == '"my-app.config.ts"'
 
 
+# =========================================================================
+# CJK (Chinese/Japanese/Korean) LIKE fallback
+# =========================================================================
+
+class TestCJKSearchFallback:
+    """Regression tests for CJK search (see #11511).
+
+    SQLite FTS5's default tokenizer treats contiguous CJK runs as a single
+    token ("和其他agent的聊天记录" → one token), so substring queries like
+    "记忆断裂" return 0 rows despite the data being present. SessionDB falls
+    back to LIKE substring matching whenever FTS5 returns no results and
+    the query contains CJK characters.
+    """
+
+    def test_cjk_detection_covers_all_ranges(self):
+        from hermes_state import SessionDB
+        f = SessionDB._contains_cjk
+        # Chinese (CJK Unified Ideographs)
+        assert f("记忆断裂") is True
+        # Japanese Hiragana + Katakana
+        assert f("こんにちは") is True
+        assert f("カタカナ") is True
+        # Korean Hangul syllables (both early and late — guards against
+        # the \ud7a0-\ud7af typo seen in one of the duplicate PRs)
+        assert f("안녕하세요") is True
+        assert f("기억") is True
+        # Non-CJK, mixed CJK+ASCII, and empty inputs
+        assert f("hello world") is False
+        assert f("日本語mixedwithenglish") is True
+        assert f("") is False
+
+    def test_chinese_multichar_query_returns_results(self, db):
+        """The headline bug: multi-char Chinese query must not return []."""
+        db.create_session(session_id="s1", source="cli")
+        db.append_message(
+            "s1", role="user",
+            content="昨天和其他Agent的聊天记录，记忆断裂问题复现了",
+        )
+        results = db.search_messages("记忆断裂")
+        assert len(results) == 1
+        assert results[0]["session_id"] == "s1"
+
+    def test_chinese_bigram_query(self, db):
+        db.create_session(session_id="s1", source="telegram")
+        db.append_message("s1", role="user", content="今天讨论A2A通信协议的实现")
+        results = db.search_messages("通信")
+        assert len(results) == 1
+
+    def test_korean_query_returns_results(self, db):
+        """Guards against Hangul range typos (\\uac00-\\ud7af, not \\ud7a0-)."""
+        db.create_session(session_id="s1", source="cli")
+        db.append_message("s1", role="user", content="안녕하세요 반갑습니다")
+        results = db.search_messages("안녕")
+        assert len(results) == 1
+
+    def test_japanese_query_returns_results(self, db):
+        db.create_session(session_id="s1", source="cli")
+        db.append_message("s1", role="user", content="こんにちは世界")
+        assert len(db.search_messages("こんにちは")) == 1
+        assert len(db.search_messages("世界")) == 1
+
+    def test_cjk_fallback_preserves_source_filter(self, db):
+        """Guards against the SQL-builder bug where filter clauses land
+        after LIMIT/OFFSET (seen in one of the duplicate PRs)."""
+        db.create_session(session_id="s1", source="cli")
+        db.create_session(session_id="s2", source="telegram")
+        db.append_message("s1", role="user", content="记忆断裂在CLI")
+        db.append_message("s2", role="user", content="记忆断裂在Telegram")
+
+        results = db.search_messages("记忆断裂", source_filter=["telegram"])
+        assert len(results) == 1
+        assert results[0]["source"] == "telegram"
+
+    def test_cjk_fallback_preserves_exclude_sources(self, db):
+        db.create_session(session_id="s1", source="cli")
+        db.create_session(session_id="s2", source="tool")
+        db.append_message("s1", role="user", content="记忆断裂在CLI")
+        db.append_message("s2", role="assistant", content="记忆断裂在tool")
+
+        results = db.search_messages("记忆断裂", exclude_sources=["tool"])
+        sources = {r["source"] for r in results}
+        assert "tool" not in sources
+        assert "cli" in sources
+
+    def test_cjk_fallback_preserves_role_filter(self, db):
+        db.create_session(session_id="s1", source="cli")
+        db.append_message("s1", role="user", content="用户说的记忆断裂")
+        db.append_message("s1", role="assistant", content="助手说的记忆断裂")
+
+        results = db.search_messages("记忆断裂", role_filter=["assistant"])
+        assert len(results) == 1
+        assert results[0]["role"] == "assistant"
+
+    def test_cjk_snippet_is_centered_on_match(self, db):
+        """Snippet should contain the search term, not just the first N chars."""
+        db.create_session(session_id="s1", source="cli")
+        long_prefix = "这是一段很长的前缀用来把匹配位置推到文档中间" * 3
+        long_suffix = "这是一段很长的后缀内容填充剩余空间" * 3
+        db.append_message(
+            "s1", role="user",
+            content=f"{long_prefix}记忆断裂{long_suffix}",
+        )
+        results = db.search_messages("记忆断裂")
+        assert len(results) == 1
+        # The centered substr() snippet must include the matched term.
+        assert "记忆断裂" in results[0]["snippet"]
+
+    def test_english_query_still_uses_fts5_fast_path(self, db):
+        """English queries must not trigger the LIKE fallback (fast path regression)."""
+        db.create_session(session_id="s1", source="cli")
+        db.append_message("s1", role="user", content="Deploy docker containers")
+        results = db.search_messages("docker")
+        assert len(results) == 1
+        # No CJK in query → LIKE fallback must not run. We don't assert this
+        # directly (no instrumentation), but the FTS5 path produces an
+        # FTS5-style snippet with highlight markers when the term is short.
+        # At minimum: english queries must still match.
+
+    def test_cjk_query_with_no_matches_returns_empty(self, db):
+        db.create_session(session_id="s1", source="cli")
+        db.append_message("s1", role="user", content="unrelated English content")
+        results = db.search_messages("记忆断裂")
+        assert results == []
+
+    def test_mixed_cjk_english_query(self, db):
+        """Mixed queries should still fall back to LIKE when FTS5 misses."""
+        db.create_session(session_id="s1", source="cli")
+        db.append_message("s1", role="user", content="讨论Agent通信协议")
+        # "Agent通信" is CJK+English — the FTS5 default tokenizer does not
+        # index this substring as its own token, so the FTS5 query misses;
+        # the LIKE fallback handles the substring correctly.
+        results = db.search_messages("Agent通信")
+        assert len(results) == 1
+
+
 # =========================================================================
 # Session search and listing
 # =========================================================================

From cf012a05d895b4f2c19f75b27f799d222421be82 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 18 Apr 2026 03:53:21 -0700
Subject: [PATCH 018/143] docs(terminal): warn against stacking watch_patterns
 + notify_on_complete on end-of-run markers (#12113)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Stacking both features on the same event produces duplicate, delayed
notifications — delivery is async and continues firing after the process
exits, so matches on end-of-run markers (SUMMARY, DONE, PASS) arrive
after the agent has already polled/waited and moved on.

Updates both the terminal tool JSON schema description and the
terminal_tool() function docstring to make the split explicit:

- watch_patterns: mid-process signals only (errors, readiness markers,
  intermediate steps you want to react to before the process exits)
- notify_on_complete: end-of-run completion signal

No behavioural change.
---
 tools/terminal_tool.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py
index 69832cc1c7a..1182207b84c 100644
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -1126,7 +1126,7 @@ def terminal_tool(
         workdir: Working directory for this command (optional, uses session cwd if not set)
         pty: If True, use pseudo-terminal for interactive CLI tools (local backend only)
         notify_on_complete: If True and background=True, auto-notify the agent when the process exits
-        watch_patterns: List of strings to watch for in background output; triggers notification on match
+        watch_patterns: List of strings to watch for in background output; fires a notification on first match per pattern. Use ONLY for mid-process signals (errors, readiness markers) that appear before exit. For end-of-run markers use notify_on_complete instead — stacking both produces duplicate, delayed notifications.
 
     Returns:
         str: JSON string with output, exit_code, and error fields
@@ -1724,7 +1724,7 @@ TERMINAL_SCHEMA = {
             "watch_patterns": {
                 "type": "array",
                 "items": {"type": "string"},
-                "description": "List of strings to watch for in background process output. When any pattern matches a line of output, you'll be notified with the matching text — like notify_on_complete but triggers mid-process on specific output. Use for monitoring logs, watching for errors, or waiting for specific events (e.g. [\"ERROR\", \"FAIL\", \"listening on port\"])."
+                "description": "Strings to watch for in background process output. Fires a notification the first time each pattern matches a line of output. **Use ONLY for mid-process signals** you want to react to before the process exits — errors, readiness markers, intermediate step markers (e.g. [\"ERROR\", \"Traceback\", \"listening on port\"]). Do NOT use for end-of-run markers (summary headers, 'DONE', 'PASS' printed right before exit) — use `notify_on_complete` for that instead. Stacking end-of-run patterns on top of `notify_on_complete` produces duplicate, delayed notifications that arrive after you've already moved on, since delivery is asynchronous and continues after the process exits."
             }
         },
         "required": ["command"]

From 9527707f805a35377169616fa41dd7711e42a9dc Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 18 Apr 2026 04:13:32 -0700
Subject: [PATCH 019/143] fix(signal): back off sendTyping spam for unreachable
 recipients (#12118)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

base.py's _keep_typing refresh loop calls send_typing every ~2s while
the agent is processing. If signal-cli returns NETWORK_FAILURE for the
recipient (offline, unroutable, group membership lost), the unmitigated
path was a WARNING log every 2 seconds for as long as the agent stayed
busy — a user report showed 1048 warnings in 41 minutes for one
offline contact, plus the matching volume of pointless RPC traffic to
signal-cli.

- _rpc() accepts log_failures=False so callers can route repeated
  expected failures (typing) to DEBUG while keeping send/receive at
  WARNING.
- send_typing() tracks consecutive failures per chat. First failure
  still logs WARNING so transport issues remain visible; subsequent
  failures log at DEBUG. After three consecutive failures we skip the
  RPC during an exponential cooldown (16s, 32s, 60s cap) so we stop
  hammering signal-cli for a recipient it can't deliver to. A
  successful sendTyping resets the counters.
- _stop_typing_indicator() clears the backoff state so the next agent
  turn starts fresh.

E2E simulation against the reported 41-minute window: RPCs drop from
1230 to 45 (-96%), log lines from 1048 WARNINGs to 1 WARNING + 44
DEBUGs.

Credits kshitijk4poor (#12056) for the _rpc log_failures kwarg idea;
the broader restructure in that PR (nested per-chat loop inside
send_typing) is avoided here in favour of stateful backoff that
preserves base.py's existing _keep_typing architecture.
---
 gateway/platforms/signal.py  |  84 +++++++++++++++++++--
 tests/gateway/test_signal.py | 137 +++++++++++++++++++++++++++++++++++
 2 files changed, 215 insertions(+), 6 deletions(-)

diff --git a/gateway/platforms/signal.py b/gateway/platforms/signal.py
index 617713ad908..4df4193bc0d 100644
--- a/gateway/platforms/signal.py
+++ b/gateway/platforms/signal.py
@@ -160,6 +160,14 @@ class SignalAdapter(BasePlatformAdapter):
         self._sse_task: Optional[asyncio.Task] = None
         self._health_monitor_task: Optional[asyncio.Task] = None
         self._typing_tasks: Dict[str, asyncio.Task] = {}
+        # Per-chat typing-indicator backoff. When signal-cli reports
+        # NETWORK_FAILURE (recipient offline / unroutable), base.py's
+        # _keep_typing refresh loop would otherwise hammer sendTyping every
+        # ~2s indefinitely, producing WARNING-level log spam and pointless
+        # RPC traffic. We track consecutive failures per chat and skip the
+        # RPC during a cooldown window instead.
+        self._typing_failures: Dict[str, int] = {}
+        self._typing_skip_until: Dict[str, float] = {}
         self._running = False
         self._last_sse_activity = 0.0
         self._sse_response: Optional[httpx.Response] = None
@@ -548,8 +556,22 @@ class SignalAdapter(BasePlatformAdapter):
     # JSON-RPC Communication
     # ------------------------------------------------------------------
 
-    async def _rpc(self, method: str, params: dict, rpc_id: str = None) -> Any:
-        """Send a JSON-RPC 2.0 request to signal-cli daemon."""
+    async def _rpc(
+        self,
+        method: str,
+        params: dict,
+        rpc_id: str = None,
+        *,
+        log_failures: bool = True,
+    ) -> Any:
+        """Send a JSON-RPC 2.0 request to signal-cli daemon.
+
+        When ``log_failures=False``, error and exception paths log at DEBUG
+        instead of WARNING — used by the typing-indicator path to silence
+        repeated NETWORK_FAILURE spam for unreachable recipients while
+        still preserving visibility for the first occurrence and for
+        unrelated RPCs.
+        """
         if not self.client:
             logger.warning("Signal: RPC called but client not connected")
             return None
@@ -574,13 +596,19 @@ class SignalAdapter(BasePlatformAdapter):
             data = resp.json()
 
             if "error" in data:
-                logger.warning("Signal RPC error (%s): %s", method, data["error"])
+                if log_failures:
+                    logger.warning("Signal RPC error (%s): %s", method, data["error"])
+                else:
+                    logger.debug("Signal RPC error (%s): %s", method, data["error"])
                 return None
 
             return data.get("result")
 
         except Exception as e:
-            logger.warning("Signal RPC %s failed: %s", method, e)
+            if log_failures:
+                logger.warning("Signal RPC %s failed: %s", method, e)
+            else:
+                logger.debug("Signal RPC %s failed: %s", method, e)
             return None
 
     # ------------------------------------------------------------------
@@ -627,7 +655,28 @@ class SignalAdapter(BasePlatformAdapter):
                 self._recent_sent_timestamps.pop()
 
     async def send_typing(self, chat_id: str, metadata=None) -> None:
-        """Send a typing indicator."""
+        """Send a typing indicator.
+
+        base.py's ``_keep_typing`` refresh loop calls this every ~2s while
+        the agent is processing. If signal-cli returns NETWORK_FAILURE for
+        this recipient (offline, unroutable, group membership lost, etc.)
+        the unmitigated behaviour is: a WARNING log every 2 seconds for as
+        long as the agent keeps running. Instead we:
+
+        - log only the first failure in a streak at WARNING (subsequent
+          failures log at DEBUG) so transport issues are still visible once
+          but don't flood the log,
+        - skip the RPC entirely during an exponential cooldown window once
+          three consecutive failures have happened, so we stop hammering
+          signal-cli with requests it can't deliver.
+
+        A successful sendTyping clears the counters.
+        """
+        now = time.monotonic()
+        skip_until = self._typing_skip_until.get(chat_id, 0.0)
+        if now < skip_until:
+            return
+
         params: Dict[str, Any] = {
             "account": self.account,
         }
@@ -637,7 +686,26 @@ class SignalAdapter(BasePlatformAdapter):
         else:
             params["recipient"] = [chat_id]
 
-        await self._rpc("sendTyping", params, rpc_id="typing")
+        fails = self._typing_failures.get(chat_id, 0)
+        result = await self._rpc(
+            "sendTyping",
+            params,
+            rpc_id="typing",
+            log_failures=(fails == 0),
+        )
+
+        if result is None:
+            fails += 1
+            self._typing_failures[chat_id] = fails
+            # After 3 consecutive failures, back off exponentially (16s,
+            # 32s, 60s cap) to stop spamming signal-cli for a recipient
+            # that clearly isn't reachable right now.
+            if fails >= 3:
+                backoff = min(60.0, 16.0 * (2 ** (fails - 3)))
+                self._typing_skip_until[chat_id] = now + backoff
+        else:
+            self._typing_failures.pop(chat_id, None)
+            self._typing_skip_until.pop(chat_id, None)
 
     async def send_image(
         self,
@@ -789,6 +857,10 @@ class SignalAdapter(BasePlatformAdapter):
                 await task
             except asyncio.CancelledError:
                 pass
+        # Reset per-chat typing backoff state so the next agent turn starts
+        # fresh rather than inheriting a cooldown from a prior conversation.
+        self._typing_failures.pop(chat_id, None)
+        self._typing_skip_until.pop(chat_id, None)
 
     async def stop_typing(self, chat_id: str) -> None:
         """Public interface for stopping typing — called by base adapter's
diff --git a/tests/gateway/test_signal.py b/tests/gateway/test_signal.py
index 26f1e4f3bb3..eee3a0db8aa 100644
--- a/tests/gateway/test_signal.py
+++ b/tests/gateway/test_signal.py
@@ -740,3 +740,140 @@ class TestSignalStopTyping:
         await adapter.stop_typing("+155****4567")
 
         adapter._stop_typing_indicator.assert_awaited_once_with("+155****4567")
+
+
+# ---------------------------------------------------------------------------
+# Typing-indicator backoff on repeated failures (Signal RPC spam fix)
+# ---------------------------------------------------------------------------
+
+class TestSignalTypingBackoff:
+    """When base.py's _keep_typing refresh loop calls send_typing every ~2s
+    and the recipient is unreachable (NETWORK_FAILURE), the adapter must:
+
+    - log WARNING only for the first failure (subsequent failures use DEBUG
+      via log_failures=False on the _rpc call)
+    - after 3 consecutive failures, skip the RPC entirely during an
+      exponential cooldown window instead of hammering signal-cli every 2s
+    - reset counters on a successful sendTyping
+    - reset counters when _stop_typing_indicator() is called for the chat
+    """
+
+    @pytest.mark.asyncio
+    async def test_first_failure_logs_at_warning_subsequent_at_debug(
+        self, monkeypatch
+    ):
+        adapter = _make_signal_adapter(monkeypatch)
+        calls = []
+
+        async def _fake_rpc(method, params, rpc_id=None, *, log_failures=True):
+            calls.append({"log_failures": log_failures})
+            return None  # simulate NETWORK_FAILURE
+
+        adapter._rpc = _fake_rpc
+
+        await adapter.send_typing("+155****4567")
+        await adapter.send_typing("+155****4567")
+
+        assert len(calls) == 2
+        assert calls[0]["log_failures"] is True   # first failure — warn
+        assert calls[1]["log_failures"] is False  # subsequent — debug
+
+    @pytest.mark.asyncio
+    async def test_three_consecutive_failures_trigger_cooldown(
+        self, monkeypatch
+    ):
+        adapter = _make_signal_adapter(monkeypatch)
+        call_count = {"n": 0}
+
+        async def _fake_rpc(method, params, rpc_id=None, *, log_failures=True):
+            call_count["n"] += 1
+            return None
+
+        adapter._rpc = _fake_rpc
+
+        # Three failures engage the cooldown.
+        await adapter.send_typing("+155****4567")
+        await adapter.send_typing("+155****4567")
+        await adapter.send_typing("+155****4567")
+        assert call_count["n"] == 3
+        assert "+155****4567" in adapter._typing_skip_until
+
+        # Fourth, fifth, ... calls during the cooldown window are short-
+        # circuited — the RPC is not issued at all.
+        await adapter.send_typing("+155****4567")
+        await adapter.send_typing("+155****4567")
+        assert call_count["n"] == 3
+
+    @pytest.mark.asyncio
+    async def test_cooldown_is_per_chat_not_global(self, monkeypatch):
+        adapter = _make_signal_adapter(monkeypatch)
+        call_log = []
+
+        async def _fake_rpc(method, params, rpc_id=None, *, log_failures=True):
+            call_log.append(params.get("recipient") or params.get("groupId"))
+            return None
+
+        adapter._rpc = _fake_rpc
+
+        # Drive chat A into cooldown.
+        for _ in range(3):
+            await adapter.send_typing("+155****4567")
+        assert "+155****4567" in adapter._typing_skip_until
+
+        # Chat B is unaffected — still makes RPCs.
+        await adapter.send_typing("+155****9999")
+        await adapter.send_typing("+155****9999")
+        assert "+155****9999" not in adapter._typing_skip_until
+        # Chat A cooldown untouched
+        assert "+155****4567" in adapter._typing_skip_until
+
+    @pytest.mark.asyncio
+    async def test_success_resets_failure_counter_and_cooldown(
+        self, monkeypatch
+    ):
+        adapter = _make_signal_adapter(monkeypatch)
+        result_queue = [None, None, {"timestamp": 12345}]
+        call_log = []
+
+        async def _fake_rpc(method, params, rpc_id=None, *, log_failures=True):
+            call_log.append(log_failures)
+            return result_queue.pop(0)
+
+        adapter._rpc = _fake_rpc
+
+        await adapter.send_typing("+155****4567")   # fail 1 — warn
+        await adapter.send_typing("+155****4567")   # fail 2 — debug
+        await adapter.send_typing("+155****4567")   # success — reset
+
+        assert adapter._typing_failures.get("+155****4567", 0) == 0
+        assert "+155****4567" not in adapter._typing_skip_until
+
+        # Next failure after recovery logs at WARNING again (fresh counter).
+        async def _fail(method, params, rpc_id=None, *, log_failures=True):
+            call_log.append(log_failures)
+            return None
+
+        adapter._rpc = _fail
+        await adapter.send_typing("+155****4567")
+        assert call_log[-1] is True   # first failure in a fresh cycle
+
+    @pytest.mark.asyncio
+    async def test_stop_typing_indicator_clears_backoff_state(
+        self, monkeypatch
+    ):
+        adapter = _make_signal_adapter(monkeypatch)
+
+        async def _fail(method, params, rpc_id=None, *, log_failures=True):
+            return None
+
+        adapter._rpc = _fail
+
+        for _ in range(3):
+            await adapter.send_typing("+155****4567")
+        assert adapter._typing_failures.get("+155****4567") == 3
+        assert "+155****4567" in adapter._typing_skip_until
+
+        await adapter._stop_typing_indicator("+155****4567")
+
+        assert "+155****4567" not in adapter._typing_failures
+        assert "+155****4567" not in adapter._typing_skip_until

From f9667331e559caf8476fa4775b8add4c0c23d933 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 18 Apr 2026 04:14:05 -0700
Subject: [PATCH 020/143] docs(browser): improve /browser connect setup
 guidance (#12123)

- Note that /browser connect is CLI-only and won't work in gateways (WebUI, Telegram, Discord).
- Update the Chrome launch command to use a dedicated --user-data-dir, so port 9222 actually comes up even when Chrome is already running with the user's regular profile.
- Add --no-first-run --no-default-browser-check to skip the fresh-profile wizard.
- Explain why the dedicated user-data-dir matters.

Community tip via Karamjit Singh.

Co-authored-by: teknium1 <teknium@noreply.github.com>
---
 website/docs/user-guide/features/browser.md | 23 ++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/website/docs/user-guide/features/browser.md b/website/docs/user-guide/features/browser.md
index 42b6815df51..5b2462d2e37 100644
--- a/website/docs/user-guide/features/browser.md
+++ b/website/docs/user-guide/features/browser.md
@@ -163,6 +163,10 @@ When Camofox runs in headed mode (with a visible browser window), it exposes a V
 
 Instead of a cloud provider, you can attach Hermes browser tools to your own running Chrome instance via the Chrome DevTools Protocol (CDP). This is useful when you want to see what the agent is doing in real-time, interact with pages that require your own cookies/sessions, or avoid cloud browser costs.
 
+:::note
+`/browser connect` is an **interactive-CLI slash command** — it is not dispatched by the gateway. If you try to run it inside a WebUI, Telegram, Discord, or other gateway chat, the message will be sent to the agent as plain text and the command will not execute. Start Hermes from the terminal (`hermes` or `hermes chat`) and issue `/browser connect` there.
+:::
+
 In the CLI, use:
 
 ```
@@ -175,14 +179,27 @@ In the CLI, use:
 If Chrome isn't already running with remote debugging, Hermes will attempt to auto-launch it with `--remote-debugging-port=9222`.
 
 :::tip
-To start Chrome manually with CDP enabled:
+To start Chrome manually with CDP enabled, use a dedicated user-data-dir so the debug port actually comes up even if Chrome is already running with your normal profile:
+
 ```bash
 # Linux
-google-chrome --remote-debugging-port=9222
+google-chrome \
+  --remote-debugging-port=9222 \
+  --user-data-dir="$HOME/.hermes/chrome-debug" \
+  --no-first-run \
+  --no-default-browser-check &
 
 # macOS
-"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" --remote-debugging-port=9222
+"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" \
+  --remote-debugging-port=9222 \
+  --user-data-dir="$HOME/.hermes/chrome-debug" \
+  --no-first-run \
+  --no-default-browser-check &
 ```
+
+Then launch the Hermes CLI and run `/browser connect`.
+
+**Why `--user-data-dir`?** Without it, launching Chrome while a regular Chrome instance is already running typically opens a new window on the existing process — and that existing process was not started with `--remote-debugging-port`, so port 9222 never opens. A dedicated user-data-dir forces a fresh Chrome process where the debug port actually listens. `--no-first-run --no-default-browser-check` skips the first-launch wizard for the fresh profile.
 :::
 
 When connected via CDP, all browser tools (`browser_navigate`, `browser_click`, etc.) operate on your live Chrome instance instead of spinning up a cloud session.

From 2edebedc9eeb48093dda2a58ce3715b34d23bc15 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 18 Apr 2026 04:17:18 -0700
Subject: [PATCH 021/143] feat(steer): /steer <prompt> injects a mid-run note
 after the next tool call (#12116)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(steer): /steer <prompt> injects a mid-run note after the next tool call

Adds a new slash command that sits between /queue (turn boundary) and
interrupt. /steer <text> stashes the message on the running agent and
the agent loop appends it to the LAST tool result's content once the
current tool batch finishes. The model sees it as part of the tool
output on its next iteration.

No interrupt is fired, no new user turn is inserted, and no prompt
cache invalidation happens beyond the normal per-turn tool-result
churn. Message-role alternation is preserved — we only modify an
existing role:"tool" message's content.

Wiring
------
- hermes_cli/commands.py: register /steer + add to ACTIVE_SESSION_BYPASS_COMMANDS.
- run_agent.py: add _pending_steer state, AIAgent.steer(), _drain_pending_steer(),
  _apply_pending_steer_to_tool_results(); drain at end of both parallel and
  sequential tool executors; clear on interrupt; return leftover as
  result['pending_steer'] if the agent exits before another tool batch.
- cli.py: /steer handler — route to agent.steer() when running, fall back to
  the regular queue otherwise; deliver result['pending_steer'] as next turn.
- gateway/run.py: running-agent intercept calls running_agent.steer(); idle-agent
  path strips the prefix and forwards as a regular user message.
- tui_gateway/server.py: new session.steer JSON-RPC method.
- ui-tui: SessionSteerResponse type + local /steer slash command that calls
  session.steer when ui.busy, otherwise enqueues for the next turn.

Fallbacks
---------
- Agent exits mid-steer → surfaces in run_conversation result as pending_steer
  so CLI/gateway deliver it as the next user turn instead of silently dropping it.
- All tools skipped after interrupt → re-stashes pending_steer for the caller.
- No active agent → /steer reduces to sending the text as a normal message.

Tests
-----
- tests/run_agent/test_steer.py — accept/reject, concatenation, drain,
  last-tool-result injection, multimodal list content, thread safety,
  cleared-on-interrupt, registry membership, bypass-set membership.
- tests/gateway/test_steer_command.py — running agent, pending sentinel,
  missing steer() method, rejected payload, empty payload.
- tests/gateway/test_command_bypass_active_session.py — /steer bypasses
  the Level-1 base adapter guard.
- tests/test_tui_gateway_server.py — session.steer RPC paths.

72/72 targeted tests pass under scripts/run_tests.sh.

* feat(steer): register /steer in Discord's native slash tree

Discord's app_commands tree is a curated subset of slash commands (not
derived from COMMAND_REGISTRY like Telegram/Slack). /steer already
works there as plain text (routes through handle_message → base
adapter bypass → runner), but registering it here adds Discord's
native autocomplete + argument hint UI so users can discover and
type it like any other first-class command.
---
 cli.py                                        |  34 ++-
 gateway/platforms/discord.py                  |   5 +
 gateway/run.py                                |  63 +++++
 hermes_cli/commands.py                        |   3 +
 run_agent.py                                  | 152 ++++++++++++
 .../test_command_bypass_active_session.py     |  19 ++
 tests/gateway/test_steer_command.py           | 191 +++++++++++++++
 tests/run_agent/test_steer.py                 | 228 ++++++++++++++++++
 tests/test_tui_gateway_server.py              |  71 ++++++
 tui_gateway/server.py                         |  25 ++
 ui-tui/src/app/slash/commands/core.ts         |  32 ++-
 ui-tui/src/gatewayTypes.ts                    |   5 +
 12 files changed, 826 insertions(+), 2 deletions(-)
 create mode 100644 tests/gateway/test_steer_command.py
 create mode 100644 tests/run_agent/test_steer.py

diff --git a/cli.py b/cli.py
index ea76991acc3..8aa8bb03f11 100644
--- a/cli.py
+++ b/cli.py
@@ -5720,6 +5720,30 @@ class HermesCLI:
                     _cprint(f"  Queued for the next turn: {payload[:80]}{'...' if len(payload) > 80 else ''}")
                 else:
                     _cprint(f"  Queued: {payload[:80]}{'...' if len(payload) > 80 else ''}")
+        elif canonical == "steer":
+            # Inject a message after the next tool call without interrupting.
+            # If the agent is actively running, push the text into the agent's
+            # pending_steer slot — the drain hook in _execute_tool_calls_*
+            # will append it to the next tool result's content. If no agent
+            # is running, fall back to queue semantics (same as /queue).
+            parts = cmd_original.split(None, 1)
+            payload = parts[1].strip() if len(parts) > 1 else ""
+            if not payload:
+                _cprint("  Usage: /steer <prompt>")
+            elif self._agent_running and self.agent is not None and hasattr(self.agent, "steer"):
+                try:
+                    accepted = self.agent.steer(payload)
+                except Exception as exc:
+                    _cprint(f"  Steer failed: {exc}")
+                else:
+                    if accepted:
+                        _cprint(f"  ⏩ Steer queued — arrives after the next tool call: {payload[:80]}{'...' if len(payload) > 80 else ''}")
+                    else:
+                        _cprint("  Steer rejected (empty payload).")
+            else:
+                # No active run — treat as a normal next-turn message.
+                self._pending_input.put(payload)
+                _cprint(f"  No agent running; queued as next turn: {payload[:80]}{'...' if len(payload) > 80 else ''}")
         elif canonical == "skin":
             self._handle_skin_command(cmd_original)
         elif canonical == "voice":
@@ -8244,7 +8268,15 @@ class HermesCLI:
                 else:
                     print(f"\n⚡ Sending after interrupt: '{preview}'")
                 self._pending_input.put(combined)
-            
+
+            # If a /steer was left over (agent finished before another tool
+            # batch could absorb it), deliver it as the next user turn.
+            _leftover_steer = result.get("pending_steer") if result else None
+            if _leftover_steer and hasattr(self, '_pending_input'):
+                preview = _leftover_steer[:60] + ("..." if len(_leftover_steer) > 60 else "")
+                print(f"\n⏩ Delivering leftover /steer as next turn: '{preview}'")
+                self._pending_input.put(_leftover_steer)
+
             return response
             
         except Exception as e:
diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py
index 5cad956a362..31973b9629b 100644
--- a/gateway/platforms/discord.py
+++ b/gateway/platforms/discord.py
@@ -1994,6 +1994,11 @@ class DiscordAdapter(BasePlatformAdapter):
         async def slash_stop(interaction: discord.Interaction):
             await self._run_simple_slash(interaction, "/stop", "Stop requested~")
 
+        @tree.command(name="steer", description="Inject a message after the next tool call (no interrupt)")
+        @discord.app_commands.describe(prompt="Text to inject into the agent's next tool result")
+        async def slash_steer(interaction: discord.Interaction, prompt: str):
+            await self._run_simple_slash(interaction, f"/steer {prompt}".strip())
+
         @tree.command(name="compress", description="Compress conversation context")
         async def slash_compress(interaction: discord.Interaction):
             await self._run_simple_slash(interaction, "/compress")
diff --git a/gateway/run.py b/gateway/run.py
index 62b813f0d6b..1525ad14776 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -3019,6 +3019,54 @@ class GatewayRunner:
                     adapter._pending_messages[_quick_key] = queued_event
                 return "Queued for the next turn."
 
+            # /steer <prompt> — inject mid-run after the next tool call.
+            # Unlike /queue (turn boundary), /steer lands BETWEEN tool-call
+            # iterations inside the same agent run, by appending to the
+            # last tool result's content. No interrupt, no new user turn,
+            # no role-alternation violation.
+            if _cmd_def_inner and _cmd_def_inner.name == "steer":
+                steer_text = event.get_command_args().strip()
+                if not steer_text:
+                    return "Usage: /steer <prompt>"
+                running_agent = self._running_agents.get(_quick_key)
+                if running_agent is _AGENT_PENDING_SENTINEL:
+                    # Agent hasn't started yet — queue as turn-boundary fallback.
+                    adapter = self.adapters.get(source.platform)
+                    if adapter:
+                        from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
+                        queued_event = _ME(
+                            text=steer_text,
+                            message_type=_MT.TEXT,
+                            source=event.source,
+                            message_id=event.message_id,
+                            channel_prompt=event.channel_prompt,
+                        )
+                        adapter._pending_messages[_quick_key] = queued_event
+                    return "Agent still starting — /steer queued for the next turn."
+                if running_agent and hasattr(running_agent, "steer"):
+                    try:
+                        accepted = running_agent.steer(steer_text)
+                    except Exception as exc:
+                        logger.warning("Steer failed for session %s: %s", _quick_key[:20], exc)
+                        return f"⚠️ Steer failed: {exc}"
+                    if accepted:
+                        preview = steer_text[:60] + ("..." if len(steer_text) > 60 else "")
+                        return f"⏩ Steer queued — arrives after the next tool call: '{preview}'"
+                    return "Steer rejected (empty payload)."
+                # Running agent is missing or lacks steer() — fall back to queue.
+                adapter = self.adapters.get(source.platform)
+                if adapter:
+                    from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
+                    queued_event = _ME(
+                        text=steer_text,
+                        message_type=_MT.TEXT,
+                        source=event.source,
+                        message_id=event.message_id,
+                        channel_prompt=event.channel_prompt,
+                    )
+                    adapter._pending_messages[_quick_key] = queued_event
+                return "No active agent — /steer queued for the next turn."
+
             # /model must not be used while the agent is running.
             if _cmd_def_inner and _cmd_def_inner.name == "model":
                 return "Agent is running — wait or /stop first, then switch models."
@@ -3260,6 +3308,21 @@ class GatewayRunner:
         if canonical == "btw":
             return await self._handle_btw_command(event)
 
+        if canonical == "steer":
+            # No active agent — /steer has no tool call to inject into.
+            # Strip the prefix so downstream treats it as a normal user
+            # message. If the payload is empty, surface the usage hint.
+            steer_payload = event.get_command_args().strip()
+            if not steer_payload:
+                return "Usage: /steer <prompt>  (no agent is running; sending as a normal message)"
+            try:
+                event.text = steer_payload
+            except Exception:
+                pass
+            # Do NOT return — fall through to _handle_message_with_agent
+            # at the end of this function so the rewritten text is sent
+            # to the agent as a regular user turn.
+
         if canonical == "voice":
             return await self._handle_voice_command(event)
 
diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py
index ce257b0d7cb..681e6f9b265 100644
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -91,6 +91,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
                aliases=("tasks",)),
     CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session",
                aliases=("q",), args_hint="<prompt>"),
+    CommandDef("steer", "Inject a message after the next tool call without interrupting", "Session",
+               args_hint="<prompt>"),
     CommandDef("status", "Show session info", "Session"),
     CommandDef("profile", "Show active profile name and home directory", "Info"),
     CommandDef("sethome", "Set this chat as the home channel", "Session",
@@ -275,6 +277,7 @@ ACTIVE_SESSION_BYPASS_COMMANDS: frozenset[str] = frozenset(
         "queue",
         "restart",
         "status",
+        "steer",
         "stop",
         "update",
     }
diff --git a/run_agent.py b/run_agent.py
index d5ff125e33b..a47455e5345 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -832,6 +832,16 @@ class AIAgent:
         self._interrupt_thread_signal_pending = False
         self._client_lock = threading.RLock()
 
+        # /steer mechanism — inject a user note into the next tool result
+        # without interrupting the agent. Unlike interrupt(), steer() does
+        # NOT set _interrupt_requested; it waits for the current tool batch
+        # to finish naturally, then the drain hook appends the text to the
+        # last tool result's content so the model sees it on its next
+        # iteration. Message-role alternation is preserved (we modify an
+        # existing tool message rather than inserting a new user turn).
+        self._pending_steer: Optional[str] = None
+        self._pending_steer_lock = threading.Lock()
+
         # Concurrent-tool worker thread tracking.  `_execute_tool_calls_concurrent`
         # runs each tool on its own ThreadPoolExecutor worker — those worker
         # threads have tids distinct from `_execution_thread_id`, so
@@ -3265,6 +3275,129 @@ class AIAgent:
                     _set_interrupt(False, _wtid)
                 except Exception:
                     pass
+        # A hard interrupt supersedes any pending /steer — the steer was
+        # meant for the agent's next tool-call iteration, which will no
+        # longer happen. Drop it instead of surprising the user with a
+        # late injection on the post-interrupt turn.
+        _steer_lock = getattr(self, "_pending_steer_lock", None)
+        if _steer_lock is not None:
+            with _steer_lock:
+                self._pending_steer = None
+
+    def steer(self, text: str) -> bool:
+        """
+        Inject a user message into the next tool result without interrupting.
+
+        Unlike interrupt(), this does NOT stop the current tool call. The
+        text is stashed and the agent loop appends it to the LAST tool
+        result's content once the current tool batch finishes. The model
+        sees the steer as part of the tool output on its next iteration.
+
+        Thread-safe: callable from gateway/CLI/TUI threads. Multiple calls
+        before the drain point concatenate with newlines.
+
+        Args:
+            text: The user text to inject (stripped). Blank strings are ignored.
+
+        Returns:
+            True if the steer was accepted, False if the text was empty or blank.
+        """
+        if not text or not text.strip():
+            return False
+        cleaned = text.strip()
+        _lock = getattr(self, "_pending_steer_lock", None)
+        if _lock is None:
+            # Test stubs that built AIAgent via object.__new__ skip __init__.
+            # Fall back to direct attribute set; no concurrent callers expected
+            # in those stubs.
+            existing = getattr(self, "_pending_steer", None)
+            self._pending_steer = (existing + "\n" + cleaned) if existing else cleaned
+            return True
+        with _lock:
+            if self._pending_steer:
+                self._pending_steer = self._pending_steer + "\n" + cleaned
+            else:
+                self._pending_steer = cleaned
+        return True
+
+    def _drain_pending_steer(self) -> Optional[str]:
+        """Return the pending steer text (if any) and clear the slot.
+
+        Safe to call from the agent execution thread after appending tool
+        results. Returns None when no steer is pending.
+        """
+        _lock = getattr(self, "_pending_steer_lock", None)
+        if _lock is None:
+            text = getattr(self, "_pending_steer", None)
+            self._pending_steer = None
+            return text
+        with _lock:
+            text = self._pending_steer
+            self._pending_steer = None
+        return text
+
+    def _apply_pending_steer_to_tool_results(self, messages: list, num_tool_msgs: int) -> None:
+        """Append any pending /steer text to the last tool result in this turn.
+
+        Called at the end of a tool-call batch, before the next API call.
+        The steer is appended to the last ``role:"tool"`` message's content
+        with a clear marker so the model understands it came from the user
+        and NOT from the tool itself. Role alternation is preserved —
+        nothing new is inserted, we only modify existing content.
+
+        Args:
+            messages: The running messages list.
+            num_tool_msgs: Number of tool results appended in this batch;
+                used to locate the tail slice safely.
+        """
+        if num_tool_msgs <= 0 or not messages:
+            return
+        steer_text = self._drain_pending_steer()
+        if not steer_text:
+            return
+        # Find the last tool-role message in the recent tail. Skipping
+        # non-tool messages defends against future code appending
+        # something else at the boundary.
+        target_idx = None
+        for j in range(len(messages) - 1, max(len(messages) - num_tool_msgs - 1, -1), -1):
+            msg = messages[j]
+            if isinstance(msg, dict) and msg.get("role") == "tool":
+                target_idx = j
+                break
+        if target_idx is None:
+            # No tool result in this batch (e.g. all skipped by interrupt);
+            # put the steer back so the caller's fallback path can deliver
+            # it as a normal next-turn user message.
+            _lock = getattr(self, "_pending_steer_lock", None)
+            if _lock is not None:
+                with _lock:
+                    if self._pending_steer:
+                        self._pending_steer = self._pending_steer + "\n" + steer_text
+                    else:
+                        self._pending_steer = steer_text
+            else:
+                existing = getattr(self, "_pending_steer", None)
+                self._pending_steer = (existing + "\n" + steer_text) if existing else steer_text
+            return
+        marker = f"\n\n[USER STEER (injected mid-run, not tool output): {steer_text}]"
+        existing_content = messages[target_idx].get("content", "")
+        if not isinstance(existing_content, str):
+            # Anthropic multimodal content blocks — preserve them and append
+            # a text block at the end.
+            try:
+                blocks = list(existing_content) if existing_content else []
+                blocks.append({"type": "text", "text": marker.lstrip()})
+                messages[target_idx]["content"] = blocks
+            except Exception:
+                # Fall back to string replacement if content shape is unexpected.
+                messages[target_idx]["content"] = f"{existing_content}{marker}"
+        else:
+            messages[target_idx]["content"] = existing_content + marker
+        logger.info(
+            "Delivered /steer to agent after tool batch (%d chars): %s",
+            len(steer_text),
+            steer_text[:120] + ("..." if len(steer_text) > 120 else ""),
+        )
 
     def _touch_activity(self, desc: str) -> None:
         """Update the last-activity timestamp and description (thread-safe)."""
@@ -7951,6 +8084,13 @@ class AIAgent:
             turn_tool_msgs = messages[-num_tools:]
             enforce_turn_budget(turn_tool_msgs, env=get_active_env(effective_task_id))
 
+        # ── /steer injection ──────────────────────────────────────────────
+        # Append any pending user steer text to the last tool result so the
+        # agent sees it on its next iteration. Runs AFTER budget enforcement
+        # so the steer marker is never truncated. See steer() for details.
+        if num_tools > 0:
+            self._apply_pending_steer_to_tool_results(messages, num_tools)
+
     def _execute_tool_calls_sequential(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
         """Execute tool calls sequentially (original behavior). Used for single calls or interactive tools."""
         for i, tool_call in enumerate(assistant_message.tool_calls, 1):
@@ -8330,6 +8470,12 @@ class AIAgent:
         if num_tools_seq > 0:
             enforce_turn_budget(messages[-num_tools_seq:], env=get_active_env(effective_task_id))
 
+        # ── /steer injection ──────────────────────────────────────────────
+        # See _execute_tool_calls_concurrent for the rationale. Same hook,
+        # applied to sequential execution as well.
+        if num_tools_seq > 0:
+            self._apply_pending_steer_to_tool_results(messages, num_tools_seq)
+
 
 
     def _handle_max_iterations(self, messages: list, api_call_count: int) -> str:
@@ -11610,6 +11756,12 @@ class AIAgent:
             "cost_status": self.session_cost_status,
             "cost_source": self.session_cost_source,
         }
+        # If a /steer landed after the final assistant turn (no more tool
+        # batches to drain into), hand it back to the caller so it can be
+        # delivered as the next user turn instead of being silently lost.
+        _leftover_steer = self._drain_pending_steer()
+        if _leftover_steer:
+            result["pending_steer"] = _leftover_steer
         self._response_was_previewed = False
         
         # Include interrupt message if one triggered the interrupt
diff --git a/tests/gateway/test_command_bypass_active_session.py b/tests/gateway/test_command_bypass_active_session.py
index 10ff062126a..c456243945a 100644
--- a/tests/gateway/test_command_bypass_active_session.py
+++ b/tests/gateway/test_command_bypass_active_session.py
@@ -200,6 +200,25 @@ class TestCommandBypassActiveSession:
             "/background response was not sent back to the user"
         )
 
+    @pytest.mark.asyncio
+    async def test_steer_bypasses_guard(self):
+        """/steer must bypass the Level-1 active-session guard so it reaches
+        the gateway runner's /steer handler and injects into the running
+        agent instead of being queued as user text for the next turn.
+        """
+        adapter = _make_adapter()
+        sk = _session_key()
+        adapter._active_sessions[sk] = asyncio.Event()
+
+        await adapter.handle_message(_make_event("/steer also check auth.log"))
+
+        assert sk not in adapter._pending_messages, (
+            "/steer was queued as a pending message instead of being dispatched"
+        )
+        assert any("handled:steer" in r for r in adapter.sent_responses), (
+            "/steer response was not sent back to the user"
+        )
+
     @pytest.mark.asyncio
     async def test_help_bypasses_guard(self):
         """/help must bypass so it is not silently dropped as pending slash text."""
diff --git a/tests/gateway/test_steer_command.py b/tests/gateway/test_steer_command.py
new file mode 100644
index 00000000000..b756ff09622
--- /dev/null
+++ b/tests/gateway/test_steer_command.py
@@ -0,0 +1,191 @@
+"""Tests for the gateway /steer command handler.
+
+/steer injects a user message into the agent's next tool result without
+interrupting. The gateway runner must:
+
+  1. When an agent IS running → call ``agent.steer(text)``, do NOT set
+     ``_interrupt_requested``, do NOT touch ``_pending_messages``.
+  2. When the agent is the PENDING sentinel → fall back to /queue
+     semantics (store in ``adapter._pending_messages``).
+  3. When no agent is active → strip the slash prefix and let the normal
+     prompt pipeline handle it as a regular user message.
+"""
+from __future__ import annotations
+
+from datetime import datetime
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.platforms.base import MessageEvent
+from gateway.session import SessionEntry, SessionSource, build_session_key
+
+
+def _make_source() -> SessionSource:
+    return SessionSource(
+        platform=Platform.TELEGRAM,
+        user_id="u1",
+        chat_id="c1",
+        user_name="tester",
+        chat_type="dm",
+    )
+
+
+def _make_event(text: str) -> MessageEvent:
+    return MessageEvent(
+        text=text,
+        source=_make_source(),
+        message_id="m1",
+    )
+
+
+def _make_runner(session_entry: SessionEntry):
+    from gateway.run import GatewayRunner
+
+    runner = object.__new__(GatewayRunner)
+    runner.config = GatewayConfig(
+        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
+    )
+    adapter = MagicMock()
+    adapter.send = AsyncMock()
+    adapter._pending_messages = {}
+    runner.adapters = {Platform.TELEGRAM: adapter}
+    runner._voice_mode = {}
+    runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False)
+    runner.session_store = MagicMock()
+    runner.session_store.get_or_create_session.return_value = session_entry
+    runner.session_store.load_transcript.return_value = []
+    runner.session_store.has_any_sessions.return_value = True
+    runner._running_agents = {}
+    runner._running_agents_ts = {}
+    runner._pending_messages = {}
+    runner._pending_approvals = {}
+    runner._session_db = MagicMock()
+    runner._session_db.get_session_title.return_value = None
+    runner._reasoning_config = None
+    runner._provider_routing = {}
+    runner._fallback_model = None
+    runner._show_reasoning = False
+    runner._is_user_authorized = lambda _source: True
+    runner._set_session_env = lambda _context: None
+    runner._should_send_voice_reply = lambda *_args, **_kwargs: False
+    runner._send_voice_reply = AsyncMock()
+    runner._capture_gateway_honcho_if_configured = lambda *args, **kwargs: None
+    runner._emit_gateway_run_progress = AsyncMock()
+    return runner, adapter
+
+
+def _session_entry() -> SessionEntry:
+    return SessionEntry(
+        session_key=build_session_key(_make_source()),
+        session_id="sess-1",
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        platform=Platform.TELEGRAM,
+        chat_type="dm",
+        total_tokens=0,
+    )
+
+
+@pytest.mark.asyncio
+async def test_steer_calls_agent_steer_and_does_not_interrupt():
+    """When an agent is running, /steer must call agent.steer(text) and
+    leave interrupt state untouched."""
+    runner, adapter = _make_runner(_session_entry())
+    sk = build_session_key(_make_source())
+
+    running_agent = MagicMock()
+    running_agent.steer.return_value = True
+    runner._running_agents[sk] = running_agent
+
+    result = await runner._handle_message(_make_event("/steer also check auth.log"))
+
+    # The handler replied with a confirmation
+    assert result is not None
+    assert "steer" in result.lower() or "queued" in result.lower()
+    # The agent's steer() was called with the payload (prefix stripped)
+    running_agent.steer.assert_called_once_with("also check auth.log")
+    # Critically: interrupt was NOT called
+    running_agent.interrupt.assert_not_called()
+    # And no user-text queueing happened — the steer doesn't go into
+    # _pending_messages (that would be turn-boundary /queue semantics).
+    assert runner._pending_messages == {}
+    assert adapter._pending_messages == {}
+
+
+@pytest.mark.asyncio
+async def test_steer_without_payload_returns_usage():
+    runner, _adapter = _make_runner(_session_entry())
+    sk = build_session_key(_make_source())
+    running_agent = MagicMock()
+    runner._running_agents[sk] = running_agent
+
+    result = await runner._handle_message(_make_event("/steer"))
+
+    assert result is not None
+    assert "Usage" in result or "usage" in result
+    running_agent.steer.assert_not_called()
+    running_agent.interrupt.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_steer_with_pending_sentinel_falls_back_to_queue():
+    """When the agent hasn't finished booting (sentinel), /steer should
+    queue as a turn-boundary follow-up instead of crashing."""
+    from gateway.run import _AGENT_PENDING_SENTINEL
+
+    runner, adapter = _make_runner(_session_entry())
+    sk = build_session_key(_make_source())
+    runner._running_agents[sk] = _AGENT_PENDING_SENTINEL
+
+    result = await runner._handle_message(_make_event("/steer wait up"))
+
+    assert result is not None
+    assert "queued" in result.lower() or "starting" in result.lower()
+    # The fallback put the text into the adapter's pending queue.
+    assert sk in adapter._pending_messages
+    assert adapter._pending_messages[sk].text == "wait up"
+
+
+@pytest.mark.asyncio
+async def test_steer_agent_without_steer_method_falls_back():
+    """If the running agent somehow lacks the steer() method (older build,
+    test stub), the handler must not explode — fall back to /queue."""
+    runner, adapter = _make_runner(_session_entry())
+    sk = build_session_key(_make_source())
+
+    # A bare object that does NOT have steer() — use a spec'd Mock so
+    # hasattr(agent, "steer") returns False.
+    running_agent = MagicMock(spec=[])
+    runner._running_agents[sk] = running_agent
+
+    result = await runner._handle_message(_make_event("/steer fallback"))
+
+    assert result is not None
+    # Must mention queueing since steer wasn't available
+    assert "queued" in result.lower()
+    assert sk in adapter._pending_messages
+    assert adapter._pending_messages[sk].text == "fallback"
+
+
+@pytest.mark.asyncio
+async def test_steer_rejected_payload_returns_rejection_message():
+    """If agent.steer() returns False (e.g. empty after strip — though
+    the gateway already guards this), surface a rejection message."""
+    runner, _adapter = _make_runner(_session_entry())
+    sk = build_session_key(_make_source())
+
+    running_agent = MagicMock()
+    running_agent.steer.return_value = False
+    runner._running_agents[sk] = running_agent
+
+    result = await runner._handle_message(_make_event("/steer hello"))
+
+    assert result is not None
+    assert "rejected" in result.lower() or "empty" in result.lower()
+
+
+if __name__ == "__main__":  # pragma: no cover
+    pytest.main([__file__, "-v"])
diff --git a/tests/run_agent/test_steer.py b/tests/run_agent/test_steer.py
new file mode 100644
index 00000000000..a298ede8c08
--- /dev/null
+++ b/tests/run_agent/test_steer.py
@@ -0,0 +1,228 @@
+"""Tests for AIAgent.steer() — mid-run user message injection.
+
+/steer lets the user add a note to the agent's next tool result without
+interrupting the current tool call. The agent sees the note inline with
+tool output on its next iteration, preserving message-role alternation
+and prompt-cache integrity.
+"""
+from __future__ import annotations
+
+import threading
+
+import pytest
+
+from run_agent import AIAgent
+
+
+def _bare_agent() -> AIAgent:
+    """Build an AIAgent without running __init__, then install the steer
+    state manually — matches the existing object.__new__ stub pattern
+    used elsewhere in the test suite.
+    """
+    agent = object.__new__(AIAgent)
+    agent._pending_steer = None
+    agent._pending_steer_lock = threading.Lock()
+    return agent
+
+
+class TestSteerAcceptance:
+    def test_accepts_non_empty_text(self):
+        agent = _bare_agent()
+        assert agent.steer("go ahead and check the logs") is True
+        assert agent._pending_steer == "go ahead and check the logs"
+
+    def test_rejects_empty_string(self):
+        agent = _bare_agent()
+        assert agent.steer("") is False
+        assert agent._pending_steer is None
+
+    def test_rejects_whitespace_only(self):
+        agent = _bare_agent()
+        assert agent.steer("   \n\t  ") is False
+        assert agent._pending_steer is None
+
+    def test_rejects_none(self):
+        agent = _bare_agent()
+        assert agent.steer(None) is False  # type: ignore[arg-type]
+        assert agent._pending_steer is None
+
+    def test_strips_surrounding_whitespace(self):
+        agent = _bare_agent()
+        assert agent.steer("  hello world  \n") is True
+        assert agent._pending_steer == "hello world"
+
+    def test_concatenates_multiple_steers_with_newlines(self):
+        agent = _bare_agent()
+        agent.steer("first note")
+        agent.steer("second note")
+        agent.steer("third note")
+        assert agent._pending_steer == "first note\nsecond note\nthird note"
+
+
+class TestSteerDrain:
+    def test_drain_returns_and_clears(self):
+        agent = _bare_agent()
+        agent.steer("hello")
+        assert agent._drain_pending_steer() == "hello"
+        assert agent._pending_steer is None
+
+    def test_drain_on_empty_returns_none(self):
+        agent = _bare_agent()
+        assert agent._drain_pending_steer() is None
+
+
+class TestSteerInjection:
+    def test_appends_to_last_tool_result(self):
+        agent = _bare_agent()
+        agent.steer("please also check auth.log")
+        messages = [
+            {"role": "user", "content": "what's in /var/log?"},
+            {"role": "assistant", "tool_calls": [{"id": "a"}, {"id": "b"}]},
+            {"role": "tool", "content": "ls output A", "tool_call_id": "a"},
+            {"role": "tool", "content": "ls output B", "tool_call_id": "b"},
+        ]
+        agent._apply_pending_steer_to_tool_results(messages, num_tool_msgs=2)
+        # The LAST tool result is modified; earlier ones are untouched.
+        assert messages[2]["content"] == "ls output A"
+        assert "ls output B" in messages[3]["content"]
+        assert "[USER STEER" in messages[3]["content"]
+        assert "please also check auth.log" in messages[3]["content"]
+        # And pending_steer is consumed.
+        assert agent._pending_steer is None
+
+    def test_no_op_when_no_steer_pending(self):
+        agent = _bare_agent()
+        messages = [
+            {"role": "assistant", "tool_calls": [{"id": "a"}]},
+            {"role": "tool", "content": "output", "tool_call_id": "a"},
+        ]
+        agent._apply_pending_steer_to_tool_results(messages, num_tool_msgs=1)
+        assert messages[-1]["content"] == "output"  # unchanged
+
+    def test_no_op_when_num_tool_msgs_zero(self):
+        agent = _bare_agent()
+        agent.steer("steer")
+        messages = [{"role": "user", "content": "hi"}]
+        agent._apply_pending_steer_to_tool_results(messages, num_tool_msgs=0)
+        # Steer should remain pending (nothing to drain into)
+        assert agent._pending_steer == "steer"
+
+    def test_marker_is_unambiguous_about_origin(self):
+        """The injection marker must make clear the text is from the user
+        and not tool output — this is the cache-safe way to signal
+        provenance without violating message-role alternation.
+        """
+        agent = _bare_agent()
+        agent.steer("stop after next step")
+        messages = [{"role": "tool", "content": "x", "tool_call_id": "1"}]
+        agent._apply_pending_steer_to_tool_results(messages, num_tool_msgs=1)
+        content = messages[-1]["content"]
+        assert "USER STEER" in content
+        assert "not tool output" in content.lower() or "injected mid-run" in content.lower()
+
+    def test_multimodal_content_list_preserved(self):
+        """Anthropic-style list content should be preserved, with the steer
+        appended as a text block."""
+        agent = _bare_agent()
+        agent.steer("extra note")
+        original_blocks = [{"type": "text", "text": "existing output"}]
+        messages = [
+            {"role": "tool", "content": list(original_blocks), "tool_call_id": "1"}
+        ]
+        agent._apply_pending_steer_to_tool_results(messages, num_tool_msgs=1)
+        new_content = messages[-1]["content"]
+        assert isinstance(new_content, list)
+        assert len(new_content) == 2
+        assert new_content[0] == {"type": "text", "text": "existing output"}
+        assert new_content[1]["type"] == "text"
+        assert "extra note" in new_content[1]["text"]
+
+    def test_restashed_when_no_tool_result_in_batch(self):
+        """If the 'batch' contains no tool-role messages (e.g. all skipped
+        after an interrupt), the steer should be put back into the pending
+        slot so the caller's fallback path can deliver it."""
+        agent = _bare_agent()
+        agent.steer("ping")
+        messages = [
+            {"role": "user", "content": "x"},
+            {"role": "assistant", "content": "y"},
+        ]
+        # Claim there were N tool msgs, but the tail has none — simulates
+        # the interrupt-cancelled case.
+        agent._apply_pending_steer_to_tool_results(messages, num_tool_msgs=2)
+        # Messages untouched
+        assert messages[-1]["content"] == "y"
+        # And the steer is back in pending so the fallback can grab it
+        assert agent._pending_steer == "ping"
+
+
+class TestSteerThreadSafety:
+    def test_concurrent_steer_calls_preserve_all_text(self):
+        agent = _bare_agent()
+        N = 200
+
+        def worker(idx: int) -> None:
+            agent.steer(f"note-{idx}")
+
+        threads = [threading.Thread(target=worker, args=(i,)) for i in range(N)]
+        for t in threads:
+            t.start()
+        for t in threads:
+            t.join()
+
+        text = agent._drain_pending_steer()
+        assert text is not None
+        # Every note must survive — none may be lost to a race between writers.
+        lines = text.split("\n")
+        assert len(lines) == N
+        assert set(lines) == {f"note-{i}" for i in range(N)}
+
+
+class TestSteerClearedOnInterrupt:
+    def test_clear_interrupt_drops_pending_steer(self):
+        """A hard interrupt supersedes any pending steer — the agent's
+        next tool iteration won't happen, so delivering the steer later
+        would be surprising."""
+        agent = _bare_agent()
+        # Minimal surface needed by clear_interrupt()
+        agent._interrupt_requested = True
+        agent._interrupt_message = None
+        agent._interrupt_thread_signal_pending = False
+        agent._execution_thread_id = None
+        agent._tool_worker_threads = None
+        agent._tool_worker_threads_lock = None
+
+        agent.steer("will be dropped")
+        assert agent._pending_steer == "will be dropped"
+
+        agent.clear_interrupt()
+        assert agent._pending_steer is None
+
+
+class TestSteerCommandRegistry:
+    def test_steer_in_command_registry(self):
+        """The /steer slash command must be registered so it reaches all
+        platforms (CLI, gateway, TUI autocomplete, Telegram/Slack menus).
+        """
+        from hermes_cli.commands import resolve_command, ACTIVE_SESSION_BYPASS_COMMANDS
+
+        cmd = resolve_command("steer")
+        assert cmd is not None
+        assert cmd.name == "steer"
+        assert cmd.category == "Session"
+        assert cmd.args_hint == "<prompt>"
+
+    def test_steer_in_bypass_set(self):
+        """When the agent is running, /steer MUST bypass the Level-1
+        base-adapter queue so it reaches the gateway runner's /steer
+        handler. Otherwise it would be queued as user text and only
+        delivered at turn end — defeating the whole point.
+        """
+        from hermes_cli.commands import ACTIVE_SESSION_BYPASS_COMMANDS, should_bypass_active_session
+
+        assert "steer" in ACTIVE_SESSION_BYPASS_COMMANDS
+        assert should_bypass_active_session("steer") is True
+
+
+if __name__ == "__main__":  # pragma: no cover
+    pytest.main([__file__, "-v"])
diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py
index e7681b784cf..ea231e626e5 100644
--- a/tests/test_tui_gateway_server.py
+++ b/tests/test_tui_gateway_server.py
@@ -438,3 +438,74 @@ def test_rollback_restore_resolves_number_and_file_path():
     assert resp["result"]["success"] is True
     assert calls["args"][1] == "bbb222"
     assert calls["args"][2] == "src/app.tsx"
+
+
+# ── session.steer ────────────────────────────────────────────────────
+
+
+def test_session_steer_calls_agent_steer_when_agent_supports_it():
+    """The TUI RPC method must call agent.steer(text) and return a
+    queued status without touching interrupt state.
+    """
+    calls = {}
+
+    class _Agent:
+        def steer(self, text):
+            calls["steer_text"] = text
+            return True
+
+        def interrupt(self, *args, **kwargs):
+            calls["interrupt_called"] = True
+
+    server._sessions["sid"] = _session(agent=_Agent())
+    try:
+        resp = server.handle_request(
+            {
+                "id": "1",
+                "method": "session.steer",
+                "params": {"session_id": "sid", "text": "also check auth.log"},
+            }
+        )
+    finally:
+        server._sessions.pop("sid", None)
+
+    assert "result" in resp, resp
+    assert resp["result"]["status"] == "queued"
+    assert resp["result"]["text"] == "also check auth.log"
+    assert calls["steer_text"] == "also check auth.log"
+    assert "interrupt_called" not in calls  # must NOT interrupt
+
+
+def test_session_steer_rejects_empty_text():
+    server._sessions["sid"] = _session(agent=types.SimpleNamespace(steer=lambda t: True))
+    try:
+        resp = server.handle_request(
+            {
+                "id": "1",
+                "method": "session.steer",
+                "params": {"session_id": "sid", "text": "   "},
+            }
+        )
+    finally:
+        server._sessions.pop("sid", None)
+
+    assert "error" in resp, resp
+    assert resp["error"]["code"] == 4002
+
+
+def test_session_steer_errors_when_agent_has_no_steer_method():
+    server._sessions["sid"] = _session(agent=types.SimpleNamespace())  # no steer()
+    try:
+        resp = server.handle_request(
+            {
+                "id": "1",
+                "method": "session.steer",
+                "params": {"session_id": "sid", "text": "hi"},
+            }
+        )
+    finally:
+        server._sessions.pop("sid", None)
+
+    assert "error" in resp, resp
+    assert resp["error"]["code"] == 4010
+
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 3ef76a0f02e..a7dae9e5c60 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -1340,6 +1340,31 @@ def _(rid, params: dict) -> dict:
     return _ok(rid, {"status": "interrupted"})
 
 
+@method("session.steer")
+def _(rid, params: dict) -> dict:
+    """Inject a user message into the next tool result without interrupting.
+
+    Mirrors AIAgent.steer(). Safe to call while a turn is running — the text
+    lands on the last tool result of the next tool batch and the model sees
+    it on its next iteration. No interrupt, no new user turn, no role
+    alternation violation.
+    """
+    text = (params.get("text") or "").strip()
+    if not text:
+        return _err(rid, 4002, "text is required")
+    session, err = _sess_nowait(params, rid)
+    if err:
+        return err
+    agent = session.get("agent")
+    if agent is None or not hasattr(agent, "steer"):
+        return _err(rid, 4010, "agent does not support steer")
+    try:
+        accepted = agent.steer(text)
+    except Exception as exc:
+        return _err(rid, 5000, f"steer failed: {exc}")
+    return _ok(rid, {"status": "queued" if accepted else "rejected", "text": text})
+
+
 @method("terminal.resize")
 def _(rid, params: dict) -> dict:
     session, err = _sess_nowait(params, rid)
diff --git a/ui-tui/src/app/slash/commands/core.ts b/ui-tui/src/app/slash/commands/core.ts
index e0832c7a694..a151b2cdc87 100644
--- a/ui-tui/src/app/slash/commands/core.ts
+++ b/ui-tui/src/app/slash/commands/core.ts
@@ -1,7 +1,7 @@
 import { dailyFortune, randomFortune } from '../../../content/fortunes.js'
 import { HOTKEYS } from '../../../content/hotkeys.js'
 import { nextDetailsMode, parseDetailsMode } from '../../../domain/details.js'
-import type { ConfigGetValueResponse, ConfigSetResponse, SessionUndoResponse } from '../../../gatewayTypes.js'
+import type { ConfigGetValueResponse, ConfigSetResponse, SessionSteerResponse, SessionUndoResponse } from '../../../gatewayTypes.js'
 import { writeOsc52Clipboard } from '../../../lib/osc52.js'
 import type { DetailsMode, Msg, PanelSection } from '../../../types.js'
 import { patchOverlayState } from '../../overlayStore.js'
@@ -245,6 +245,36 @@ export const coreCommands: SlashCommand[] = [
     }
   },
 
+  {
+    help: 'inject a message after the next tool call (no interrupt)',
+    name: 'steer',
+    run: (arg, ctx) => {
+      const payload = arg?.trim() ?? ''
+
+      if (!payload) {
+        return ctx.transcript.sys('usage: /steer <prompt>')
+      }
+
+      // If the agent isn't running, fall back to the queue so the user's
+      // message isn't lost — identical semantics to the gateway handler.
+      if (!ctx.ui.busy || !ctx.sid) {
+        ctx.composer.enqueue(payload)
+        ctx.transcript.sys(`no active turn — queued for next: "${payload.slice(0, 50)}${payload.length > 50 ? '…' : ''}"`)
+        return
+      }
+
+      ctx.gateway.rpc<SessionSteerResponse>('session.steer', { session_id: ctx.sid, text: payload }).then(
+        ctx.guarded<SessionSteerResponse>(r => {
+          if (r?.status === 'queued') {
+            ctx.transcript.sys(`⏩ steer queued — arrives after next tool call: "${payload.slice(0, 50)}${payload.length > 50 ? '…' : ''}"`)
+          } else {
+            ctx.transcript.sys('steer rejected')
+          }
+        })
+      ).catch(ctx.guardedErr)
+    }
+  },
+
   {
     help: 'undo last exchange',
     name: 'undo',
diff --git a/ui-tui/src/gatewayTypes.ts b/ui-tui/src/gatewayTypes.ts
index 9e21b9bc587..c8d1c685523 100644
--- a/ui-tui/src/gatewayTypes.ts
+++ b/ui-tui/src/gatewayTypes.ts
@@ -152,6 +152,11 @@ export interface SessionInterruptResponse {
   ok?: boolean
 }
 
+export interface SessionSteerResponse {
+  status?: 'queued' | 'rejected'
+  text?: string
+}
+
 // ── Prompt / submission ──────────────────────────────────────────────
 
 export interface PromptSubmitResponse {

From 6fb69229caba4bd5699228e520de4956b3458187 Mon Sep 17 00:00:00 2001
From: Siddharth Balyan <52913345+alt-glitch@users.noreply.github.com>
Date: Sat, 18 Apr 2026 06:51:28 -0700
Subject: [PATCH 022/143] fix(nix): fix build failures, TUI Node.js crash, and
 upgrade container to Node 22 (#12159)

* Add setuptools build dep for legacy alibabacloud packages and update
the stale npm-deps hash

* Add HERMES_NODE env var to pin Node.js version

The TUI requires Node.js 20+ for regex `/v` flag support (used by
string-width). Instead of relying on PATH lookup alone, explicitly set
HERMES_NODE to the bundled Node 22 in the Nix wrapper, and make the
Python launcher prefer HERMES_NODE when it points to an executable
file, falling back to the PATH lookup otherwise.

Also upgrade container provisioning to Node 22 via NodeSource (Ubuntu
24.04 ships Node 18 which is EOL) and add a Nix check to verify the
wrapper and Node version at build time.
---
 hermes_cli/main.py   |  4 ++++
 nix/checks.nix       | 23 +++++++++++++++++++++++
 nix/nixosModules.nix | 14 +++++++++++---
 nix/packages.nix     |  3 ++-
 nix/python.nix       | 15 +++++++++++++++
 nix/tui.nix          |  7 +------
 6 files changed, 56 insertions(+), 10 deletions(-)

diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 0afadac3d16..a13a6f88ee9 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -897,6 +897,10 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
     _ensure_tui_node()
 
     def _node_bin(bin: str) -> str:
+        if bin == "node":
+            env_node = os.environ.get("HERMES_NODE")
+            if env_node and os.path.isfile(env_node) and os.access(env_node, os.X_OK):
+                return env_node
         path = shutil.which(bin)
         if not path:
             print(f"{bin} not found — install Node.js to use the TUI.")
diff --git a/nix/checks.nix b/nix/checks.nix
index 55068a94f16..ff8e7947c57 100644
--- a/nix/checks.nix
+++ b/nix/checks.nix
@@ -125,6 +125,29 @@ json.dump(sorted(leaf_paths(DEFAULT_CONFIG)), sys.stdout, indent=2)
           echo "ok" > $out/result
         '';
 
+        # Verify HERMES_NODE is set in wrapper and points to Node 20+
+        # (string-width uses the /v regex flag which requires Node 20+)
+        hermes-node = pkgs.runCommand "hermes-node-version" { } ''
+          set -e
+          echo "=== Checking HERMES_NODE in wrapper ==="
+          grep -q "HERMES_NODE" ${hermes-agent}/bin/hermes || \
+            (echo "FAIL: HERMES_NODE not set in wrapper"; exit 1)
+          echo "PASS: HERMES_NODE present in wrapper"
+
+          HERMES_NODE=$(sed -n "s/^export HERMES_NODE='\(.*\)'/\1/p" ${hermes-agent}/bin/hermes)
+          test -x "$HERMES_NODE" || (echo "FAIL: HERMES_NODE=$HERMES_NODE not executable"; exit 1)
+          echo "PASS: HERMES_NODE executable at $HERMES_NODE"
+
+          NODE_MAJOR=$("$HERMES_NODE" --version | sed 's/^v//' | cut -d. -f1)
+          test "$NODE_MAJOR" -ge 20 || \
+            (echo "FAIL: Node v$NODE_MAJOR < 20, TUI needs /v regex flag support"; exit 1)
+          echo "PASS: Node v$NODE_MAJOR >= 20"
+
+          echo "=== All HERMES_NODE checks passed ==="
+          mkdir -p $out
+          echo "ok" > $out/result
+        '';
+
         # Verify HERMES_MANAGED guard works on all mutation commands
         managed-guard = pkgs.runCommand "hermes-managed-guard" { } ''
           set -e
diff --git a/nix/nixosModules.nix b/nix/nixosModules.nix
index 75b3dca31b2..24a2a1b6ddc 100644
--- a/nix/nixosModules.nix
+++ b/nix/nixosModules.nix
@@ -121,11 +121,19 @@
       # ── Provision apt packages (first boot only, cached in writable layer) ──
       # sudo: agent self-modification
       # nodejs/npm: writable node so npm i -g works (nix store copies are read-only)
-      # curl: needed for uv installer
+      #   Node 22 via NodeSource — Ubuntu 24.04 ships Node 18 which is EOL.
+      # curl: needed for uv installer + NodeSource setup
       if [ ! -f /var/lib/hermes-tools-provisioned ] && command -v apt-get >/dev/null 2>&1; then
         echo "First boot: provisioning agent tools..."
         apt-get update -qq
-        apt-get install -y -qq sudo nodejs npm curl
+        apt-get install -y -qq sudo curl ca-certificates gnupg
+        mkdir -p /etc/apt/keyrings
+        curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \
+          | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
+        echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_22.x nodistro main" \
+          > /etc/apt/sources.list.d/nodesource.list
+        apt-get update -qq
+        apt-get install -y -qq nodejs
         touch /var/lib/hermes-tools-provisioned
       fi
 
@@ -171,7 +179,7 @@
     # Package and entrypoint use stable symlinks (current-package, current-entrypoint)
     # so they can update without recreation. Env vars go through $HERMES_HOME/.env.
     containerIdentity = builtins.hashString "sha256" (builtins.toJSON {
-      schema = 3; # bump when identity inputs change
+      schema = 4; # bump when identity inputs change (4: Node 18→22 via NodeSource)
       image = cfg.container.image;
       extraVolumes = cfg.container.extraVolumes;
       extraOptions = cfg.container.extraOptions;
diff --git a/nix/packages.nix b/nix/packages.nix
index f39d9d0b2be..968ad12fb71 100644
--- a/nix/packages.nix
+++ b/nix/packages.nix
@@ -63,7 +63,8 @@
                   --suffix PATH : "${runtimePath}" \
                   --set HERMES_BUNDLED_SKILLS $out/share/hermes-agent/skills \
                   --set HERMES_TUI_DIR $out/ui-tui \
-                  --set HERMES_PYTHON ${hermesVenv}/bin/python3
+                  --set HERMES_PYTHON ${hermesVenv}/bin/python3 \
+                  --set HERMES_NODE ${pkgs.nodejs_22}/bin/node
               '')
               [
                 "hermes"
diff --git a/nix/python.nix b/nix/python.nix
index 160b4ee790b..91411f4d754 100644
--- a/nix/python.nix
+++ b/nix/python.nix
@@ -35,6 +35,20 @@ let
       };
     };
 
+  # Legacy alibabacloud packages ship only sdists with setup.py/setup.cfg
+  # and no pyproject.toml, so setuptools isn't declared as a build dep.
+  buildSystemOverrides = final: prev: builtins.mapAttrs
+    (name: _: prev.${name}.overrideAttrs (old: {
+      nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [ final.setuptools ];
+    }))
+    (lib.genAttrs [
+      "alibabacloud-credentials-api"
+      "alibabacloud-endpoint-util"
+      "alibabacloud-gateway-dingtalk"
+      "alibabacloud-gateway-spi"
+      "alibabacloud-tea"
+    ] (_: null));
+
   pythonPackageOverrides = final: _prev:
     if isAarch64Darwin then {
       numpy = mkPrebuiltOverride final python311.pkgs.numpy { };
@@ -75,6 +89,7 @@ let
       (lib.composeManyExtensions [
         pyproject-build-systems.overlays.default
         overlay
+        buildSystemOverrides
         pythonPackageOverrides
       ]);
 in
diff --git a/nix/tui.nix b/nix/tui.nix
index 70eb67f949a..7303edecb9f 100644
--- a/nix/tui.nix
+++ b/nix/tui.nix
@@ -4,7 +4,7 @@ let
   src = ../ui-tui;
   npmDeps = pkgs.fetchNpmDeps {
     inherit src;
-    hash = "sha256-zsUPmbC6oMUO10EhS3ptvDjwlfpCSEmrkjyeORw7fac=";
+    hash = "sha256-mG3vpgGi4ljt4X3XIf3I/5mIcm+rVTUAmx2DQ6YVA90=";
   };
 
   packageJson = builtins.fromJSON (builtins.readFile (src + "/package.json"));
@@ -18,11 +18,6 @@ pkgs.buildNpmPackage {
 
   doCheck = false;
 
-  postPatch = ''
-    # fetchNpmDeps strips the trailing newline; match it so the diff passes
-    sed -i -z 's/\n$//' package-lock.json
-  '';
-
   installPhase = ''
     runHook preInstall
 

From f0638f35964ee28cff608a05614524065488c0b7 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 09:11:53 -0500
Subject: [PATCH 023/143] fix(tui): split /model picker from /provider wizard
 to resolve registry collision

---
 ui-tui/src/app/slash/commands/setup.ts | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/ui-tui/src/app/slash/commands/setup.ts b/ui-tui/src/app/slash/commands/setup.ts
index c6d5cc8637b..d9a948e5419 100644
--- a/ui-tui/src/app/slash/commands/setup.ts
+++ b/ui-tui/src/app/slash/commands/setup.ts
@@ -6,9 +6,8 @@ import type { SlashCommand } from '../types.js'
 
 export const setupCommands: SlashCommand[] = [
   {
-    aliases: ['provider'],
-    help: 'configure LLM provider and model (launches `hermes model`)',
-    name: 'model',
+    help: 'configure LLM provider + model (launches `hermes model`)',
+    name: 'provider',
     run: (_arg, ctx) =>
       void runExternalSetup({
         args: ['model'],

From 4e1ea79edc8fa6d1e4958e9df19fcca042efa566 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 09:11:57 -0500
Subject: [PATCH 024/143] feat(tui): accept raw Ctrl+V as clipboard image paste
 fallback

---
 ui-tui/src/components/textInput.tsx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ui-tui/src/components/textInput.tsx b/ui-tui/src/components/textInput.tsx
index f2bbee63cf2..6503da4dbff 100644
--- a/ui-tui/src/components/textInput.tsx
+++ b/ui-tui/src/components/textInput.tsx
@@ -464,7 +464,7 @@ export function TextInput({
     (inp: string, k: Key, event: InputEvent) => {
       const eventRaw = event.keypress.raw
 
-      if (eventRaw === '\x1bv' || eventRaw === '\x1bV') {
+      if (eventRaw === '\x1bv' || eventRaw === '\x1bV' || eventRaw === '\x16') {
         return void emitPaste({ cursor: curRef.current, hotkey: true, text: '', value: vRef.current })
       }
 

From 5152e1ad8646235e4b745cf3d1337417b13f5ef5 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 09:16:37 -0500
Subject: [PATCH 025/143] feat(tui-gateway): surface config.quick_commands in
 commands.catalog

---
 tui_gateway/server.py | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index a7dae9e5c60..fad674aeb78 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -1987,8 +1987,35 @@ def _(rid, params: dict) -> dict:
                 cat_order.append(cat)
             cat_map[cat].append([name, desc])
 
-        skill_count = 0
         warning = ""
+        try:
+            qcmds = _load_cfg().get("quick_commands", {}) or {}
+            if isinstance(qcmds, dict) and qcmds:
+                bucket = "User commands"
+                if bucket not in cat_map:
+                    cat_map[bucket] = []
+                    cat_order.append(bucket)
+                for qname, qc in sorted(qcmds.items()):
+                    if not isinstance(qc, dict):
+                        continue
+                    key = f"/{qname}"
+                    canon[key.lower()] = key
+                    qtype = qc.get("type", "")
+                    if qtype == "exec":
+                        default_desc = f"exec: {qc.get('command', '')}"
+                    elif qtype == "alias":
+                        default_desc = f"alias → {qc.get('target', '')}"
+                    else:
+                        default_desc = qtype or "quick command"
+                    qdesc = str(qc.get("description") or default_desc)
+                    qdesc = qdesc[:120] + ("…" if len(qdesc) > 120 else "")
+                    all_pairs.append([key, qdesc])
+                    cat_map[bucket].append([key, qdesc])
+        except Exception as e:
+            if not warning:
+                warning = f"quick_commands discovery unavailable: {e}"
+
+        skill_count = 0
         try:
             from agent.skill_commands import scan_skill_commands
             for k, info in sorted(scan_skill_commands().items()):

From a397b0fd4d5c95b6aef4eecbb13eabad3d7e659b Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 09:16:39 -0500
Subject: [PATCH 026/143] test(tui-gateway): assert quick_commands appear in
 commands.catalog output

---
 tests/test_tui_gateway_server.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py
index ea231e626e5..d441e2b32d0 100644
--- a/tests/test_tui_gateway_server.py
+++ b/tests/test_tui_gateway_server.py
@@ -363,6 +363,28 @@ def test_image_attach_appends_local_image(monkeypatch):
     assert len(server._sessions["sid"]["attached_images"]) == 1
 
 
+def test_commands_catalog_surfaces_quick_commands(monkeypatch):
+    """Configured quick_commands show up in pairs, the "User commands" category, and canon."""
+    quick = {
+        "build": {"type": "exec", "command": "npm run build"},
+        "git": {"type": "alias", "target": "/shell git"},
+        "notes": {"type": "exec", "command": "cat NOTES.md", "description": "Open design notes"},
+    }
+    monkeypatch.setattr(server, "_load_cfg", lambda: {"quick_commands": quick})
+
+    result = server.handle_request({"id": "1", "method": "commands.catalog", "params": {}})["result"]
+
+    described = dict(result["pairs"])
+    assert "npm run build" in described["/build"]
+    assert described["/git"].startswith("alias →")
+    assert described["/notes"] == "Open design notes"
+
+    bucket = next(c for c in result["categories"] if c["name"] == "User commands")
+    assert set(dict(bucket["pairs"])) == {"/build", "/git", "/notes"}
+    assert result["canon"]["/build"] == "/build"
+    assert result["canon"]["/notes"] == "/notes"
+
+
 def test_command_dispatch_exec_nonzero_surfaces_error(monkeypatch):
     monkeypatch.setattr(server, "_load_cfg", lambda: {"quick_commands": {"boom": {"type": "exec", "command": "boom"}}})
     monkeypatch.setattr(

From 586b2f208913e2d63f08e426ac0c2ac6b3bc3823 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 09:16:44 -0500
Subject: [PATCH 027/143] feat(tui): persist large pastes to ~/.hermes/pastes/
 via paste.collapse

---
 ui-tui/src/app/interfaces.ts       |  1 +
 ui-tui/src/app/useComposerState.ts | 15 ++++++++++++++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/ui-tui/src/app/interfaces.ts b/ui-tui/src/app/interfaces.ts
index 998afe2a198..ff2b1e5b5a5 100644
--- a/ui-tui/src/app/interfaces.ts
+++ b/ui-tui/src/app/interfaces.ts
@@ -335,5 +335,6 @@ export interface AppOverlaysProps {
 
 export interface PasteSnippet {
   label: string
+  path?: string
   text: string
 }
diff --git a/ui-tui/src/app/useComposerState.ts b/ui-tui/src/app/useComposerState.ts
index 14a40412c99..bebda273d9f 100644
--- a/ui-tui/src/app/useComposerState.ts
+++ b/ui-tui/src/app/useComposerState.ts
@@ -70,12 +70,25 @@ export function useComposerState({ gw, onClipboardPaste, submitRef }: UseCompose
 
       setPasteSnips(prev => [...prev, { label, text: cleanedText }].slice(-32))
 
+      void gw
+        .request<{ path?: string }>('paste.collapse', { text: cleanedText })
+        .then(r => {
+          const path = r?.path
+
+          if (!path) {
+            return
+          }
+
+          setPasteSnips(prev => prev.map(s => (s.label === label ? { ...s, path } : s)))
+        })
+        .catch(() => {})
+
       return {
         cursor: cursor + insert.length,
         value: value.slice(0, cursor) + insert + value.slice(cursor)
       }
     },
-    [onClipboardPaste]
+    [gw, onClipboardPaste]
   )
 
   const openEditor = useCallback(() => {

From 200c17433c0ce24a9332b857e64b6db3041a1f59 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 09:23:29 -0500
Subject: [PATCH 028/143] feat(tui): read display.streaming / show_reasoning /
 show_cost / inline_diffs from config

Extends ConfigDisplayConfig and UiState so the four new display flags
flow from `config.get {key:"full"}` into the nanostore. applyDisplay is
exported to keep the fan-out testable without an Ink harness.

Defaults mirror v1 parity: streaming + inline_diffs default true
(opt-out via `=== false`), show_cost + show_reasoning default false
(opt-in via plain truthy check).
---
 ui-tui/src/__tests__/useConfigSync.test.ts | 67 ++++++++++++++++++++++
 ui-tui/src/app/interfaces.ts               |  4 ++
 ui-tui/src/app/uiStore.ts                  |  4 ++
 ui-tui/src/app/useConfigSync.ts            |  8 ++-
 ui-tui/src/gatewayTypes.ts                 |  4 ++
 5 files changed, 85 insertions(+), 2 deletions(-)
 create mode 100644 ui-tui/src/__tests__/useConfigSync.test.ts

diff --git a/ui-tui/src/__tests__/useConfigSync.test.ts b/ui-tui/src/__tests__/useConfigSync.test.ts
new file mode 100644
index 00000000000..c14ecff3aa7
--- /dev/null
+++ b/ui-tui/src/__tests__/useConfigSync.test.ts
@@ -0,0 +1,67 @@
+import { beforeEach, describe, expect, it, vi } from 'vitest'
+
+import { $uiState, resetUiState } from '../app/uiStore.js'
+import { applyDisplay } from '../app/useConfigSync.js'
+
+describe('applyDisplay', () => {
+  // Runs applyDisplay against a fresh bell spy and hands back the spy
+  // together with the UI state it produced.
+  const run = (cfg: Parameters<typeof applyDisplay>[0]) => {
+    const bell = vi.fn()
+
+    applyDisplay(cfg, bell)
+
+    return { bell, ui: $uiState.get() }
+  }
+
+  beforeEach(() => {
+    resetUiState()
+  })
+
+  it('fans every display flag out to $uiState and the bell callback', () => {
+    const { bell, ui } = run({
+      config: {
+        display: {
+          bell_on_complete: true,
+          details_mode: 'expanded',
+          inline_diffs: false,
+          show_cost: true,
+          show_reasoning: true,
+          streaming: false,
+          tui_compact: true,
+          tui_statusbar: false
+        }
+      }
+    })
+
+    expect(bell).toHaveBeenCalledWith(true)
+    expect(ui.compact).toBe(true)
+    expect(ui.detailsMode).toBe('expanded')
+    expect(ui.inlineDiffs).toBe(false)
+    expect(ui.showCost).toBe(true)
+    expect(ui.showReasoning).toBe(true)
+    expect(ui.statusBar).toBe(false)
+    expect(ui.streaming).toBe(false)
+  })
+
+  it('applies v1 parity defaults when display fields are missing', () => {
+    const { bell, ui } = run({ config: { display: {} } })
+
+    // inline_diffs, tui_statusbar and streaming are opt-out; show_cost and show_reasoning are opt-in.
+    expect(bell).toHaveBeenCalledWith(false)
+    expect(ui.inlineDiffs).toBe(true)
+    expect(ui.showCost).toBe(false)
+    expect(ui.showReasoning).toBe(false)
+    expect(ui.statusBar).toBe(true)
+    expect(ui.streaming).toBe(true)
+  })
+
+  it('treats a null config like an empty display block', () => {
+    const { bell, ui } = run(null)
+
+    // null takes the same path as `{ config: { display: {} } }`.
+    expect(bell).toHaveBeenCalledWith(false)
+    expect(ui.inlineDiffs).toBe(true)
+    expect(ui.streaming).toBe(true)
+  })
+})
diff --git a/ui-tui/src/app/interfaces.ts b/ui-tui/src/app/interfaces.ts
index ff2b1e5b5a5..bf3d54c627b 100644
--- a/ui-tui/src/app/interfaces.ts
+++ b/ui-tui/src/app/interfaces.ts
@@ -78,9 +78,13 @@ export interface UiState {
   compact: boolean
   detailsMode: DetailsMode
   info: null | SessionInfo
+  inlineDiffs: boolean
+  showCost: boolean
+  showReasoning: boolean
   sid: null | string
   status: string
   statusBar: boolean
+  streaming: boolean
   theme: Theme
   usage: Usage
 }
diff --git a/ui-tui/src/app/uiStore.ts b/ui-tui/src/app/uiStore.ts
index b7f5c20f4df..81089f1795a 100644
--- a/ui-tui/src/app/uiStore.ts
+++ b/ui-tui/src/app/uiStore.ts
@@ -11,9 +11,13 @@ const buildUiState = (): UiState => ({
   compact: false,
   detailsMode: 'collapsed',
   info: null,
+  inlineDiffs: true,
+  showCost: false,
+  showReasoning: false,
   sid: null,
   status: 'summoning hermes…',
   statusBar: true,
+  streaming: true,
   theme: DEFAULT_THEME,
   usage: ZERO
 })
diff --git a/ui-tui/src/app/useConfigSync.ts b/ui-tui/src/app/useConfigSync.ts
index fe3cec57378..8a3756342ba 100644
--- a/ui-tui/src/app/useConfigSync.ts
+++ b/ui-tui/src/app/useConfigSync.ts
@@ -27,14 +27,18 @@ const quietRpc = async <T extends Record<string, any> = Record<string, any>>(
   }
 }
 
-const applyDisplay = (cfg: ConfigFullResponse | null, setBell: (v: boolean) => void) => {
+export const applyDisplay = (cfg: ConfigFullResponse | null, setBell: (v: boolean) => void) => {
   const d = cfg?.config?.display ?? {}
 
   setBell(!!d.bell_on_complete)
   patchUiState({
     compact: !!d.tui_compact,
     detailsMode: resolveDetailsMode(d),
-    statusBar: d.tui_statusbar !== false
+    inlineDiffs: d.inline_diffs !== false,
+    showCost: !!d.show_cost,
+    showReasoning: !!d.show_reasoning,
+    statusBar: d.tui_statusbar !== false,
+    streaming: d.streaming !== false
   })
 }
 
diff --git a/ui-tui/src/gatewayTypes.ts b/ui-tui/src/gatewayTypes.ts
index c8d1c685523..fd5b6c13472 100644
--- a/ui-tui/src/gatewayTypes.ts
+++ b/ui-tui/src/gatewayTypes.ts
@@ -53,6 +53,10 @@ export type CommandDispatchResponse =
 export interface ConfigDisplayConfig {
   bell_on_complete?: boolean
   details_mode?: string
+  inline_diffs?: boolean
+  show_cost?: boolean
+  show_reasoning?: boolean
+  streaming?: boolean
   thinking_mode?: string
   tui_compact?: boolean
   tui_statusbar?: boolean

From fd6ffc777fea792f368dd3e3e86a66e438adafd3 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 09:26:03 -0500
Subject: [PATCH 029/143] feat(tui): honor display.* flags in turn renderer,
 status bar, and event handler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- turnController gates scheduleStreaming / reasoning recorders on
  streaming + showReasoning so disabling them keeps the buffer silent
  until message.complete flushes
- createGatewayEventHandler only surfaces inline_diff previews when
  inlineDiffs is on
- StatusRule takes a showCost prop and renders ` │ $X.XXXX` with the
  same toFixed(4) formatting as /usage when usage.cost_usd is present
- Usage grows cost_usd?: number to match the gateway payload
- Existing handler tests flip showReasoning on in beforeEach so
  reasoning-flow assertions keep their meaning
---
 .../__tests__/createGatewayEventHandler.test.ts   |  3 ++-
 ui-tui/src/app/createGatewayEventHandler.ts       |  2 +-
 ui-tui/src/app/turnController.ts                  | 15 +++++++++++++--
 ui-tui/src/components/appChrome.tsx               |  5 +++++
 ui-tui/src/components/appLayout.tsx               |  1 +
 ui-tui/src/types.ts                               |  1 +
 6 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts
index e546ce640e4..f1f0c306bcd 100644
--- a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts
+++ b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts
@@ -4,7 +4,7 @@ import { createGatewayEventHandler } from '../app/createGatewayEventHandler.js'
 import { resetOverlayState } from '../app/overlayStore.js'
 import { turnController } from '../app/turnController.js'
 import { resetTurnState } from '../app/turnStore.js'
-import { resetUiState } from '../app/uiStore.js'
+import { patchUiState, resetUiState } from '../app/uiStore.js'
 import { estimateTokensRough } from '../lib/text.js'
 import type { Msg } from '../types.js'
 
@@ -47,6 +47,7 @@ describe('createGatewayEventHandler', () => {
     resetUiState()
     resetTurnState()
     turnController.fullReset()
+    patchUiState({ showReasoning: true })
   })
 
   it('persists completed tool rows when message.complete lands immediately after tool.complete', () => {
diff --git a/ui-tui/src/app/createGatewayEventHandler.ts b/ui-tui/src/app/createGatewayEventHandler.ts
index e728f8bbd01..699a3794dee 100644
--- a/ui-tui/src/app/createGatewayEventHandler.ts
+++ b/ui-tui/src/app/createGatewayEventHandler.ts
@@ -266,7 +266,7 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
       case 'tool.complete':
         turnController.recordToolComplete(ev.payload.tool_id, ev.payload.name, ev.payload.error, ev.payload.summary)
 
-        if (ev.payload.inline_diff) {
+        if (ev.payload.inline_diff && getUiState().inlineDiffs) {
           sys(ev.payload.inline_diff)
         }
 
diff --git a/ui-tui/src/app/turnController.ts b/ui-tui/src/app/turnController.ts
index 73d0571734e..de57b2dd053 100644
--- a/ui-tui/src/app/turnController.ts
+++ b/ui-tui/src/app/turnController.ts
@@ -11,7 +11,7 @@ import type { ActiveTool, ActivityItem, Msg, SubagentProgress } from '../types.j
 
 import { resetOverlayState } from './overlayStore.js'
 import { patchTurnState, resetTurnState } from './turnStore.js'
-import { patchUiState } from './uiStore.js'
+import { getUiState, patchUiState } from './uiStore.js'
 
 const INTERRUPT_COOLDOWN_MS = 1500
 const ACTIVITY_LIMIT = 8
@@ -226,10 +226,17 @@ class TurnController {
     }
 
     this.bufRef = rendered ?? this.bufRef + text
-    this.scheduleStreaming()
+
+    if (getUiState().streaming) {
+      this.scheduleStreaming()
+    }
   }
 
   recordReasoningAvailable(text: string) {
+    if (!getUiState().showReasoning) {
+      return
+    }
+
     const incoming = text.trim()
 
     if (!incoming || this.reasoningText.trim()) {
@@ -242,6 +249,10 @@ class TurnController {
   }
 
   recordReasoningDelta(text: string) {
+    if (!getUiState().showReasoning) {
+      return
+    }
+
     this.reasoningText += text
     this.scheduleReasoning()
     this.pulseReasoningStreaming()
diff --git a/ui-tui/src/components/appChrome.tsx b/ui-tui/src/components/appChrome.tsx
index ed6f914c96b..2f5f807dec7 100644
--- a/ui-tui/src/components/appChrome.tsx
+++ b/ui-tui/src/components/appChrome.tsx
@@ -99,6 +99,7 @@ export function StatusRule({
   usage,
   bgCount,
   sessionStartedAt,
+  showCost,
   voiceLabel,
   t
 }: StatusRuleProps) {
@@ -136,6 +137,9 @@ export function StatusRule({
           ) : null}
           {voiceLabel ? <Text color={t.color.dim}> │ {voiceLabel}</Text> : null}
           {bgCount > 0 ? <Text color={t.color.dim}> │ {bgCount} bg</Text> : null}
+          {showCost && typeof usage.cost_usd === 'number' ? (
+            <Text color={t.color.dim}> │ ${usage.cost_usd.toFixed(4)}</Text>
+          ) : null}
         </Text>
       </Box>
 
@@ -285,6 +289,7 @@ interface StatusRuleProps {
   cwdLabel: string
   model: string
   sessionStartedAt?: number | null
+  showCost: boolean
   status: string
   statusColor: string
   t: Theme
diff --git a/ui-tui/src/components/appLayout.tsx b/ui-tui/src/components/appLayout.tsx
index 26d8e4b0a99..f13adf1bbd0 100644
--- a/ui-tui/src/components/appLayout.tsx
+++ b/ui-tui/src/components/appLayout.tsx
@@ -190,6 +190,7 @@ const ComposerPane = memo(function ComposerPane({
             cwdLabel={status.cwdLabel}
             model={ui.info?.model?.split('/').pop() ?? ''}
             sessionStartedAt={status.sessionStartedAt}
+            showCost={ui.showCost}
             status={ui.status}
             statusColor={status.statusColor}
             t={ui.theme}
diff --git a/ui-tui/src/types.ts b/ui-tui/src/types.ts
index ab7d7efab96..32e99983ac9 100644
--- a/ui-tui/src/types.ts
+++ b/ui-tui/src/types.ts
@@ -68,6 +68,7 @@ export interface Usage {
   context_max?: number
   context_percent?: number
   context_used?: number
+  cost_usd?: number
   input: number
   output: number
   total: number

From 202b78ec684aee2a0bc5964bc2a58d2d20f8fbfc Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 09:23:47 -0500
Subject: [PATCH 030/143] feat(tui-gateway): include per-MCP-server status in
 session.info payload

---
 tui_gateway/server.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index fad674aeb78..7d4c3fa3c17 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -588,6 +588,11 @@ def _session_info(agent) -> dict:
         info["skills"] = get_available_skills()
     except Exception:
         pass
+    try:
+        from tools.mcp_tool import get_mcp_status
+        info["mcp_servers"] = get_mcp_status()
+    except Exception:
+        info["mcp_servers"] = []
     try:
         from hermes_cli.banner import get_update_result
         from hermes_cli.config import recommended_update_command

From b82ec6419d8fe49bf0bef46b45d78276157b9838 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 09:23:47 -0500
Subject: [PATCH 031/143] test(tui-gateway): cover mcp_servers field in
 _session_info output

---
 tests/test_tui_gateway_server.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py
index d441e2b32d0..35bc3f449b2 100644
--- a/tests/test_tui_gateway_server.py
+++ b/tests/test_tui_gateway_server.py
@@ -531,3 +531,18 @@ def test_session_steer_errors_when_agent_has_no_steer_method():
     assert "error" in resp, resp
     assert resp["error"]["code"] == 4010
 
+
+def test_session_info_includes_mcp_servers(monkeypatch):
+    """_session_info reports whatever tools.mcp_tool.get_mcp_status() returns as "mcp_servers"."""
+    expected = [
+        {"name": "github", "transport": "http", "tools": 12, "connected": True},
+        {"name": "filesystem", "transport": "stdio", "tools": 4, "connected": True},
+        {"name": "broken", "transport": "stdio", "tools": 0, "connected": False},
+    ]
+    stub = types.ModuleType("tools.mcp_tool")
+    stub.get_mcp_status = lambda: expected
+    monkeypatch.setitem(sys.modules, "tools.mcp_tool", stub)
+
+    agent = types.SimpleNamespace(tools=[], model="")
+    assert server._session_info(agent)["mcp_servers"] == expected
+

From 382132302917348e060063b7516c0cd616b07df6 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 09:23:47 -0500
Subject: [PATCH 032/143] feat(tui): render per-MCP-server status block in
 SessionPanel

---
 ui-tui/src/components/branding.tsx | 25 +++++++++++++++++++++++++
 ui-tui/src/types.ts                |  8 ++++++++
 2 files changed, 33 insertions(+)

diff --git a/ui-tui/src/components/branding.tsx b/ui-tui/src/components/branding.tsx
index fc019ac86f0..919c34b612f 100644
--- a/ui-tui/src/components/branding.tsx
+++ b/ui-tui/src/components/branding.tsx
@@ -126,11 +126,36 @@ export function SessionPanel({ info, sid, t }: SessionPanelProps) {
 
         {section('Tools', info.tools, 8, 'more toolsets…')}
         {section('Skills', info.skills)}
+
+        {info.mcp_servers && info.mcp_servers.length > 0 && (
+          <Box flexDirection="column" marginTop={1}>
+            <Text bold color={t.color.amber}>
+              MCP Servers
+            </Text>
+
+            {info.mcp_servers.map(s => (
+              <Text key={s.name} wrap="truncate">
+                <Text color={t.color.dim}>{`  ${s.name} `}</Text>
+                <Text color={t.color.dim}>{`[${s.transport}]`}</Text>
+                <Text color={t.color.dim}>: </Text>
+                {s.connected ? (
+                  <Text color={t.color.cornsilk}>
+                    {s.tools} tool{s.tools === 1 ? '' : 's'}
+                  </Text>
+                ) : (
+                  <Text color={t.color.error}>failed</Text>
+                )}
+              </Text>
+            ))}
+          </Box>
+        )}
+
         <Text />
 
         <Text color={t.color.cornsilk}>
           {flat(info.tools).length} tools{' · '}
           {flat(info.skills).length} skills
+          {info.mcp_servers?.length ? ` · ${info.mcp_servers.length} MCP` : ''}
           {' · '}
           <Text color={t.color.dim}>/help for commands</Text>
         </Text>
diff --git a/ui-tui/src/types.ts b/ui-tui/src/types.ts
index 32e99983ac9..98cc31203c5 100644
--- a/ui-tui/src/types.ts
+++ b/ui-tui/src/types.ts
@@ -51,8 +51,16 @@ export type Role = 'assistant' | 'system' | 'tool' | 'user'
 export type DetailsMode = 'hidden' | 'collapsed' | 'expanded'
 export type ThinkingMode = 'collapsed' | 'truncated' | 'full'
 
+export interface McpServerStatus {
+  connected: boolean // false is rendered as "failed" in SessionPanel
+  name: string // used as the React key for the server's row
+  tools: number // tool count; shown only when connected
+  transport: string // shown in brackets next to the name
+}
+
 export interface SessionInfo {
   cwd?: string
+  mcp_servers?: McpServerStatus[]
   model: string
   release_date?: string
   skills: Record<string, string[]>

From 6fbfae8f42297a71e170de6103af459bd0a81f27 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 09:26:24 -0500
Subject: [PATCH 033/143] feat(tui): add skillsHub overlay state wiring

Extend OverlayState with a skillsHub flag, fold it into $isBlocked, and
teach Ctrl+C to close the overlay so later PRs can render the component
behind this slot.
---
 ui-tui/src/app/interfaces.ts       | 1 +
 ui-tui/src/app/overlayStore.ts     | 7 +++++--
 ui-tui/src/app/useInputHandlers.ts | 4 ++++
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/ui-tui/src/app/interfaces.ts b/ui-tui/src/app/interfaces.ts
index bf3d54c627b..a23b2068836 100644
--- a/ui-tui/src/app/interfaces.ts
+++ b/ui-tui/src/app/interfaces.ts
@@ -57,6 +57,7 @@ export interface OverlayState {
   pager: null | PagerState
   picker: boolean
   secret: null | SecretReq
+  skillsHub: boolean
   sudo: null | SudoReq
 }
 
diff --git a/ui-tui/src/app/overlayStore.ts b/ui-tui/src/app/overlayStore.ts
index 4b24f0daab9..a2ea4002331 100644
--- a/ui-tui/src/app/overlayStore.ts
+++ b/ui-tui/src/app/overlayStore.ts
@@ -9,13 +9,16 @@ const buildOverlayState = (): OverlayState => ({
   pager: null,
   picker: false,
   secret: null,
+  skillsHub: false,
   sudo: null
 })
 
 export const $overlayState = atom<OverlayState>(buildOverlayState())
 
-export const $isBlocked = computed($overlayState, ({ approval, clarify, modelPicker, pager, picker, secret, sudo }) =>
-  Boolean(approval || clarify || modelPicker || pager || picker || secret || sudo)
+export const $isBlocked = computed(
+  $overlayState,
+  ({ approval, clarify, modelPicker, pager, picker, secret, skillsHub, sudo }) =>
+    Boolean(approval || clarify || modelPicker || pager || picker || secret || skillsHub || sudo)
 )
 
 export const getOverlayState = () => $overlayState.get()
diff --git a/ui-tui/src/app/useInputHandlers.ts b/ui-tui/src/app/useInputHandlers.ts
index 70000b73c8c..0279a203cac 100644
--- a/ui-tui/src/app/useInputHandlers.ts
+++ b/ui-tui/src/app/useInputHandlers.ts
@@ -63,6 +63,10 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
       return patchOverlayState({ modelPicker: false })
     }
 
+    if (overlay.skillsHub) {
+      return patchOverlayState({ skillsHub: false })
+    }
+
     if (overlay.picker) {
       return patchOverlayState({ picker: false })
     }

From ef284e021ac73fcdac9a8392a10bb42f2018b74f Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 09:27:48 -0500
Subject: [PATCH 034/143] feat(tui): add two-step SkillsHub overlay component
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New SkillsHub mirrors ModelPicker's category → item → actions flow with
paginated 12-line lists, 1-9/0 quick-pick, Esc-back navigation, and
lazy skills.manage inspect/install calls. Mount it from appOverlays
when overlay.skillsHub is true.
---
 ui-tui/src/components/appOverlays.tsx |   9 +-
 ui-tui/src/components/skillsHub.tsx   | 290 ++++++++++++++++++++++++++
 2 files changed, 298 insertions(+), 1 deletion(-)
 create mode 100644 ui-tui/src/components/skillsHub.tsx

diff --git a/ui-tui/src/components/appOverlays.tsx b/ui-tui/src/components/appOverlays.tsx
index 23187cf3f92..27db09024fc 100644
--- a/ui-tui/src/components/appOverlays.tsx
+++ b/ui-tui/src/components/appOverlays.tsx
@@ -11,6 +11,7 @@ import { MaskedPrompt } from './maskedPrompt.js'
 import { ModelPicker } from './modelPicker.js'
 import { ApprovalPrompt, ClarifyPrompt } from './prompts.js'
 import { SessionPicker } from './sessionPicker.js'
+import { SkillsHub } from './skillsHub.js'
 
 export function PromptZone({
   cols,
@@ -82,7 +83,7 @@ export function FloatingOverlays({
   const overlay = useStore($overlayState)
   const ui = useStore($uiState)
 
-  const hasAny = overlay.modelPicker || overlay.pager || overlay.picker || completions.length
+  const hasAny = overlay.modelPicker || overlay.pager || overlay.picker || overlay.skillsHub || completions.length
 
   if (!hasAny) {
     return null
@@ -115,6 +116,12 @@ export function FloatingOverlays({
         </FloatBox>
       )}
 
+      {overlay.skillsHub && (
+        <FloatBox color={ui.theme.color.bronze}>
+          <SkillsHub gw={gw} onClose={() => patchOverlayState({ skillsHub: false })} t={ui.theme} />
+        </FloatBox>
+      )}
+
       {overlay.pager && (
         <FloatBox color={ui.theme.color.bronze}>
           <Box flexDirection="column" paddingX={1} paddingY={1}>
diff --git a/ui-tui/src/components/skillsHub.tsx b/ui-tui/src/components/skillsHub.tsx
new file mode 100644
index 00000000000..03ed3d92f37
--- /dev/null
+++ b/ui-tui/src/components/skillsHub.tsx
@@ -0,0 +1,290 @@
+import { Box, Text, useInput } from '@hermes/ink'
+import { useEffect, useState } from 'react'
+
+import type { GatewayClient } from '../gatewayClient.js'
+import { rpcErrorMessage } from '../lib/rpc.js'
+import type { Theme } from '../theme.js'
+
+const VISIBLE = 12
+
+const pageOffset = (count: number, sel: number) => Math.max(0, Math.min(sel - Math.floor(VISIBLE / 2), count - VISIBLE))
+
+const visibleItems = (items: string[], sel: number) => {
+  const off = pageOffset(items.length, sel)
+
+  return { items: items.slice(off, off + VISIBLE), off }
+}
+
+export function SkillsHub({ gw, onClose, t }: SkillsHubProps) {
+  const [skillsByCat, setSkillsByCat] = useState<Record<string, string[]>>({})
+  const [selectedCat, setSelectedCat] = useState('')
+  const [catIdx, setCatIdx] = useState(0)
+  const [skillIdx, setSkillIdx] = useState(0)
+  const [stage, setStage] = useState<'actions' | 'category' | 'skill'>('category')
+  const [info, setInfo] = useState<null | SkillInfo>(null)
+  const [installing, setInstalling] = useState(false)
+  const [err, setErr] = useState('')
+  const [loading, setLoading] = useState(true)
+
+  useEffect(() => {
+    gw.request<{ skills?: Record<string, string[]> }>('skills.manage', { action: 'list' })
+      .then(r => {
+        setSkillsByCat(r?.skills ?? {})
+        setErr('')
+        setLoading(false)
+      })
+      .catch((e: unknown) => {
+        setErr(rpcErrorMessage(e))
+        setLoading(false)
+      })
+  }, [gw])
+
+  const cats = Object.keys(skillsByCat).sort()
+  const skills = selectedCat ? (skillsByCat[selectedCat] ?? []) : []
+  const skillName = skills[skillIdx] ?? ''
+
+  const inspect = (name: string) => {
+    setInfo(null)
+    setErr('')
+
+    gw.request<{ info?: SkillInfo }>('skills.manage', { action: 'inspect', query: name })
+      .then(r => setInfo(r?.info ?? { name }))
+      .catch((e: unknown) => setErr(rpcErrorMessage(e)))
+  }
+
+  const install = (name: string) => {
+    setInstalling(true)
+    setErr('')
+
+    gw.request<{ installed?: boolean; name?: string }>('skills.manage', { action: 'install', query: name })
+      .then(() => onClose())
+      .catch((e: unknown) => setErr(rpcErrorMessage(e)))
+      .finally(() => setInstalling(false))
+  }
+
+  useInput((ch, key) => {
+    if (installing) {
+      return
+    }
+
+    if (key.escape) {
+      if (stage === 'actions') {
+        setStage('skill')
+        setInfo(null)
+        setErr('')
+
+        return
+      }
+
+      if (stage === 'skill') {
+        setStage('category')
+        setSkillIdx(0)
+
+        return
+      }
+
+      onClose()
+
+      return
+    }
+
+    if (stage === 'actions') {
+      if (key.return || ch.toLowerCase() === 'x') {
+        if (skillName) {
+          install(skillName)
+        }
+
+        return
+      }
+
+      if (ch.toLowerCase() === 'i' && skillName) {
+        inspect(skillName)
+      }
+
+      return
+    }
+
+    const count = stage === 'category' ? cats.length : skills.length
+    const sel = stage === 'category' ? catIdx : skillIdx
+    const setSel = stage === 'category' ? setCatIdx : setSkillIdx
+
+    if (key.upArrow && sel > 0) {
+      setSel(v => v - 1)
+
+      return
+    }
+
+    if (key.downArrow && sel < count - 1) {
+      setSel(v => v + 1)
+
+      return
+    }
+
+    if (key.return) {
+      if (stage === 'category') {
+        const cat = cats[catIdx]
+
+        if (!cat) {
+          return
+        }
+
+        setSelectedCat(cat)
+        setSkillIdx(0)
+        setStage('skill')
+
+        return
+      }
+
+      const name = skills[skillIdx]
+
+      if (name) {
+        setStage('actions')
+        inspect(name)
+      }
+
+      return
+    }
+
+    const n = ch === '0' ? 10 : parseInt(ch, 10)
+
+    if (!Number.isNaN(n) && n >= 1 && n <= Math.min(10, count)) {
+      const off = pageOffset(count, sel)
+      const next = off + n - 1
+
+      if (stage === 'category') {
+        const cat = cats[next]
+
+        if (cat) {
+          setSelectedCat(cat)
+          setCatIdx(next)
+          setSkillIdx(0)
+          setStage('skill')
+        }
+
+        return
+      }
+
+      const name = skills[next]
+
+      if (name) {
+        setSkillIdx(next)
+        setStage('actions')
+        inspect(name)
+      }
+    }
+  })
+
+  if (loading) {
+    return <Text color={t.color.dim}>loading skills…</Text>
+  }
+
+  if (err && stage === 'category') {
+    return (
+      <Box flexDirection="column">
+        <Text color={t.color.label}>error: {err}</Text>
+        <Text color={t.color.dim}>Esc to cancel</Text>
+      </Box>
+    )
+  }
+
+  if (!cats.length) {
+    return (
+      <Box flexDirection="column">
+        <Text color={t.color.dim}>no skills available</Text>
+        <Text color={t.color.dim}>Esc to cancel</Text>
+      </Box>
+    )
+  }
+
+  if (stage === 'category') {
+    const rows = cats.map(c => `${c} · ${skillsByCat[c]?.length ?? 0} skills`)
+    const { items, off } = visibleItems(rows, catIdx)
+
+    return (
+      <Box flexDirection="column">
+        <Text bold color={t.color.amber}>
+          Skills Hub
+        </Text>
+
+        <Text color={t.color.dim}>select a category</Text>
+        {off > 0 && <Text color={t.color.dim}> ↑ {off} more</Text>}
+
+        {items.map((row, i) => {
+          const idx = off + i
+
+          return (
+            <Text color={catIdx === idx ? t.color.cornsilk : t.color.dim} key={row}>
+              {catIdx === idx ? '▸ ' : '  '}
+              {i + 1}. {row}
+            </Text>
+          )
+        })}
+
+        {off + VISIBLE < rows.length && <Text color={t.color.dim}> ↓ {rows.length - off - VISIBLE} more</Text>}
+        <Text color={t.color.dim}>↑/↓ select · Enter open · 1-9,0 quick · Esc cancel</Text>
+      </Box>
+    )
+  }
+
+  if (stage === 'skill') {
+    const { items, off } = visibleItems(skills, skillIdx)
+
+    return (
+      <Box flexDirection="column">
+        <Text bold color={t.color.amber}>
+          {selectedCat}
+        </Text>
+
+        <Text color={t.color.dim}>{skills.length} skill(s)</Text>
+        {!skills.length ? <Text color={t.color.dim}>no skills in this category</Text> : null}
+        {off > 0 && <Text color={t.color.dim}> ↑ {off} more</Text>}
+
+        {items.map((row, i) => {
+          const idx = off + i
+
+          return (
+            <Text color={skillIdx === idx ? t.color.cornsilk : t.color.dim} key={row}>
+              {skillIdx === idx ? '▸ ' : '  '}
+              {i + 1}. {row}
+            </Text>
+          )
+        })}
+
+        {off + VISIBLE < skills.length && <Text color={t.color.dim}> ↓ {skills.length - off - VISIBLE} more</Text>}
+        <Text color={t.color.dim}>
+          {skills.length ? '↑/↓ select · Enter open · 1-9,0 quick · Esc back' : 'Esc back'}
+        </Text>
+      </Box>
+    )
+  }
+
+  return (
+    <Box flexDirection="column">
+      <Text bold color={t.color.amber}>
+        {info?.name ?? skillName}
+      </Text>
+
+      <Text color={t.color.dim}>{info?.category ?? selectedCat}</Text>
+      {info?.description ? <Text color={t.color.cornsilk}>{info.description}</Text> : null}
+      {info?.path ? <Text color={t.color.dim}>path: {info.path}</Text> : null}
+      {!info && !err ? <Text color={t.color.dim}>loading…</Text> : null}
+      {err ? <Text color={t.color.label}>error: {err}</Text> : null}
+      {installing ? <Text color={t.color.amber}>installing…</Text> : null}
+
+      <Text color={t.color.dim}>Enter install · i inspect · x install · Esc back</Text>
+    </Box>
+  )
+}
+
+interface SkillInfo {
+  category?: string
+  description?: string
+  name?: string
+  path?: string
+}
+
+interface SkillsHubProps {
+  gw: GatewayClient
+  onClose: () => void
+  t: Theme
+}

From 949b8f5521a6fc98d472f58aa9be3dedaa90e1d3 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 09:29:39 -0500
Subject: [PATCH 035/143] feat(tui): register /skills slash command to open
 Skills Hub

Intercept bare /skills locally and flip overlay.skillsHub, so the
overlay opens instantly without waiting on slash.exec. /skills <args>
still forwards to slash.exec and paginates any output. Tests cover
both branches.
---
 .../src/__tests__/createSlashHandler.test.ts  | 20 ++++++++++++++++
 ui-tui/src/app/slash/commands/ops.ts          | 24 ++++++++++++++++++-
 2 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/ui-tui/src/__tests__/createSlashHandler.test.ts b/ui-tui/src/__tests__/createSlashHandler.test.ts
index 9e1db994634..c54a659b94c 100644
--- a/ui-tui/src/__tests__/createSlashHandler.test.ts
+++ b/ui-tui/src/__tests__/createSlashHandler.test.ts
@@ -17,6 +17,26 @@ describe('createSlashHandler', () => {
     expect(getOverlayState().picker).toBe(true)
   })
 
+  it('opens the skills hub locally for bare /skills', () => {
+    const ctx = buildCtx()
+
+    expect(createSlashHandler(ctx)('/skills')).toBe(true)
+    expect(getOverlayState().skillsHub).toBe(true)
+    expect(ctx.gateway.rpc).not.toHaveBeenCalled()
+    expect(ctx.gateway.gw.request).not.toHaveBeenCalled()
+  })
+
+  it('falls through /skills with args to slash.exec without opening overlay', () => {
+    const ctx = buildCtx()
+
+    expect(createSlashHandler(ctx)('/skills install foo')).toBe(true)
+    expect(getOverlayState().skillsHub).toBe(false)
+    expect(ctx.gateway.rpc).toHaveBeenCalledWith('slash.exec', {
+      command: 'skills install foo',
+      session_id: null
+    })
+  })
+
   it('cycles details mode and persists it', async () => {
     const ctx = buildCtx()
 
diff --git a/ui-tui/src/app/slash/commands/ops.ts b/ui-tui/src/app/slash/commands/ops.ts
index 979e1f470aa..aa02fa6cbbb 100644
--- a/ui-tui/src/app/slash/commands/ops.ts
+++ b/ui-tui/src/app/slash/commands/ops.ts
@@ -1,7 +1,29 @@
-import type { ToolsConfigureResponse } from '../../../gatewayTypes.js'
+import type { SlashExecResponse, ToolsConfigureResponse } from '../../../gatewayTypes.js'
+import { patchOverlayState } from '../../overlayStore.js'
 import type { SlashCommand } from '../types.js'
 
 export const opsCommands: SlashCommand[] = [
+  {
+    help: 'browse, inspect, and install skills',
+    name: 'skills',
+    run: (arg, ctx) => {
+      if (!arg.trim()) {
+        return patchOverlayState({ skillsHub: true })
+      }
+
+      ctx.gateway
+        .rpc<SlashExecResponse>('slash.exec', { command: `skills ${arg}`, session_id: ctx.sid })
+        .then(
+          ctx.guarded<SlashExecResponse>(r => {
+            if (r.output) {
+              ctx.transcript.page(r.output, 'Skills')
+            }
+          })
+        )
+        .catch(ctx.guardedErr)
+    }
+  },
+
   {
     help: 'enable or disable tools (client-side history reset on change)',
     name: 'tools',

From 5e148ca3d03f70d13b2f97d45f57d8664c2f7d55 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 09:46:36 -0500
Subject: [PATCH 036/143] fix(tui): route /skills subcommands through
 skills.manage instead of curses slash.exec
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

/skills install, inspect, search, browse, list now call the typed skills.manage RPC
and render results via panel/page. Previously they fell through to slash.exec which
invokes v1's curses code path — that hangs or crashes inside the Ink worker per the
§2 parity-audit finding.

Also drop Enter-as-install from the Skills Hub action stage since the Hub lists
locally installed skills; primary action is inspect-and-close. x still triggers a
manual reinstall for power users.
---
 .../src/__tests__/createSlashHandler.test.ts  |  46 ++++-
 ui-tui/src/app/slash/commands/ops.ts          | 158 ++++++++++++++++--
 ui-tui/src/components/skillsHub.tsx           |  16 +-
 3 files changed, 198 insertions(+), 22 deletions(-)

diff --git a/ui-tui/src/__tests__/createSlashHandler.test.ts b/ui-tui/src/__tests__/createSlashHandler.test.ts
index c54a659b94c..67aa27f768c 100644
--- a/ui-tui/src/__tests__/createSlashHandler.test.ts
+++ b/ui-tui/src/__tests__/createSlashHandler.test.ts
@@ -26,17 +26,55 @@ describe('createSlashHandler', () => {
     expect(ctx.gateway.gw.request).not.toHaveBeenCalled()
   })
 
-  it('falls through /skills with args to slash.exec without opening overlay', () => {
+  it('routes /skills install <name> to skills.manage without opening overlay', () => {
     const ctx = buildCtx()
 
     expect(createSlashHandler(ctx)('/skills install foo')).toBe(true)
     expect(getOverlayState().skillsHub).toBe(false)
-    expect(ctx.gateway.rpc).toHaveBeenCalledWith('slash.exec', {
-      command: 'skills install foo',
-      session_id: null
+    expect(ctx.gateway.rpc).toHaveBeenCalledWith('skills.manage', {
+      action: 'install',
+      query: 'foo'
     })
   })
 
+  it('routes /skills inspect <name> to skills.manage', () => {
+    const ctx = buildCtx()
+
+    createSlashHandler(ctx)('/skills inspect my-skill')
+    expect(ctx.gateway.rpc).toHaveBeenCalledWith('skills.manage', {
+      action: 'inspect',
+      query: 'my-skill'
+    })
+  })
+
+  it('routes /skills search <query> to skills.manage', () => {
+    const ctx = buildCtx()
+
+    createSlashHandler(ctx)('/skills search vibe')
+    expect(ctx.gateway.rpc).toHaveBeenCalledWith('skills.manage', {
+      action: 'search',
+      query: 'vibe'
+    })
+  })
+
+  it('routes /skills browse [page] to skills.manage with a numeric page', () => {
+    const ctx = buildCtx()
+
+    createSlashHandler(ctx)('/skills browse 3')
+    expect(ctx.gateway.rpc).toHaveBeenCalledWith('skills.manage', {
+      action: 'browse',
+      page: 3
+    })
+  })
+
+  it('shows usage for an unknown /skills subcommand', () => {
+    const ctx = buildCtx()
+
+    createSlashHandler(ctx)('/skills zzz')
+    expect(ctx.gateway.rpc).not.toHaveBeenCalled()
+    expect(ctx.transcript.sys).toHaveBeenCalledWith(expect.stringContaining('usage: /skills'))
+  })
+
   it('cycles details mode and persists it', async () => {
     const ctx = buildCtx()
 
diff --git a/ui-tui/src/app/slash/commands/ops.ts b/ui-tui/src/app/slash/commands/ops.ts
index aa02fa6cbbb..d941c5af410 100644
--- a/ui-tui/src/app/slash/commands/ops.ts
+++ b/ui-tui/src/app/slash/commands/ops.ts
@@ -1,26 +1,158 @@
-import type { SlashExecResponse, ToolsConfigureResponse } from '../../../gatewayTypes.js'
+import type { ToolsConfigureResponse } from '../../../gatewayTypes.js'
+import type { PanelSection } from '../../../types.js'
 import { patchOverlayState } from '../../overlayStore.js'
 import type { SlashCommand } from '../types.js'
 
+interface SkillInfo {
+  category?: string
+  description?: string
+  name?: string
+  path?: string
+}
+
+interface SkillsListResponse {
+  skills?: Record<string, string[]>
+}
+
+interface SkillsInspectResponse {
+  info?: SkillInfo
+}
+
+interface SkillsSearchResponse {
+  results?: { description?: string; name: string }[]
+}
+
+interface SkillsInstallResponse {
+  installed?: boolean
+  name?: string
+}
+
 export const opsCommands: SlashCommand[] = [
   {
-    help: 'browse, inspect, and install skills',
+    help: 'browse, inspect, install skills',
     name: 'skills',
     run: (arg, ctx) => {
-      if (!arg.trim()) {
+      const text = arg.trim()
+
+      if (!text) {
         return patchOverlayState({ skillsHub: true })
       }
 
-      ctx.gateway
-        .rpc<SlashExecResponse>('slash.exec', { command: `skills ${arg}`, session_id: ctx.sid })
-        .then(
-          ctx.guarded<SlashExecResponse>(r => {
-            if (r.output) {
-              ctx.transcript.page(r.output, 'Skills')
-            }
-          })
-        )
-        .catch(ctx.guardedErr)
+      const [sub, ...rest] = text.split(/\s+/)
+      const query = rest.join(' ').trim()
+      const { rpc } = ctx.gateway
+      const { page, panel, sys } = ctx.transcript
+
+      if (sub === 'list') {
+        rpc<SkillsListResponse>('skills.manage', { action: 'list' })
+          .then(
+            ctx.guarded<SkillsListResponse>(r => {
+              const cats = Object.entries(r.skills ?? {}).sort()
+
+              if (!cats.length) {
+                return sys('no skills available')
+              }
+
+              panel(
+                'Skills',
+                cats.map<PanelSection>(([title, items]) => ({ items, title }))
+              )
+            })
+          )
+          .catch(ctx.guardedErr)
+
+        return
+      }
+
+      if (sub === 'inspect') {
+        if (!query) {
+          return sys('usage: /skills inspect <name>')
+        }
+
+        rpc<SkillsInspectResponse>('skills.manage', { action: 'inspect', query })
+          .then(
+            ctx.guarded<SkillsInspectResponse>(r => {
+              const info = r.info ?? {}
+
+              if (!info.name) {
+                return sys(`unknown skill: ${query}`)
+              }
+
+              const rows: [string, string][] = [
+                ['Name', String(info.name)],
+                ['Category', String(info.category ?? '')],
+                ['Path', String(info.path ?? '')]
+              ]
+
+              const sections: PanelSection[] = [{ rows }]
+
+              if (info.description) {
+                sections.push({ text: String(info.description) })
+              }
+
+              panel('Skill', sections)
+            })
+          )
+          .catch(ctx.guardedErr)
+
+        return
+      }
+
+      if (sub === 'search') {
+        if (!query) {
+          return sys('usage: /skills search <query>')
+        }
+
+        rpc<SkillsSearchResponse>('skills.manage', { action: 'search', query })
+          .then(
+            ctx.guarded<SkillsSearchResponse>(r => {
+              const results = r.results ?? []
+
+              if (!results.length) {
+                return sys(`no results for: ${query}`)
+              }
+
+              panel(`Search: ${query}`, [{ rows: results.map(s => [s.name, s.description ?? '']) }])
+            })
+          )
+          .catch(ctx.guardedErr)
+
+        return
+      }
+
+      if (sub === 'install') {
+        if (!query) {
+          return sys('usage: /skills install <name or url>')
+        }
+
+        sys(`installing ${query}…`)
+
+        rpc<SkillsInstallResponse>('skills.manage', { action: 'install', query })
+          .then(
+            ctx.guarded<SkillsInstallResponse>(r =>
+              sys(r.installed ? `installed ${r.name ?? query}` : 'install failed')
+            )
+          )
+          .catch(ctx.guardedErr)
+
+        return
+      }
+
+      if (sub === 'browse') {
+        const pageNum = parseInt(query, 10) || 1
+
+        rpc<Record<string, unknown>>('skills.manage', { action: 'browse', page: pageNum })
+          .then(
+            ctx.guarded<Record<string, unknown>>(r =>
+              page(JSON.stringify(r, null, 2).slice(0, 4000), `Browse Skills — p${pageNum}`)
+            )
+          )
+          .catch(ctx.guardedErr)
+
+        return
+      }
+
+      sys('usage: /skills [list | inspect <n> | install <n> | search <q> | browse [page]]')
     }
   },
 
diff --git a/ui-tui/src/components/skillsHub.tsx b/ui-tui/src/components/skillsHub.tsx
index 03ed3d92f37..877bb0ef384 100644
--- a/ui-tui/src/components/skillsHub.tsx
+++ b/ui-tui/src/components/skillsHub.tsx
@@ -89,10 +89,16 @@ export function SkillsHub({ gw, onClose, t }: SkillsHubProps) {
     }
 
     if (stage === 'actions') {
-      if (key.return || ch.toLowerCase() === 'x') {
-        if (skillName) {
-          install(skillName)
-        }
+      if (key.return) {
+        setStage('skill')
+        setInfo(null)
+        setErr('')
+
+        return
+      }
+
+      if (ch.toLowerCase() === 'x' && skillName) {
+        install(skillName)
 
         return
       }
@@ -271,7 +277,7 @@ export function SkillsHub({ gw, onClose, t }: SkillsHubProps) {
       {err ? <Text color={t.color.label}>error: {err}</Text> : null}
       {installing ? <Text color={t.color.amber}>installing…</Text> : null}
 
-      <Text color={t.color.dim}>Enter install · i inspect · x install · Esc back</Text>
+      <Text color={t.color.dim}>i reinspect · x reinstall · Enter/Esc back</Text>
     </Box>
   )
 }

From f8becbfbeab87b35424bf4c636a3b192a2072e5d Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 09:48:38 -0500
Subject: [PATCH 037/143] feat(tui): per-language syntax highlighting in
 markdown code fences
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a minimal hand-rolled highlighter for ts/js/jsx/tsx, py, sh/bash, go, rust,
json, yaml, sql. Recognizes whole-line comments, single/double/backtick strings,
numbers, and per-language keyword sets. Unknown langs fall through to the current
plain rendering; the existing diff-specific colorization is preserved.

Closes the §8 "Markdown syntax highlighting is missing (only diff gets colored)"
finding from the TUI v2 audit without pulling in a highlighter library.
---
 ui-tui/src/__tests__/syntax.test.ts |  45 +++++++++++
 ui-tui/src/components/markdown.tsx  |  18 +++++
 ui-tui/src/lib/syntax.ts            | 117 ++++++++++++++++++++++++++++
 3 files changed, 180 insertions(+)
 create mode 100644 ui-tui/src/__tests__/syntax.test.ts
 create mode 100644 ui-tui/src/lib/syntax.ts

diff --git a/ui-tui/src/__tests__/syntax.test.ts b/ui-tui/src/__tests__/syntax.test.ts
new file mode 100644
index 00000000000..505988b2abf
--- /dev/null
+++ b/ui-tui/src/__tests__/syntax.test.ts
@@ -0,0 +1,45 @@
+import { describe, expect, it } from 'vitest'
+
+import { highlightLine, isHighlightable } from '../lib/syntax.js'
+import { DEFAULT_THEME } from '../theme.js'
+
+const t = DEFAULT_THEME
+
+describe('syntax highlighter', () => {
+  it('recognizes supported langs and aliases', () => {
+    expect(isHighlightable('ts')).toBe(true)
+    expect(isHighlightable('js')).toBe(true)
+    expect(isHighlightable('python')).toBe(true)
+    expect(isHighlightable('rs')).toBe(true)
+    expect(isHighlightable('bash')).toBe(true)
+    expect(isHighlightable('whatever')).toBe(false)
+    expect(isHighlightable('')).toBe(false)
+  })
+
+  it('paints a whole-line comment dim', () => {
+    const tokens = highlightLine('// hello', 'ts', t)
+
+    expect(tokens).toEqual([[t.color.dim, '// hello']])
+  })
+
+  it('paints keywords, strings, and numbers in a ts line', () => {
+    const tokens = highlightLine(`const x = 'hi' + 42`, 'ts', t)
+    const colors = tokens.map(tok => tok[0])
+
+    expect(colors).toContain(t.color.bronze) // const
+    expect(colors).toContain(t.color.amber) // 'hi'
+    expect(colors).toContain(t.color.cornsilk) // 42
+  })
+
+  it('falls through unchanged for unknown langs', () => {
+    const tokens = highlightLine(`const x = 1`, 'zzz', t)
+
+    expect(tokens).toEqual([['', 'const x = 1']])
+  })
+
+  it('treats `#` as a python comment, not a selector', () => {
+    const tokens = highlightLine('# comment', 'py', t)
+
+    expect(tokens).toEqual([[t.color.dim, '# comment']])
+  })
+})
diff --git a/ui-tui/src/components/markdown.tsx b/ui-tui/src/components/markdown.tsx
index 865ab857960..d43357b6918 100644
--- a/ui-tui/src/components/markdown.tsx
+++ b/ui-tui/src/components/markdown.tsx
@@ -1,6 +1,7 @@
 import { Box, Text } from '@hermes/ink'
 import { memo, type ReactNode, useMemo } from 'react'
 
+import { highlightLine, isHighlightable } from '../lib/syntax.js'
 import type { Theme } from '../theme.js'
 
 const FENCE_RE = /^\s*(`{3,}|~{3,})(.*)$/
@@ -282,11 +283,28 @@ function MdImpl({ compact, t, text }: MdProps) {
         start('code')
 
         const isDiff = lang === 'diff'
+        const highlighted = !isDiff && isHighlightable(lang)
 
         nodes.push(
           <Box flexDirection="column" key={key} paddingLeft={2}>
             {lang && !isDiff && <Text color={t.color.dim}>{'─ ' + lang}</Text>}
             {block.map((l, j) => {
+              if (highlighted) {
+                return (
+                  <Text key={j}>
+                    {highlightLine(l, lang, t).map(([color, text], k) =>
+                      color ? (
+                        <Text color={color} key={k}>
+                          {text}
+                        </Text>
+                      ) : (
+                        <Text key={k}>{text}</Text>
+                      )
+                    )}
+                  </Text>
+                )
+              }
+
               const add = isDiff && l.startsWith('+')
               const del = isDiff && l.startsWith('-')
               const hunk = isDiff && l.startsWith('@@')
diff --git a/ui-tui/src/lib/syntax.ts b/ui-tui/src/lib/syntax.ts
new file mode 100644
index 00000000000..06173b63e9f
--- /dev/null
+++ b/ui-tui/src/lib/syntax.ts
@@ -0,0 +1,117 @@
+import type { Theme } from '../theme.js'
+
+export type Token = [string, string]
+
+interface LangSpec {
+  comment: null | string
+  keywords: Set<string>
+}
+
+const KW = (s: string) => new Set(s.split(/\s+/).filter(Boolean))
+
+const TS = KW(`
+  abstract as async await break case catch class const continue debugger default delete do else enum export extends
+  false finally for from function get if implements import in instanceof interface is let new null of package private
+  protected public readonly return set static super switch this throw true try type typeof undefined var void while
+  with yield
+`)
+
+const PY = KW(`
+  False None True and as assert async await break class continue def del elif else except finally for from global if
+  import in is lambda nonlocal not or pass raise return try while with yield
+`)
+
+const SH = KW(`
+  if then else elif fi for in do done while until case esac function return break continue local export readonly
+  declare typeset
+`)
+
+const GO = KW(`
+  break case chan const continue default defer else fallthrough for func go goto if import interface map package range
+  return select struct switch type var nil true false
+`)
+
+const RUST = KW(`
+  as async await break const continue crate dyn else enum extern false fn for if impl in let loop match mod move mut
+  pub ref return self Self static struct super trait true type unsafe use where while yield
+`)
+
+const SQL = KW(`
+  select from where and or not in is null as by group order limit offset insert into values update set delete create
+  table drop alter add column primary key foreign references join left right inner outer on
+`)
+
+const LANGS: Record<string, LangSpec> = {
+  go: { comment: '//', keywords: GO },
+  json: { comment: null, keywords: KW('true false null') },
+  py: { comment: '#', keywords: PY },
+  rust: { comment: '//', keywords: RUST },
+  sh: { comment: '#', keywords: SH },
+  sql: { comment: '--', keywords: SQL },
+  ts: { comment: '//', keywords: TS },
+  yaml: { comment: '#', keywords: KW('true false null yes no on off') }
+}
+
+const ALIAS: Record<string, string> = {
+  bash: 'sh',
+  javascript: 'ts',
+  js: 'ts',
+  jsx: 'ts',
+  python: 'py',
+  rs: 'rust',
+  shell: 'sh',
+  tsx: 'ts',
+  typescript: 'ts',
+  yml: 'yaml',
+  zsh: 'sh'
+}
+
+const resolve = (lang: string): LangSpec | null => LANGS[ALIAS[lang] ?? lang] ?? null
+
+export const isHighlightable = (lang: string): boolean => resolve(lang) !== null
+
+const TOKEN_RE = /'(?:[^'\\]|\\.)*'|"(?:[^"\\]|\\.)*"|`(?:[^`\\]|\\.)*`|\b\d+(?:\.\d+)?\b|[A-Za-z_$][\w$]*/g
+
+export function highlightLine(line: string, lang: string, t: Theme): Token[] {
+  const spec = resolve(lang)
+
+  if (!spec) {
+    return [['', line]]
+  }
+
+  if (spec.comment && line.trimStart().startsWith(spec.comment)) {
+    return [[t.color.dim, line]]
+  }
+
+  const tokens: Token[] = []
+  let last = 0
+
+  for (const m of line.matchAll(TOKEN_RE)) {
+    const start = m.index ?? 0
+
+    if (start > last) {
+      tokens.push(['', line.slice(last, start)])
+    }
+
+    const tok = m[0]
+    const ch = tok[0]!
+
+    if (ch === '"' || ch === "'" || ch === '`') {
+      tokens.push([t.color.amber, tok])
+    } else if (ch >= '0' && ch <= '9') {
+      tokens.push([t.color.cornsilk, tok])
+    } else if (spec.keywords.has(tok)) {
+      tokens.push([t.color.bronze, tok])
+    } else {
+      tokens.push(['', tok])
+    }
+
+    last = start + tok.length
+  }
+
+  if (last < line.length) {
+    tokens.push(['', line.slice(last)])
+  }
+
+  return tokens
+}

From 8a0c774e9efd771c317e6f158a080ea19267182b Mon Sep 17 00:00:00 2001
From: Siddharth Balyan <52913345+alt-glitch@users.noreply.github.com>
Date: Sat, 18 Apr 2026 08:25:39 -0700
Subject: [PATCH 038/143] Add web dashboard build to Nix flake (#12194)

The web dashboard (Vite/React frontend) is now built as a separate Nix
derivation and baked into the Hermes package. The build output is
installed to a standard location and exposed via the `HERMES_WEB_DIST`
environment variable, allowing the dashboard command to use pre-built
assets when available (e.g., in packaged releases) instead of rebuilding
on every invocation.
---
 hermes_cli/main.py       |  5 ++--
 hermes_cli/web_server.py |  2 +-
 nix/packages.nix         |  7 +++++
 nix/web.nix              | 63 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 74 insertions(+), 3 deletions(-)
 create mode 100644 nix/web.nix

diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index a13a6f88ee9..ce02c2e72c4 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -6229,8 +6229,9 @@ def cmd_dashboard(args):
         print(f"Install them with:  {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'")
         sys.exit(1)
 
-    if not _build_web_ui(PROJECT_ROOT / "web", fatal=True):
-        sys.exit(1)
+    if "HERMES_WEB_DIST" not in os.environ:
+        if not _build_web_ui(PROJECT_ROOT / "web", fatal=True):
+            sys.exit(1)
 
     from hermes_cli.web_server import start_server
 
diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py
index 0d0dc4a66b5..110b81e4b5e 100644
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@@ -59,7 +59,7 @@ except ImportError:
         f"Install with: {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'"
     )
 
-WEB_DIST = Path(__file__).parent / "web_dist"
+WEB_DIST = Path(os.environ["HERMES_WEB_DIST"]) if "HERMES_WEB_DIST" in os.environ else Path(__file__).parent / "web_dist"
 _log = logging.getLogger(__name__)
 
 app = FastAPI(title="Hermes Agent", version=__version__)
diff --git a/nix/packages.nix b/nix/packages.nix
index 968ad12fb71..94e84af6d87 100644
--- a/nix/packages.nix
+++ b/nix/packages.nix
@@ -18,6 +18,10 @@
         filter = path: _type: !(pkgs.lib.hasInfix "/index-cache/" path);
       };
 
+      hermesWeb = pkgs.callPackage ./web.nix {
+        npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default;
+      };
+
       runtimeDeps = with pkgs; [
         nodejs_22
         ripgrep
@@ -52,6 +56,7 @@
 
             mkdir -p $out/share/hermes-agent $out/bin
             cp -r ${bundledSkills} $out/share/hermes-agent/skills
+            cp -r ${hermesWeb} $out/share/hermes-agent/web_dist
 
             # copy pre-built TUI (same layout as dev: ui-tui/dist/ + node_modules/)
             mkdir -p $out/ui-tui
@@ -62,6 +67,7 @@
                 makeWrapper ${hermesVenv}/bin/${name} $out/bin/${name} \
                   --suffix PATH : "${runtimePath}" \
                   --set HERMES_BUNDLED_SKILLS $out/share/hermes-agent/skills \
+                  --set HERMES_WEB_DIST $out/share/hermes-agent/web_dist \
                   --set HERMES_TUI_DIR $out/ui-tui \
                   --set HERMES_PYTHON ${hermesVenv}/bin/python3 \
                   --set HERMES_NODE ${pkgs.nodejs_22}/bin/node
@@ -104,6 +110,7 @@
         };
 
         tui = hermesTui;
+        web = hermesWeb;
       };
     };
 }
diff --git a/nix/web.nix b/nix/web.nix
new file mode 100644
index 00000000000..247889753f6
--- /dev/null
+++ b/nix/web.nix
@@ -0,0 +1,63 @@
+# nix/web.nix — Hermes Web Dashboard (Vite/React) frontend build
+{ pkgs, npm-lockfile-fix, ... }:
+let
+  src = ../web;
+  npmDeps = pkgs.fetchNpmDeps {
+    inherit src;
+    hash = "sha256-Y0pOzdFG8BLjfvCLmsvqYpjxFjAQabXp1i7X9W/cCU4=";
+  };
+
+  npmLockHash = builtins.hashString "sha256" (builtins.readFile ../web/package-lock.json);
+in
+pkgs.buildNpmPackage {
+  pname = "hermes-web";
+  version = "0.0.0";
+  inherit src npmDeps;
+
+  doCheck = false;
+
+  buildPhase = ''
+    npx tsc -b
+    npx vite build --outDir dist
+  '';
+
+  installPhase = ''
+    runHook preInstall
+    cp -r dist $out
+    runHook postInstall
+  '';
+
+  nativeBuildInputs = [
+    (pkgs.writeShellScriptBin "update_web_lockfile" ''
+      set -euox pipefail
+
+      REPO_ROOT=$(git rev-parse --show-toplevel)
+
+      cd "$REPO_ROOT/web"
+      rm -rf node_modules/
+      npm cache clean --force
+      CI=true npm install
+      ${pkgs.lib.getExe npm-lockfile-fix} ./package-lock.json
+
+      NIX_FILE="$REPO_ROOT/nix/web.nix"
+      sed -i "s/hash = \"[^\"]*\";/hash = \"\";/" $NIX_FILE
+      NIX_OUTPUT=$(nix build .#web 2>&1 || true)
+      NEW_HASH=$(echo "$NIX_OUTPUT" | grep 'got:' | awk '{print $2}')
+      echo got new hash $NEW_HASH
+      sed -i "s|hash = \"[^\"]*\";|hash = \"$NEW_HASH\";|" $NIX_FILE
+      nix build .#web
+      echo "Updated npm hash in $NIX_FILE to $NEW_HASH"
+    '')
+  ];
+
+  passthru.devShellHook = ''
+    STAMP=".nix-stamps/hermes-web"
+    STAMP_VALUE="${npmLockHash}"
+    if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
+      echo "hermes-web: installing npm dependencies..."
+      cd web && CI=true npm install --silent --no-fund --no-audit 2>/dev/null && cd ..
+      mkdir -p .nix-stamps
+      echo "$STAMP_VALUE" > "$STAMP"
+    fi
+  '';
+}

From b0efdf37d783e4e5345bc3687557a48b4504c1d3 Mon Sep 17 00:00:00 2001
From: Siddharth Balyan <52913345+alt-glitch@users.noreply.github.com>
Date: Sat, 18 Apr 2026 09:21:03 -0700
Subject: [PATCH 039/143] =?UTF-8?q?fix(nix):=20upgrade=20Python=203.11=20?=
 =?UTF-8?q?=E2=86=92=203.12,=20add=20cross-platform=20eval=20check=20(#122?=
 =?UTF-8?q?08)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 nix/checks.nix       | 25 ++++++++++++++++++++++++-
 nix/devShell.nix     |  2 +-
 nix/nixosModules.nix |  7 +++----
 nix/packages.nix     |  2 +-
 nix/python.nix       | 20 +++++++++++---------
 5 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/nix/checks.nix b/nix/checks.nix
index ff8e7947c57..984016a4f47 100644
--- a/nix/checks.nix
+++ b/nix/checks.nix
@@ -37,7 +37,30 @@ json.dump(sorted(leaf_paths(DEFAULT_CONFIG)), sys.stdout, indent=2)
     in {
       packages.configKeys = configKeys;
 
-      checks = lib.optionalAttrs pkgs.stdenv.hostPlatform.isLinux {
+      checks = {
+        # Cross-platform evaluation — catches "not supported for interpreter"
+        # errors (e.g. sphinx dropping python311) without needing a darwin builder.
+        # Evaluation is pure and instant; it doesn't build anything.
+        cross-eval = let
+          targetSystems = builtins.filter
+            (s: inputs.self.packages ? ${s})
+            [ "x86_64-linux" "aarch64-linux" "aarch64-darwin" "x86_64-darwin" ];
+          tryEvalPkg = sys:
+            let pkg = inputs.self.packages.${sys}.default;
+            in builtins.tryEval (builtins.seq pkg.drvPath true);
+          results = map (sys: { inherit sys; result = tryEvalPkg sys; }) targetSystems;
+          failures = builtins.filter (r: !r.result.success) results;
+          failMsg = lib.concatMapStringsSep "\n" (r: "  - ${r.sys}") failures;
+        in pkgs.runCommand "hermes-cross-eval" { } (
+          if failures != [] then
+            builtins.throw "Package fails to evaluate on:\n${failMsg}"
+          else ''
+            echo "PASS: package evaluates on all ${toString (builtins.length targetSystems)} platforms"
+            mkdir -p $out
+            echo "ok" > $out/result
+          ''
+        );
+      } // lib.optionalAttrs pkgs.stdenv.hostPlatform.isLinux {
         # Verify binaries exist and are executable
         package-contents = pkgs.runCommand "hermes-package-contents" { } ''
           set -e
diff --git a/nix/devShell.nix b/nix/devShell.nix
index db39c9d9557..63edc59cf1e 100644
--- a/nix/devShell.nix
+++ b/nix/devShell.nix
@@ -12,7 +12,7 @@
       devShells.default = pkgs.mkShell {
         inputsFrom = packages;
         packages = with pkgs; [
-          python311 uv nodejs_22 ripgrep git openssh ffmpeg
+          python312 uv nodejs_22 ripgrep git openssh ffmpeg
         ];
 
         shellHook = let
diff --git a/nix/nixosModules.nix b/nix/nixosModules.nix
index 24a2a1b6ddc..3f2709f8145 100644
--- a/nix/nixosModules.nix
+++ b/nix/nixosModules.nix
@@ -148,15 +148,14 @@
         su -s /bin/sh "$TARGET_USER" -c 'curl -LsSf https://astral.sh/uv/install.sh | sh' || true
       fi
 
-      # Python 3.11 venv — gives the agent a writable Python with pip.
-      # Uses uv to install Python 3.11 (Ubuntu 24.04 ships 3.12).
+      # Python 3.12 venv — gives the agent a writable Python with pip.
       # --seed includes pip/setuptools so bare `pip install` works.
       _UV_BIN="$TARGET_HOME/.local/bin/uv"
       if [ ! -d "$TARGET_HOME/.venv" ] && [ -x "$_UV_BIN" ]; then
         su -s /bin/sh "$TARGET_USER" -c "
           export PATH=\"\$HOME/.local/bin:\$PATH\"
-          uv python install 3.11
-          uv venv --python 3.11 --seed \"\$HOME/.venv\"
+          uv python install 3.12
+          uv venv --python 3.12 --seed \"\$HOME/.venv\"
         " || true
       fi
 
diff --git a/nix/packages.nix b/nix/packages.nix
index 94e84af6d87..912be7843bd 100644
--- a/nix/packages.nix
+++ b/nix/packages.nix
@@ -87,7 +87,7 @@
             STAMP_VALUE="${pyprojectHash}:${uvLockHash}"
             if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
               echo "hermes-agent: installing Python dependencies..."
-              uv venv .venv --python ${pkgs.python311}/bin/python3 2>/dev/null || true
+              uv venv .venv --python ${pkgs.python312}/bin/python3 2>/dev/null || true
               source .venv/bin/activate
               uv pip install -e ".[all]"
               [ -d mini-swe-agent ] && uv pip install -e ./mini-swe-agent 2>/dev/null || true
diff --git a/nix/python.nix b/nix/python.nix
index 91411f4d754..0bcd017e76d 100644
--- a/nix/python.nix
+++ b/nix/python.nix
@@ -1,6 +1,6 @@
 # nix/python.nix — uv2nix virtual environment builder
 {
-  python311,
+  python312,
   lib,
   callPackage,
   uv2nix,
@@ -51,28 +51,30 @@ let
 
   pythonPackageOverrides = final: _prev:
     if isAarch64Darwin then {
-      numpy = mkPrebuiltOverride final python311.pkgs.numpy { };
+      numpy = mkPrebuiltOverride final python312.pkgs.numpy { };
 
-      av = mkPrebuiltOverride final python311.pkgs.av { };
+      pyarrow = mkPrebuiltOverride final python312.pkgs.pyarrow { };
 
-      humanfriendly = mkPrebuiltOverride final python311.pkgs.humanfriendly { };
+      av = mkPrebuiltOverride final python312.pkgs.av { };
 
-      coloredlogs = mkPrebuiltOverride final python311.pkgs.coloredlogs {
+      humanfriendly = mkPrebuiltOverride final python312.pkgs.humanfriendly { };
+
+      coloredlogs = mkPrebuiltOverride final python312.pkgs.coloredlogs {
         humanfriendly = [ ];
       };
 
-      onnxruntime = mkPrebuiltOverride final python311.pkgs.onnxruntime {
+      onnxruntime = mkPrebuiltOverride final python312.pkgs.onnxruntime {
         coloredlogs = [ ];
         numpy = [ ];
         packaging = [ ];
       };
 
-      ctranslate2 = mkPrebuiltOverride final python311.pkgs.ctranslate2 {
+      ctranslate2 = mkPrebuiltOverride final python312.pkgs.ctranslate2 {
         numpy = [ ];
         pyyaml = [ ];
       };
 
-      faster-whisper = mkPrebuiltOverride final python311.pkgs.faster-whisper {
+      faster-whisper = mkPrebuiltOverride final python312.pkgs.faster-whisper {
         av = [ ];
         ctranslate2 = [ ];
         huggingface-hub = [ ];
@@ -84,7 +86,7 @@ let
 
   pythonSet =
     (callPackage pyproject-nix.build.packages {
-      python = python311;
+      python = python312;
     }).overrideScope
       (lib.composeManyExtensions [
         pyproject-build-systems.overlays.default

From 2da558ec36ea7c8743f0e686488af57da8be1634 Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Sat, 18 Apr 2026 17:36:06 +0530
Subject: [PATCH 040/143] fix(tui): clickable hyperlinks and skill slash
 command dispatch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two TUI fixes:

1. Hyperlinks are now clickable (Cmd+Click / Ctrl+Click) in terminals
   that support OSC 8.  The markdown renderer was rendering links as
   plain colored text; it now wraps them in the existing <Link>
   component from @hermes/ink, which emits OSC 8 escape sequences.

2. Skill slash commands (e.g. /hermes-agent-dev) now work in the TUI.
   The slash.exec handler was delegating to the _SlashWorker subprocess
   which calls cli.process_command().  For skills, process_command()
   queues the invocation message onto _pending_input — a Queue that
   nobody reads in the worker subprocess.  The skill message was lost.
   Now slash.exec detects skill commands early and rejects them so
   the TUI falls through to command.dispatch, which correctly builds
   and returns the skill payload for the client to send().
---
 tests/tui_gateway/test_protocol.py            | 48 +++++++++++++++++++
 tui_gateway/server.py                         | 13 +++++
 .../src/__tests__/createSlashHandler.test.ts  | 31 ++++++++++++
 ui-tui/src/components/markdown.tsx            | 22 +++++----
 ui-tui/src/types/hermes-ink.d.ts              |  1 +
 5 files changed, 106 insertions(+), 9 deletions(-)

diff --git a/tests/tui_gateway/test_protocol.py b/tests/tui_gateway/test_protocol.py
index 6ee5fe65b65..77cd7b1678d 100644
--- a/tests/tui_gateway/test_protocol.py
+++ b/tests/tui_gateway/test_protocol.py
@@ -231,3 +231,51 @@ def test_cli_exec_blocked(server, argv):
 ])
 def test_cli_exec_allowed(server, argv):
     assert server._cli_exec_blocked(argv) is None
+
+
+# ── slash.exec skill command interception ────────────────────────────
+
+
+def test_slash_exec_rejects_skill_commands(server):
+    """slash.exec must reject skill commands so the TUI falls through to command.dispatch."""
+    # Register a mock session
+    sid = "test-session"
+    server._sessions[sid] = {"session_key": sid, "agent": None}
+
+    # Mock scan_skill_commands to return a known skill
+    fake_skills = {"/hermes-agent-dev": {"name": "hermes-agent-dev", "description": "Dev workflow"}}
+
+    with patch("agent.skill_commands.scan_skill_commands", return_value=fake_skills):
+        resp = server.handle_request({
+            "id": "r1",
+            "method": "slash.exec",
+            "params": {"command": "hermes-agent-dev", "session_id": sid},
+        })
+
+    # Should return an error so the TUI's .catch() fires command.dispatch
+    assert "error" in resp
+    assert resp["error"]["code"] == 4018
+    assert "skill command" in resp["error"]["message"]
+
+
+def test_command_dispatch_returns_skill_payload(server):
+    """command.dispatch returns structured skill payload for the TUI to send()."""
+    sid = "test-session"
+    server._sessions[sid] = {"session_key": sid}
+
+    fake_skills = {"/hermes-agent-dev": {"name": "hermes-agent-dev", "description": "Dev workflow"}}
+    fake_msg = "Loaded skill content here"
+
+    with patch("agent.skill_commands.scan_skill_commands", return_value=fake_skills), \
+         patch("agent.skill_commands.build_skill_invocation_message", return_value=fake_msg):
+        resp = server.handle_request({
+            "id": "r2",
+            "method": "command.dispatch",
+            "params": {"name": "hermes-agent-dev", "session_id": sid},
+        })
+
+    assert "error" not in resp
+    result = resp["result"]
+    assert result["type"] == "skill"
+    assert result["message"] == fake_msg
+    assert result["name"] == "hermes-agent-dev"
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index a7dae9e5c60..45c95a6dabe 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -2333,6 +2333,19 @@ def _(rid, params: dict) -> dict:
     if not cmd:
         return _err(rid, 4004, "empty command")
 
+    # Skill slash commands (e.g. /hermes-agent-dev) must NOT go through the
+    # slash worker — process_command() queues the skill payload onto
+    # _pending_input which nobody reads in the worker subprocess.  Reject
+    # here so the TUI falls through to command.dispatch which handles skills
+    # correctly (builds the invocation message and returns it to the client).
+    try:
+        from agent.skill_commands import scan_skill_commands
+        _cmd_key = f"/{cmd.split()[0]}" if not cmd.startswith("/") else f"/{cmd.lstrip('/').split()[0]}"
+        if _cmd_key in scan_skill_commands():
+            return _err(rid, 4018, f"skill command: use command.dispatch for {_cmd_key}")
+    except Exception:
+        pass
+
     worker = session.get("slash_worker")
     if not worker:
         try:
diff --git a/ui-tui/src/__tests__/createSlashHandler.test.ts b/ui-tui/src/__tests__/createSlashHandler.test.ts
index 9e1db994634..a8f050a27da 100644
--- a/ui-tui/src/__tests__/createSlashHandler.test.ts
+++ b/ui-tui/src/__tests__/createSlashHandler.test.ts
@@ -121,6 +121,37 @@ describe('createSlashHandler', () => {
     expect(createSlashHandler(ctx)('/h')).toBe(true)
     expect(ctx.transcript.panel).toHaveBeenCalledWith(expect.any(String), expect.any(Array))
   })
+
+  it('falls through to command.dispatch for skill commands and sends the message', async () => {
+    const skillMessage = 'Use this skill to do X.\n\n## Steps\n1. First step'
+
+    const ctx = buildCtx({
+      gateway: {
+        gw: {
+          getLogTail: vi.fn(() => ''),
+          request: vi.fn((method: string) => {
+            if (method === 'slash.exec') {
+              return Promise.reject(new Error('skill command: use command.dispatch'))
+            }
+
+            if (method === 'command.dispatch') {
+              return Promise.resolve({ type: 'skill', message: skillMessage, name: 'hermes-agent-dev' })
+            }
+
+            return Promise.resolve({})
+          })
+        },
+        rpc: vi.fn(() => Promise.resolve({}))
+      }
+    })
+
+    const h = createSlashHandler(ctx)
+    expect(h('/hermes-agent-dev')).toBe(true)
+    await vi.waitFor(() => {
+      expect(ctx.transcript.sys).toHaveBeenCalledWith('⚡ loading skill: hermes-agent-dev')
+    })
+    expect(ctx.transcript.send).toHaveBeenCalledWith(skillMessage)
+  })
 })
 
 const buildCtx = (overrides: Partial<Ctx> = {}): Ctx => ({
diff --git a/ui-tui/src/components/markdown.tsx b/ui-tui/src/components/markdown.tsx
index 865ab857960..4555c8505f6 100644
--- a/ui-tui/src/components/markdown.tsx
+++ b/ui-tui/src/components/markdown.tsx
@@ -1,4 +1,4 @@
-import { Box, Text } from '@hermes/ink'
+import { Box, Link, Text } from '@hermes/ink'
 import { memo, type ReactNode, useMemo } from 'react'
 
 import type { Theme } from '../theme.js'
@@ -22,10 +22,12 @@ type Fence = {
   len: number
 }
 
-const renderLink = (key: number, t: Theme, label: string) => (
-  <Text color={t.color.amber} key={key} underline>
-    {label}
-  </Text>
+const renderLink = (key: number, t: Theme, label: string, url: string) => (
+  <Link key={key} url={url}>
+    <Text color={t.color.amber} underline>
+      {label}
+    </Text>
+  </Link>
 )
 
 const trimBareUrl = (value: string) => {
@@ -38,9 +40,11 @@ const trimBareUrl = (value: string) => {
 }
 
 const renderAutolink = (key: number, t: Theme, raw: string) => (
-  <Text color={t.color.amber} key={key} underline>
-    {raw.replace(/^mailto:/, '')}
-  </Text>
+  <Link key={key} url={raw}>
+    <Text color={t.color.amber} underline>
+      {raw.replace(/^mailto:/, '')}
+    </Text>
+  </Link>
 )
 
 const indentDepth = (indent: string) => Math.floor(indent.replace(/\t/g, '  ').length / 2)
@@ -141,7 +145,7 @@ function MdInline({ t, text }: { t: Theme; text: string }) {
         </Text>
       )
     } else if (m[4] && m[5]) {
-      parts.push(renderLink(parts.length, t, m[4]))
+      parts.push(renderLink(parts.length, t, m[4], m[5]))
     } else if (m[6]) {
       parts.push(renderAutolink(parts.length, t, m[6]))
     } else if (m[7]) {
diff --git a/ui-tui/src/types/hermes-ink.d.ts b/ui-tui/src/types/hermes-ink.d.ts
index 9b2deec35ff..faab71ae93d 100644
--- a/ui-tui/src/types/hermes-ink.d.ts
+++ b/ui-tui/src/types/hermes-ink.d.ts
@@ -63,6 +63,7 @@ declare module '@hermes/ink' {
   export const Box: React.ComponentType<any>
   export const AlternateScreen: React.ComponentType<any>
   export const Ansi: React.ComponentType<any>
+  export const Link: React.ComponentType<{ readonly url: string; readonly children?: React.ReactNode; readonly fallback?: React.ReactNode }>
   export const NoSelect: React.ComponentType<any>
   export const ScrollBox: React.ComponentType<any>
   export const Text: React.ComponentType<any>

From abc95338c210a587c2b718d62a02dbf9c87076d1 Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Sat, 18 Apr 2026 17:52:19 +0530
Subject: [PATCH 041/143] fix(tui): slash.exec _pending_input commands, tool
 ANSI, terminal title
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Additional TUI fixes discovered in the same audit:

1. /plan slash command was silently lost — process_command() queues the
   plan skill invocation onto _pending_input which nobody reads in the
   slash worker subprocess.  Now intercepted in slash.exec and routed
   through command.dispatch with a new 'send' dispatch type.

   Same interception added for /retry, /queue, /steer as safety nets
   (these already have correct TUI-local handlers in core.ts, but the
   server-side guard prevents regressions if the local handler is
   bypassed).

2. Tool results were rendered with their ANSI escape codes stripped —
   the messageLine component used stripAnsi() + plain <Text> for tool
   role messages, losing all color/styling from terminal,
   search_files, etc.  It now uses the <Ansi> component (already
   imported) when ANSI is detected.

3. Terminal tab title now shows model + busy status via the
   useTerminalTitle hook from @hermes/ink (previously unused).  Users
   can identify Hermes tabs and see at a glance whether the agent is
   busy or ready.

4. Added 'send' variant to CommandDispatchResponse type + asCommandDispatch
   parser + createSlashHandler handler for commands that need to inject
   a message into the conversation (plan, queue fallback, steer fallback).
---
 tests/tui_gateway/test_protocol.py            | 66 +++++++++++++++++++
 tui_gateway/server.py                         | 66 ++++++++++++++++++-
 .../src/__tests__/asCommandDispatch.test.ts   |  8 ++-
 .../src/__tests__/createSlashHandler.test.ts  | 30 +++++++++
 ui-tui/src/app/createSlashHandler.ts          |  4 ++
 ui-tui/src/app/useMainApp.ts                  |  9 ++-
 ui-tui/src/components/messageLine.tsx         | 14 ++--
 ui-tui/src/gatewayTypes.ts                    |  1 +
 ui-tui/src/lib/rpc.ts                         |  4 ++
 ui-tui/src/types/hermes-ink.d.ts              |  1 +
 10 files changed, 196 insertions(+), 7 deletions(-)

diff --git a/tests/tui_gateway/test_protocol.py b/tests/tui_gateway/test_protocol.py
index 77cd7b1678d..43f2b5a169b 100644
--- a/tests/tui_gateway/test_protocol.py
+++ b/tests/tui_gateway/test_protocol.py
@@ -258,6 +258,72 @@ def test_slash_exec_rejects_skill_commands(server):
     assert "skill command" in resp["error"]["message"]
 
 
+@pytest.mark.parametrize("cmd", ["retry", "queue hello", "q hello", "steer fix the test", "plan"])
+def test_slash_exec_rejects_pending_input_commands(server, cmd):
+    """slash.exec must reject commands that use _pending_input in the CLI."""
+    sid = "test-session"
+    server._sessions[sid] = {"session_key": sid, "agent": None}
+
+    resp = server.handle_request({
+        "id": "r1",
+        "method": "slash.exec",
+        "params": {"command": cmd, "session_id": sid},
+    })
+
+    assert "error" in resp
+    assert resp["error"]["code"] == 4018
+    assert "pending-input command" in resp["error"]["message"]
+
+
+def test_command_dispatch_queue_sends_message(server):
+    """command.dispatch /queue returns {type: 'send', message: ...} for the TUI."""
+    sid = "test-session"
+    server._sessions[sid] = {"session_key": sid}
+
+    resp = server.handle_request({
+        "id": "r1",
+        "method": "command.dispatch",
+        "params": {"name": "queue", "arg": "tell me about quantum computing", "session_id": sid},
+    })
+
+    assert "error" not in resp
+    result = resp["result"]
+    assert result["type"] == "send"
+    assert result["message"] == "tell me about quantum computing"
+
+
+def test_command_dispatch_queue_requires_arg(server):
+    """command.dispatch /queue without an argument returns an error."""
+    sid = "test-session"
+    server._sessions[sid] = {"session_key": sid}
+
+    resp = server.handle_request({
+        "id": "r2",
+        "method": "command.dispatch",
+        "params": {"name": "queue", "arg": "", "session_id": sid},
+    })
+
+    assert "error" in resp
+    assert resp["error"]["code"] == 4004
+
+
+def test_command_dispatch_steer_fallback_sends_message(server):
+    """command.dispatch /steer with no active agent falls back to send."""
+    sid = "test-session"
+    server._sessions[sid] = {"session_key": sid, "agent": None}
+
+    resp = server.handle_request({
+        "id": "r3",
+        "method": "command.dispatch",
+        "params": {"name": "steer", "arg": "focus on testing", "session_id": sid},
+    })
+
+    assert "error" not in resp
+    result = resp["result"]
+    assert result["type"] == "send"
+    assert result["message"] == "focus on testing"
+
+
 def test_command_dispatch_returns_skill_payload(server):
     """command.dispatch returns structured skill payload for the TUI to send()."""
     sid = "test-session"
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 45c95a6dabe..bf8425a8d1a 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -2117,6 +2117,56 @@ def _(rid, params: dict) -> dict:
     except Exception:
         pass
 
+    # ── Commands that queue messages onto _pending_input in the CLI ───
+    # In the TUI the slash worker subprocess has no reader for that queue,
+    # so we handle them here and return a structured payload.
+
+    if name in ("queue", "q"):
+        if not arg:
+            return _err(rid, 4004, "usage: /queue <prompt>")
+        return _ok(rid, {"type": "send", "message": arg})
+
+    if name == "retry":
+        agent = session.get("agent") if session else None
+        if agent and hasattr(agent, "conversation_history"):
+            hist = agent.conversation_history or []
+            for m in reversed(hist):
+                if m.get("role") == "user":
+                    content = m.get("content", "")
+                    if isinstance(content, list):
+                        content = " ".join(
+                            p.get("text", "") for p in content if isinstance(p, dict) and p.get("type") == "text"
+                        )
+                    if content:
+                        return _ok(rid, {"type": "send", "message": content})
+            return _err(rid, 4018, "no previous user message to retry")
+        return _err(rid, 4018, "no active session to retry")
+
+    if name == "steer":
+        if not arg:
+            return _err(rid, 4004, "usage: /steer <prompt>")
+        agent = session.get("agent") if session else None
+        if agent and hasattr(agent, "steer"):
+            try:
+                accepted = agent.steer(arg)
+                if accepted:
+                    return _ok(rid, {"type": "exec", "output": f"⏩ Steer queued — arrives after the next tool call: {arg[:80]}{'...' if len(arg) > 80 else ''}"})
+            except Exception:
+                pass
+        # Fallback: no active run, treat as next-turn message
+        return _ok(rid, {"type": "send", "message": arg})
+
+    if name == "plan":
+        try:
+            from agent.skill_commands import build_skill_invocation_message as _bsim, build_plan_path
+            plan_path = build_plan_path(session.get("session_key", "") if session else "")
+            msg = _bsim("/plan", f"{arg} {plan_path}".strip() if arg else plan_path,
+                        task_id=session.get("session_key", "") if session else "")
+            if msg:
+                return _ok(rid, {"type": "send", "message": msg})
+        except Exception as e:
+            return _err(rid, 5030, f"plan skill failed: {e}")
+
     return _err(rid, 4018, f"not a quick/plugin/skill command: {name}")
 
 
@@ -2338,9 +2388,23 @@ def _(rid, params: dict) -> dict:
     # _pending_input which nobody reads in the worker subprocess.  Reject
     # here so the TUI falls through to command.dispatch which handles skills
     # correctly (builds the invocation message and returns it to the client).
+    #
+    # The same applies to /retry, /queue, /steer, and /plan — they all
+    # put messages on _pending_input that the slash worker never reads.
+    # (/browser connect/disconnect also uses _pending_input for context
+    # notes, but the actual browser operations need the slash worker's
+    # env-var side effects, so they stay in slash.exec — only the context
+    # note to the model is lost, which is low-severity.)
+    _PENDING_INPUT_COMMANDS = frozenset({"retry", "queue", "q", "steer", "plan"})
+    _cmd_parts = cmd.split() if not cmd.startswith("/") else cmd.lstrip("/").split()
+    _cmd_base = _cmd_parts[0] if _cmd_parts else ""
+
+    if _cmd_base in _PENDING_INPUT_COMMANDS:
+        return _err(rid, 4018, f"pending-input command: use command.dispatch for /{_cmd_base}")
+
     try:
         from agent.skill_commands import scan_skill_commands
-        _cmd_key = f"/{cmd.split()[0]}" if not cmd.startswith("/") else f"/{cmd.lstrip('/').split()[0]}"
+        _cmd_key = f"/{_cmd_base}"
         if _cmd_key in scan_skill_commands():
             return _err(rid, 4018, f"skill command: use command.dispatch for {_cmd_key}")
     except Exception:
diff --git a/ui-tui/src/__tests__/asCommandDispatch.test.ts b/ui-tui/src/__tests__/asCommandDispatch.test.ts
index 49ea56936c5..dfa7595174e 100644
--- a/ui-tui/src/__tests__/asCommandDispatch.test.ts
+++ b/ui-tui/src/__tests__/asCommandDispatch.test.ts
@@ -3,7 +3,7 @@ import { describe, expect, it } from 'vitest'
 import { asCommandDispatch } from '../lib/rpc.js'
 
 describe('asCommandDispatch', () => {
-  it('parses exec, alias, and skill', () => {
+  it('parses exec, alias, skill, and send', () => {
     expect(asCommandDispatch({ type: 'exec', output: 'hi' })).toEqual({ type: 'exec', output: 'hi' })
     expect(asCommandDispatch({ type: 'alias', target: 'help' })).toEqual({ type: 'alias', target: 'help' })
     expect(asCommandDispatch({ type: 'skill', name: 'x', message: 'do' })).toEqual({
@@ -11,11 +11,17 @@ describe('asCommandDispatch', () => {
       name: 'x',
       message: 'do'
     })
+    expect(asCommandDispatch({ type: 'send', message: 'hello world' })).toEqual({
+      type: 'send',
+      message: 'hello world'
+    })
   })
 
   it('rejects malformed payloads', () => {
     expect(asCommandDispatch(null)).toBeNull()
     expect(asCommandDispatch({ type: 'alias' })).toBeNull()
     expect(asCommandDispatch({ type: 'skill', name: 1 })).toBeNull()
+    expect(asCommandDispatch({ type: 'send' })).toBeNull()
+    expect(asCommandDispatch({ type: 'send', message: 42 })).toBeNull()
   })
 })
diff --git a/ui-tui/src/__tests__/createSlashHandler.test.ts b/ui-tui/src/__tests__/createSlashHandler.test.ts
index a8f050a27da..53a10fd8e02 100644
--- a/ui-tui/src/__tests__/createSlashHandler.test.ts
+++ b/ui-tui/src/__tests__/createSlashHandler.test.ts
@@ -152,6 +152,36 @@ describe('createSlashHandler', () => {
     })
     expect(ctx.transcript.send).toHaveBeenCalledWith(skillMessage)
   })
+
+  it('handles send-type dispatch for /plan command', async () => {
+    const planMessage = 'Plan skill content loaded'
+
+    const ctx = buildCtx({
+      gateway: {
+        gw: {
+          getLogTail: vi.fn(() => ''),
+          request: vi.fn((method: string) => {
+            if (method === 'slash.exec') {
+              return Promise.reject(new Error('pending-input command'))
+            }
+
+            if (method === 'command.dispatch') {
+              return Promise.resolve({ type: 'send', message: planMessage })
+            }
+
+            return Promise.resolve({})
+          })
+        },
+        rpc: vi.fn(() => Promise.resolve({}))
+      }
+    })
+
+    const h = createSlashHandler(ctx)
+    expect(h('/plan create a REST API')).toBe(true)
+    await vi.waitFor(() => {
+      expect(ctx.transcript.send).toHaveBeenCalledWith(planMessage)
+    })
+  })
 })
 
 const buildCtx = (overrides: Partial<Ctx> = {}): Ctx => ({
diff --git a/ui-tui/src/app/createSlashHandler.ts b/ui-tui/src/app/createSlashHandler.ts
index 87475341aea..425e778ef3d 100644
--- a/ui-tui/src/app/createSlashHandler.ts
+++ b/ui-tui/src/app/createSlashHandler.ts
@@ -105,6 +105,10 @@ export function createSlashHandler(ctx: SlashHandlerContext): (cmd: string) => b
 
               return d.message?.trim() ? send(d.message) : sys(`/${parsed.name}: skill payload missing message`)
             }
+
+            if (d.type === 'send') {
+              return d.message?.trim() ? send(d.message) : sys(`/${parsed.name}: empty message`)
+            }
           })
           .catch(guardedErr)
       })
diff --git a/ui-tui/src/app/useMainApp.ts b/ui-tui/src/app/useMainApp.ts
index 73ea9febdac..46ab21c725a 100644
--- a/ui-tui/src/app/useMainApp.ts
+++ b/ui-tui/src/app/useMainApp.ts
@@ -1,4 +1,4 @@
-import { type ScrollBoxHandle, useApp, useHasSelection, useSelection, useStdout } from '@hermes/ink'
+import { type ScrollBoxHandle, useApp, useHasSelection, useSelection, useStdout, useTerminalTitle } from '@hermes/ink'
 import { useStore } from '@nanostores/react'
 import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
 
@@ -284,6 +284,13 @@ export function useMainApp(gw: GatewayClient) {
 
   useConfigSync({ gw, setBellOnComplete, setVoiceEnabled, sid: ui.sid })
 
+  // ── Terminal tab title ─────────────────────────────────────────────
+  // Show model name + status so users can identify the Hermes tab.
+  const shortModel = ui.info?.model?.replace(/^.*\//, '') ?? ''
+  const titleStatus = ui.busy ? '⏳' : '✓'
+  const terminalTitle = shortModel ? `${titleStatus} ${shortModel} — Hermes` : 'Hermes'
+  useTerminalTitle(terminalTitle)
+
   useEffect(() => {
     if (!ui.sid || !stdout) {
       return
diff --git a/ui-tui/src/components/messageLine.tsx b/ui-tui/src/components/messageLine.tsx
index 59db604e4bd..9cf78c15901 100644
--- a/ui-tui/src/components/messageLine.tsx
+++ b/ui-tui/src/components/messageLine.tsx
@@ -28,12 +28,18 @@ export const MessageLine = memo(function MessageLine({
   }
 
   if (msg.role === 'tool') {
+    const preview = compactPreview(hasAnsi(msg.text) ? stripAnsi(msg.text) : msg.text, Math.max(24, cols - 14)) ||
+      '(empty tool result)'
+
     return (
       <Box alignSelf="flex-start" borderColor={t.color.dim} borderStyle="round" marginLeft={3} paddingX={1}>
-        <Text color={t.color.dim} wrap="truncate-end">
-          {compactPreview(hasAnsi(msg.text) ? stripAnsi(msg.text) : msg.text, Math.max(24, cols - 14)) ||
-            '(empty tool result)'}
-        </Text>
+        {hasAnsi(msg.text) ? (
+          <Ansi>{compactPreview(msg.text, Math.max(24, cols - 14)) || '(empty tool result)'}</Ansi>
+        ) : (
+          <Text color={t.color.dim} wrap="truncate-end">
+            {preview}
+          </Text>
+        )}
       </Box>
     )
   }
diff --git a/ui-tui/src/gatewayTypes.ts b/ui-tui/src/gatewayTypes.ts
index c8d1c685523..e17e0e7c718 100644
--- a/ui-tui/src/gatewayTypes.ts
+++ b/ui-tui/src/gatewayTypes.ts
@@ -47,6 +47,7 @@ export type CommandDispatchResponse =
   | { output?: string; type: 'exec' | 'plugin' }
   | { target: string; type: 'alias' }
   | { message?: string; name: string; type: 'skill' }
+  | { message: string; type: 'send' }
 
 // ── Config ───────────────────────────────────────────────────────────
 
diff --git a/ui-tui/src/lib/rpc.ts b/ui-tui/src/lib/rpc.ts
index 1697d142bbf..70faa4bbbe1 100644
--- a/ui-tui/src/lib/rpc.ts
+++ b/ui-tui/src/lib/rpc.ts
@@ -26,6 +26,10 @@ export const asCommandDispatch = (value: unknown): CommandDispatchResponse | nul
     return { type: 'skill', name: o.name, message: typeof o.message === 'string' ? o.message : undefined }
   }
 
+  if (t === 'send' && typeof o.message === 'string') {
+    return { type: 'send', message: o.message }
+  }
+
   return null
 }
 
diff --git a/ui-tui/src/types/hermes-ink.d.ts b/ui-tui/src/types/hermes-ink.d.ts
index faab71ae93d..6815e4211b7 100644
--- a/ui-tui/src/types/hermes-ink.d.ts
+++ b/ui-tui/src/types/hermes-ink.d.ts
@@ -93,6 +93,7 @@ declare module '@hermes/ink' {
   export function useHasSelection(): boolean
   export function useStdout(): { readonly stdout?: NodeJS.WriteStream }
   export function useTerminalFocus(): boolean
+  export function useTerminalTitle(title: string | null): void
   export function useDeclaredCursor(args: {
     readonly line: number
     readonly column: number

From 656c375855f7ec331c43d4c796881b02ed2a5218 Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Sat, 18 Apr 2026 21:54:24 +0530
Subject: [PATCH 042/143] =?UTF-8?q?fix(tui):=20review=20follow-up=20?=
 =?UTF-8?q?=E2=80=94=20/retry,=20/plan,=20ANSI=20truncation,=20caching?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- /retry: use session['history'] instead of non-existent
  agent.conversation_history; truncate history at last user message
  to match CLI retry_last() behavior; add history_lock safety
- /plan: pass user instruction (arg) to build_plan_path instead of
  session_key; add runtime_note so agent knows where to save the plan
- ANSI tool results: render the full text via <Ansi> wrapped in
  <Text wrap="truncate-end"> instead of slicing raw ANSI through
  compactPreview (which cuts mid-escape-sequence, garbling output)
- Move _PENDING_INPUT_COMMANDS frozenset to module level
- Use get_skill_commands() (cached) instead of scan_skill_commands()
  (rescans disk) in slash.exec skill interception
- Add 3 retry tests: happy path with history truncation verification,
  empty history error, multipart content extraction
- Update test mock target from scan_skill_commands to get_skill_commands
---
 tests/tui_gateway/test_protocol.py    | 86 ++++++++++++++++++++++++++-
 tui_gateway/server.py                 | 73 ++++++++++++++---------
 ui-tui/src/components/messageLine.tsx |  7 ++-
 3 files changed, 135 insertions(+), 31 deletions(-)

diff --git a/tests/tui_gateway/test_protocol.py b/tests/tui_gateway/test_protocol.py
index 43f2b5a169b..eb51cccfecb 100644
--- a/tests/tui_gateway/test_protocol.py
+++ b/tests/tui_gateway/test_protocol.py
@@ -245,7 +245,7 @@ def test_slash_exec_rejects_skill_commands(server):
     # Mock scan_skill_commands to return a known skill
     fake_skills = {"/hermes-agent-dev": {"name": "hermes-agent-dev", "description": "Dev workflow"}}
 
-    with patch("agent.skill_commands.scan_skill_commands", return_value=fake_skills):
+    with patch("agent.skill_commands.get_skill_commands", return_value=fake_skills):
         resp = server.handle_request({
             "id": "r1",
             "method": "slash.exec",
@@ -324,6 +324,90 @@ def test_command_dispatch_steer_fallback_sends_message(server):
     assert result["message"] == "focus on testing"
 
 
+def test_command_dispatch_retry_finds_last_user_message(server):
+    """command.dispatch /retry walks session['history'] to find the last user message."""
+    sid = "test-session"
+    history = [
+        {"role": "user", "content": "first question"},
+        {"role": "assistant", "content": "first answer"},
+        {"role": "user", "content": "second question"},
+        {"role": "assistant", "content": "second answer"},
+    ]
+    server._sessions[sid] = {
+        "session_key": sid,
+        "agent": None,
+        "history": history,
+        "history_lock": threading.Lock(),
+        "history_version": 0,
+    }
+
+    resp = server.handle_request({
+        "id": "r4",
+        "method": "command.dispatch",
+        "params": {"name": "retry", "session_id": sid},
+    })
+
+    assert "error" not in resp
+    result = resp["result"]
+    assert result["type"] == "send"
+    assert result["message"] == "second question"
+    # Verify history was truncated: everything from last user message onward removed
+    assert len(server._sessions[sid]["history"]) == 2
+    assert server._sessions[sid]["history"][-1]["role"] == "assistant"
+    assert server._sessions[sid]["history_version"] == 1
+
+
+def test_command_dispatch_retry_empty_history(server):
+    """command.dispatch /retry with empty history returns error."""
+    sid = "test-session"
+    server._sessions[sid] = {
+        "session_key": sid,
+        "agent": None,
+        "history": [],
+        "history_lock": threading.Lock(),
+        "history_version": 0,
+    }
+
+    resp = server.handle_request({
+        "id": "r5",
+        "method": "command.dispatch",
+        "params": {"name": "retry", "session_id": sid},
+    })
+
+    assert "error" in resp
+    assert resp["error"]["code"] == 4018
+
+
+def test_command_dispatch_retry_handles_multipart_content(server):
+    """command.dispatch /retry extracts text from multipart content lists."""
+    sid = "test-session"
+    history = [
+        {"role": "user", "content": [
+            {"type": "text", "text": "analyze this"},
+            {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}
+        ]},
+        {"role": "assistant", "content": "I see the image."},
+    ]
+    server._sessions[sid] = {
+        "session_key": sid,
+        "agent": None,
+        "history": history,
+        "history_lock": threading.Lock(),
+        "history_version": 0,
+    }
+
+    resp = server.handle_request({
+        "id": "r6",
+        "method": "command.dispatch",
+        "params": {"name": "retry", "session_id": sid},
+    })
+
+    assert "error" not in resp
+    result = resp["result"]
+    assert result["type"] == "send"
+    assert result["message"] == "analyze this"
+
+
 def test_command_dispatch_returns_skill_payload(server):
     """command.dispatch returns structured skill payload for the TUI to send()."""
     sid = "test-session"
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index bf8425a8d1a..ccb9f7260bf 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -1949,6 +1949,13 @@ _TUI_EXTRA: list[tuple[str, str, str]] = [
     ("/logs", "Show recent gateway log lines", "TUI"),
 ]
 
+# Commands that queue messages onto _pending_input in the CLI.
+# In the TUI the slash worker subprocess has no reader for that queue,
+# so slash.exec rejects them → TUI falls through to command.dispatch.
+_PENDING_INPUT_COMMANDS: frozenset[str] = frozenset({
+    "retry", "queue", "q", "steer", "plan",
+})
+
 
 @method("commands.catalog")
 def _(rid, params: dict) -> dict:
@@ -2127,20 +2134,32 @@ def _(rid, params: dict) -> dict:
         return _ok(rid, {"type": "send", "message": arg})
 
     if name == "retry":
-        agent = session.get("agent") if session else None
-        if agent and hasattr(agent, "conversation_history"):
-            hist = agent.conversation_history or []
-            for m in reversed(hist):
-                if m.get("role") == "user":
-                    content = m.get("content", "")
-                    if isinstance(content, list):
-                        content = " ".join(
-                            p.get("text", "") for p in content if isinstance(p, dict) and p.get("type") == "text"
-                        )
-                    if content:
-                        return _ok(rid, {"type": "send", "message": content})
+        if not session:
+            return _err(rid, 4001, "no active session to retry")
+        history = session.get("history", [])
+        if not history:
             return _err(rid, 4018, "no previous user message to retry")
-        return _err(rid, 4018, "no active session to retry")
+        # Walk backwards to find the last user message
+        last_user_idx = None
+        for i in range(len(history) - 1, -1, -1):
+            if history[i].get("role") == "user":
+                last_user_idx = i
+                break
+        if last_user_idx is None:
+            return _err(rid, 4018, "no previous user message to retry")
+        content = history[last_user_idx].get("content", "")
+        if isinstance(content, list):
+            content = " ".join(
+                p.get("text", "") for p in content if isinstance(p, dict) and p.get("type") == "text"
+            )
+        if not content:
+            return _err(rid, 4018, "last user message is empty")
+        # Truncate history: remove everything from the last user message onward
+        # (mirrors CLI retry_last() which strips the failed exchange)
+        with session["history_lock"]:
+            session["history"] = history[:last_user_idx]
+            session["history_version"] = int(session.get("history_version", 0)) + 1
+        return _ok(rid, {"type": "send", "message": content})
 
     if name == "steer":
         if not arg:
@@ -2159,9 +2178,16 @@ def _(rid, params: dict) -> dict:
     if name == "plan":
         try:
             from agent.skill_commands import build_skill_invocation_message as _bsim, build_plan_path
-            plan_path = build_plan_path(session.get("session_key", "") if session else "")
-            msg = _bsim("/plan", f"{arg} {plan_path}".strip() if arg else plan_path,
-                        task_id=session.get("session_key", "") if session else "")
+            user_instruction = arg or ""
+            plan_path = build_plan_path(user_instruction)
+            msg = _bsim(
+                "/plan", user_instruction,
+                task_id=session.get("session_key", "") if session else "",
+                runtime_note=(
+                    "Save the markdown plan with write_file to this exact relative path "
+                    f"inside the active workspace/backend cwd: {plan_path}"
+                ),
+            )
             if msg:
                 return _ok(rid, {"type": "send", "message": msg})
         except Exception as e:
@@ -2383,19 +2409,12 @@ def _(rid, params: dict) -> dict:
     if not cmd:
         return _err(rid, 4004, "empty command")
 
-    # Skill slash commands (e.g. /hermes-agent-dev) must NOT go through the
-    # slash worker — process_command() queues the skill payload onto
-    # _pending_input which nobody reads in the worker subprocess.  Reject
-    # here so the TUI falls through to command.dispatch which handles skills
-    # correctly (builds the invocation message and returns it to the client).
-    #
-    # The same applies to /retry, /queue, /steer, and /plan — they all
-    # put messages on _pending_input that the slash worker never reads.
+    # Skill slash commands and _pending_input commands must NOT go through the
+    # slash worker — see _PENDING_INPUT_COMMANDS definition above.
     # (/browser connect/disconnect also uses _pending_input for context
     # notes, but the actual browser operations need the slash worker's
     # env-var side effects, so they stay in slash.exec — only the context
     # note to the model is lost, which is low-severity.)
-    _PENDING_INPUT_COMMANDS = frozenset({"retry", "queue", "q", "steer", "plan"})
     _cmd_parts = cmd.split() if not cmd.startswith("/") else cmd.lstrip("/").split()
     _cmd_base = _cmd_parts[0] if _cmd_parts else ""
 
@@ -2403,9 +2422,9 @@ def _(rid, params: dict) -> dict:
         return _err(rid, 4018, f"pending-input command: use command.dispatch for /{_cmd_base}")
 
     try:
-        from agent.skill_commands import scan_skill_commands
+        from agent.skill_commands import get_skill_commands
         _cmd_key = f"/{_cmd_base}"
-        if _cmd_key in scan_skill_commands():
+        if _cmd_key in get_skill_commands():
             return _err(rid, 4018, f"skill command: use command.dispatch for {_cmd_key}")
     except Exception:
         pass
diff --git a/ui-tui/src/components/messageLine.tsx b/ui-tui/src/components/messageLine.tsx
index 9cf78c15901..9de6f2aa12b 100644
--- a/ui-tui/src/components/messageLine.tsx
+++ b/ui-tui/src/components/messageLine.tsx
@@ -28,13 +28,14 @@ export const MessageLine = memo(function MessageLine({
   }
 
   if (msg.role === 'tool') {
-    const preview = compactPreview(hasAnsi(msg.text) ? stripAnsi(msg.text) : msg.text, Math.max(24, cols - 14)) ||
-      '(empty tool result)'
+    const maxChars = Math.max(24, cols - 14)
+    const stripped = hasAnsi(msg.text) ? stripAnsi(msg.text) : msg.text
+    const preview = compactPreview(stripped, maxChars) || '(empty tool result)'
 
     return (
       <Box alignSelf="flex-start" borderColor={t.color.dim} borderStyle="round" marginLeft={3} paddingX={1}>
         {hasAnsi(msg.text) ? (
-          <Ansi>{compactPreview(msg.text, Math.max(24, cols - 14)) || '(empty tool result)'}</Ansi>
+          <Text wrap="truncate-end"><Ansi>{msg.text}</Ansi></Text>
         ) : (
           <Text color={t.color.dim} wrap="truncate-end">
             {preview}

From c14b3b58806e7abd01d9ee01e4ff218c01590cd0 Mon Sep 17 00:00:00 2001
From: kshitij <82637225+kshitijk4poor@users.noreply.github.com>
Date: Sat, 18 Apr 2026 09:35:51 -0700
Subject: [PATCH 043/143] fix(kimi): force fixed temperature on kimi-k2.*
 models (k2.5, thinking, turbo) (#12144)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(kimi): force fixed temperature on kimi-k2.* models (k2.5, thinking, turbo)

The prior override only matched the literal model name "kimi-for-coding",
but Moonshot's coding endpoint is hit with real model IDs such as
`kimi-k2.5`, `kimi-k2-turbo-preview`, `kimi-k2-thinking`, etc.  Those
requests bypassed the override and kept the caller's temperature, so
Moonshot returns HTTP 400 "invalid temperature: only 0.6 is allowed for
this model" (or 1.0 for thinking variants).

Match the whole kimi-k2.* family:
  * kimi-k2-thinking / kimi-k2-thinking-turbo -> 1.0 (thinking mode)
  * all other kimi-k2.* -> 0.6 (non-thinking / instant mode)

Also accept an optional vendor prefix (e.g. `moonshotai/kimi-k2.5`) so
aggregator routings are covered.

* refactor(kimi): whitelist-match kimi coding models instead of prefix

Addresses review feedback on PR #12144.

- Replace `startswith("kimi-k2")` with explicit frozensets sourced from
  Moonshot's kimi-for-coding model list.  The prefix match would have also
  clamped `kimi-k2-instruct` / `kimi-k2-instruct-0905`, which are the
  separate non-coding K2 family with variable temperature (recommended 0.6
  but not enforced — see huggingface.co/moonshotai/Kimi-K2-Instruct).
- Confirmed via platform.kimi.ai docs that all five coding models
  (k2.5, k2-turbo-preview, k2-0905-preview, k2-thinking, k2-thinking-turbo)
  share the fixed-temperature lock, so the preview-model mapping is no
  longer an assumption.
- Drop the fragile `"thinking" in bare` substring test for a set lookup.
- Log a debug line on each override so operators can see when Hermes
  silently rewrites temperature.
- Update class docstring.  Extend the negative test to parametrize over
  kimi-k2-instruct, Kimi-K2-Instruct-0905, and a hypothetical future
  kimi-k2-experimental name — all must keep the caller's temperature.
---
 agent/auxiliary_client.py            | 41 +++++++++++++++++++--
 tests/agent/test_auxiliary_client.py | 54 ++++++++++++++++++++++++++--
 2 files changed, 90 insertions(+), 5 deletions(-)

diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 568d6109220..126f4615ddb 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -99,11 +99,48 @@ _FIXED_TEMPERATURE_MODELS: Dict[str, float] = {
     "kimi-for-coding": 0.6,
 }
 
+# Moonshot's kimi-for-coding endpoint (api.kimi.com/coding) documents:
+# "k2.5 model will use a fixed value 1.0, non-thinking mode will use a fixed
+# value 0.6.  Any other value will result in an error."  The same lock applies
+# to the other k2.* models served on that endpoint.  Enumerated explicitly so
+# non-coding siblings like `kimi-k2-instruct` (variable temperature, served on
+# the standard chat API and third parties) are NOT clamped.
+# Source: https://platform.kimi.ai/docs/guide/kimi-k2-5-quickstart
+_KIMI_INSTANT_MODELS: frozenset = frozenset({
+    "kimi-k2.5",
+    "kimi-k2-turbo-preview",
+    "kimi-k2-0905-preview",
+})
+_KIMI_THINKING_MODELS: frozenset = frozenset({
+    "kimi-k2-thinking",
+    "kimi-k2-thinking-turbo",
+})
+
 
 def _fixed_temperature_for_model(model: Optional[str]) -> Optional[float]:
-    """Return a required temperature override for models with strict contracts."""
+    """Return a required temperature override for models with strict contracts.
+
+    Moonshot's kimi-for-coding endpoint rejects any non-approved temperature on
+    the k2.5 family.  Non-thinking variants require exactly 0.6; thinking
+    variants require 1.0.  An optional ``vendor/`` prefix (e.g.
+    ``moonshotai/kimi-k2.5``) is tolerated for aggregator routings.
+
+    Returns ``None`` for every other model, including ``kimi-k2-instruct*``
+    which is the separate non-coding K2 family with variable temperature.
+    """
     normalized = (model or "").strip().lower()
-    return _FIXED_TEMPERATURE_MODELS.get(normalized)
+    fixed = _FIXED_TEMPERATURE_MODELS.get(normalized)
+    if fixed is not None:
+        logger.debug("Forcing temperature=%s for model %r (fixed map)", fixed, model)
+        return fixed
+    bare = normalized.rsplit("/", 1)[-1]
+    if bare in _KIMI_THINKING_MODELS:
+        logger.debug("Forcing temperature=1.0 for kimi thinking model %r", model)
+        return 1.0
+    if bare in _KIMI_INSTANT_MODELS:
+        logger.debug("Forcing temperature=0.6 for kimi instant model %r", model)
+        return 0.6
+    return None
 
 # Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
 _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py
index 1778855ddd7..aea8152a53e 100644
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -697,7 +697,12 @@ class TestIsConnectionError:
 
 
 class TestKimiForCodingTemperature:
-    """kimi-for-coding now requires temperature=0.6 exactly."""
+    """Moonshot kimi-for-coding models require fixed temperatures.
+
+    k2.5 / k2-turbo-preview / k2-0905-preview → 0.6 (non-thinking lock).
+    k2-thinking / k2-thinking-turbo → 1.0 (thinking lock).
+    kimi-k2-instruct* and every other model preserve the caller's temperature.
+    """
 
     def test_build_call_kwargs_forces_fixed_temperature(self):
         from agent.auxiliary_client import _build_call_kwargs
@@ -772,12 +777,55 @@ class TestKimiForCodingTemperature:
         assert kwargs["model"] == "kimi-for-coding"
         assert kwargs["temperature"] == 0.6
 
-    def test_non_kimi_model_still_preserves_temperature(self):
+    @pytest.mark.parametrize(
+        "model,expected",
+        [
+            ("kimi-k2.5", 0.6),
+            ("kimi-k2-turbo-preview", 0.6),
+            ("kimi-k2-0905-preview", 0.6),
+            ("kimi-k2-thinking", 1.0),
+            ("kimi-k2-thinking-turbo", 1.0),
+            ("moonshotai/kimi-k2.5", 0.6),
+            ("moonshotai/Kimi-K2-Thinking", 1.0),
+        ],
+    )
+    def test_kimi_k2_family_temperature_override(self, model, expected):
+        """Moonshot kimi-k2.* models only accept fixed temperatures.
+
+        Non-thinking models → 0.6, thinking-mode models → 1.0.
+        """
         from agent.auxiliary_client import _build_call_kwargs
 
         kwargs = _build_call_kwargs(
             provider="kimi-coding",
-            model="kimi-k2.5",
+            model=model,
+            messages=[{"role": "user", "content": "hello"}],
+            temperature=0.3,
+        )
+
+        assert kwargs["temperature"] == expected
+
+    @pytest.mark.parametrize(
+        "model",
+        [
+            "anthropic/claude-sonnet-4-6",
+            "gpt-5.4",
+            # kimi-k2-instruct is the non-coding K2 family — temperature is
+            # variable (recommended 0.6 but not enforced).  Must not clamp.
+            "kimi-k2-instruct",
+            "moonshotai/Kimi-K2-Instruct",
+            "moonshotai/Kimi-K2-Instruct-0905",
+            "kimi-k2-instruct-0905",
+            # Hypothetical future kimi name not in the whitelist.
+            "kimi-k2-experimental",
+        ],
+    )
+    def test_non_restricted_model_preserves_temperature(self, model):
+        from agent.auxiliary_client import _build_call_kwargs
+
+        kwargs = _build_call_kwargs(
+            provider="openrouter",
+            model=model,
             messages=[{"role": "user", "content": "hello"}],
             temperature=0.3,
         )

From b0bde98b0fb17c0015481e2f38b655f0a07558fa Mon Sep 17 00:00:00 2001
From: bluefishs <125471205+bluefishs@users.noreply.github.com>
Date: Sun, 19 Apr 2026 00:50:24 +0800
Subject: [PATCH 044/143] fix(docker): build web/ dashboard assets in image
 (#12180)

The Dockerfile installs root-level npm dependencies (for Playwright) and the
whatsapp-bridge bundle, but never builds the web/ Vite project. As a result,
'hermes dashboard' starts FastAPI on :9119 but serves a broken SPA because
hermes_cli/web_dist/ is empty and requests to /assets/index-<hash>.js 404.

Add a build step inside web/ so the Vite output is baked into the image.

Reproduce (before):
  docker build -t hermes-repro -f Dockerfile .
  docker run --rm -p 9119:9119 hermes-repro hermes dashboard
  curl -sI http://localhost:9119/assets/ | head -1   # -> 404

After: /assets/ returns the built asset path.
---
 Dockerfile | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index 37038233262..4f88a303d43 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -31,6 +31,12 @@ RUN npm install --prefer-offline --no-audit && \
     npm install --prefer-offline --no-audit && \
     npm cache clean --force
 
+# Build the web/ dashboard so FastAPI at :9119 can serve the Vite assets
+RUN cd /opt/hermes/web && \
+    npm install --prefer-offline --no-audit && \
+    npm run build && \
+    npm cache clean --force
+
 # Hand ownership to hermes user, then install Python deps in a virtualenv
 RUN chown -R hermes:hermes /opt/hermes
 USER hermes

From a828daa7f8eb8f2969c2c46a7796845bab900d04 Mon Sep 17 00:00:00 2001
From: Siddharth Balyan <52913345+alt-glitch@users.noreply.github.com>
Date: Sat, 18 Apr 2026 10:14:31 -0700
Subject: [PATCH 045/143] perf(docker): layer-cache npm/Playwright and skip
 redundant web rebuild (#12225)

* perf(docker): layer-cache npm/Playwright and skip redundant web rebuild

Copy package manifests before source so npm install + Playwright only
re-run when lockfiles change. Use COPY --chown instead of chown -R,
set HERMES_WEB_DIST to skip runtime web rebuild, and drop the
USER root / chmod dance since entrypoint.sh is already executable in git.

* Update Dockerfile
---
 Dockerfile | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 4f88a303d43..0d3da72eb77 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -21,32 +21,36 @@ RUN useradd -u 10000 -m -d /opt/data hermes
 COPY --chmod=0755 --from=gosu_source /gosu /usr/local/bin/
 COPY --chmod=0755 --from=uv_source /usr/local/bin/uv /usr/local/bin/uvx /usr/local/bin/
 
-COPY . /opt/hermes
 WORKDIR /opt/hermes
 
-# Install Node dependencies and Playwright as root (--with-deps needs apt)
+# ---------- Layer-cached dependency install ----------
+# Copy only package manifests first so npm install + Playwright are cached
+# unless the lockfiles themselves change.
+COPY package.json package-lock.json ./
+COPY scripts/whatsapp-bridge/package.json scripts/whatsapp-bridge/package-lock.json scripts/whatsapp-bridge/
+COPY web/package.json web/package-lock.json web/
+
 RUN npm install --prefer-offline --no-audit && \
     npx playwright install --with-deps chromium --only-shell && \
-    cd /opt/hermes/scripts/whatsapp-bridge && \
-    npm install --prefer-offline --no-audit && \
+    (cd scripts/whatsapp-bridge && npm install --prefer-offline --no-audit) && \
+    (cd web && npm install --prefer-offline --no-audit) && \
     npm cache clean --force
 
-# Build the web/ dashboard so FastAPI at :9119 can serve the Vite assets
-RUN cd /opt/hermes/web && \
-    npm install --prefer-offline --no-audit && \
-    npm run build && \
-    npm cache clean --force
+# ---------- Source code ----------
+# .dockerignore excludes node_modules, so the installs above survive.
+COPY --chown=hermes:hermes . .
 
-# Hand ownership to hermes user, then install Python deps in a virtualenv
-RUN chown -R hermes:hermes /opt/hermes
+# Build web dashboard (Vite outputs to hermes_cli/web_dist/)
+RUN cd web && npm run build
+
+# ---------- Python virtualenv ----------
+RUN chown hermes:hermes /opt/hermes
 USER hermes
-
 RUN uv venv && \
     uv pip install --no-cache-dir -e ".[all]"
 
-USER root
-RUN chmod +x /opt/hermes/docker/entrypoint.sh
-
+# ---------- Runtime ----------
+ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
 ENV HERMES_HOME=/opt/data
 VOLUME [ "/opt/data" ]
 ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]

From 65c0a30a776d2d20161658d7cfa8fe8ac78627ed Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Tue, 14 Apr 2026 15:53:57 -0700
Subject: [PATCH 046/143] =?UTF-8?q?feat(skills):=20add=20baoyu-infographic?=
 =?UTF-8?q?=20skill=20=E2=80=94=2021=20layouts=20=C3=97=2021=20styles?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Port of baoyu-infographic from JimLiu/baoyu-skills (v1.56.1) adapted
for Hermes Agent's tool ecosystem.

Adaptations from upstream:
- Frontmatter: openclaw metadata → hermes metadata
- Usage: slash command syntax → natural language triggers
- Removed EXTEND.md config system (not part of Hermes infrastructure)
- AskUserQuestion → clarify tool (one question at a time)
- Image generation → image_generate tool
- Removed Windows-specific paths
- Simplified file operations to use Hermes file tools
- All 45 reference files (layouts, styles, templates) preserved intact

Attribution preserved per agreement with 宝玉 (Jim Liu):
- author, version, GitHub homepage URL in frontmatter

Co-authored-by: Jim Liu 宝玉 <junminliu@gmail.com>
---
 skills/creative/baoyu-infographic/SKILL.md    | 236 +++++++++++++++++
 .../references/analysis-framework.md          | 182 +++++++++++++
 .../references/base-prompt.md                 |  43 +++
 .../references/layouts/bento-grid.md          |  41 +++
 .../references/layouts/binary-comparison.md   |  48 ++++
 .../references/layouts/bridge.md              |  41 +++
 .../references/layouts/circular-flow.md       |  41 +++
 .../references/layouts/comic-strip.md         |  41 +++
 .../references/layouts/comparison-matrix.md   |  41 +++
 .../references/layouts/dashboard.md           |  41 +++
 .../references/layouts/dense-modules.md       |  72 ++++++
 .../references/layouts/funnel.md              |  41 +++
 .../references/layouts/hierarchical-layers.md |  48 ++++
 .../references/layouts/hub-spoke.md           |  41 +++
 .../references/layouts/iceberg.md             |  41 +++
 .../references/layouts/isometric-map.md       |  41 +++
 .../references/layouts/jigsaw.md              |  41 +++
 .../references/layouts/linear-progression.md  |  48 ++++
 .../references/layouts/periodic-table.md      |  41 +++
 .../references/layouts/story-mountain.md      |  41 +++
 .../layouts/structural-breakdown.md           |  48 ++++
 .../references/layouts/tree-branching.md      |  41 +++
 .../references/layouts/venn-diagram.md        |  41 +++
 .../references/layouts/winding-roadmap.md     |  41 +++
 .../references/structured-content-template.md | 244 ++++++++++++++++++
 .../references/styles/aged-academia.md        |  36 +++
 .../references/styles/bold-graphic.md         |  36 +++
 .../references/styles/chalkboard.md           |  61 +++++
 .../references/styles/claymation.md           |  29 +++
 .../references/styles/corporate-memphis.md    |  29 +++
 .../references/styles/craft-handmade.md       |  44 ++++
 .../references/styles/cyberpunk-neon.md       |  29 +++
 .../references/styles/hand-drawn-edu.md       |  63 +++++
 .../references/styles/ikea-manual.md          |  29 +++
 .../references/styles/kawaii.md               |  29 +++
 .../references/styles/knolling.md             |  29 +++
 .../references/styles/lego-brick.md           |  29 +++
 .../references/styles/morandi-journal.md      |  60 +++++
 .../references/styles/origami.md              |  29 +++
 .../references/styles/pixel-art.md            |  29 +++
 .../references/styles/pop-laboratory.md       |  48 ++++
 .../references/styles/retro-pop-grid.md       |  47 ++++
 .../references/styles/storybook-watercolor.md |  29 +++
 .../references/styles/subway-map.md           |  29 +++
 .../references/styles/technical-schematic.md  |  36 +++
 .../references/styles/ui-wireframe.md         |  29 +++
 46 files changed, 2404 insertions(+)
 create mode 100644 skills/creative/baoyu-infographic/SKILL.md
 create mode 100644 skills/creative/baoyu-infographic/references/analysis-framework.md
 create mode 100644 skills/creative/baoyu-infographic/references/base-prompt.md
 create mode 100644 skills/creative/baoyu-infographic/references/layouts/bento-grid.md
 create mode 100644 skills/creative/baoyu-infographic/references/layouts/binary-comparison.md
 create mode 100644 skills/creative/baoyu-infographic/references/layouts/bridge.md
 create mode 100644 skills/creative/baoyu-infographic/references/layouts/circular-flow.md
 create mode 100644 skills/creative/baoyu-infographic/references/layouts/comic-strip.md
 create mode 100644 skills/creative/baoyu-infographic/references/layouts/comparison-matrix.md
 create mode 100644 skills/creative/baoyu-infographic/references/layouts/dashboard.md
 create mode 100644 skills/creative/baoyu-infographic/references/layouts/dense-modules.md
 create mode 100644 skills/creative/baoyu-infographic/references/layouts/funnel.md
 create mode 100644 skills/creative/baoyu-infographic/references/layouts/hierarchical-layers.md
 create mode 100644 skills/creative/baoyu-infographic/references/layouts/hub-spoke.md
 create mode 100644 skills/creative/baoyu-infographic/references/layouts/iceberg.md
 create mode 100644 skills/creative/baoyu-infographic/references/layouts/isometric-map.md
 create mode 100644 skills/creative/baoyu-infographic/references/layouts/jigsaw.md
 create mode 100644 skills/creative/baoyu-infographic/references/layouts/linear-progression.md
 create mode 100644 skills/creative/baoyu-infographic/references/layouts/periodic-table.md
 create mode 100644 skills/creative/baoyu-infographic/references/layouts/story-mountain.md
 create mode 100644 skills/creative/baoyu-infographic/references/layouts/structural-breakdown.md
 create mode 100644 skills/creative/baoyu-infographic/references/layouts/tree-branching.md
 create mode 100644 skills/creative/baoyu-infographic/references/layouts/venn-diagram.md
 create mode 100644 skills/creative/baoyu-infographic/references/layouts/winding-roadmap.md
 create mode 100644 skills/creative/baoyu-infographic/references/structured-content-template.md
 create mode 100644 skills/creative/baoyu-infographic/references/styles/aged-academia.md
 create mode 100644 skills/creative/baoyu-infographic/references/styles/bold-graphic.md
 create mode 100644 skills/creative/baoyu-infographic/references/styles/chalkboard.md
 create mode 100644 skills/creative/baoyu-infographic/references/styles/claymation.md
 create mode 100644 skills/creative/baoyu-infographic/references/styles/corporate-memphis.md
 create mode 100644 skills/creative/baoyu-infographic/references/styles/craft-handmade.md
 create mode 100644 skills/creative/baoyu-infographic/references/styles/cyberpunk-neon.md
 create mode 100644 skills/creative/baoyu-infographic/references/styles/hand-drawn-edu.md
 create mode 100644 skills/creative/baoyu-infographic/references/styles/ikea-manual.md
 create mode 100644 skills/creative/baoyu-infographic/references/styles/kawaii.md
 create mode 100644 skills/creative/baoyu-infographic/references/styles/knolling.md
 create mode 100644 skills/creative/baoyu-infographic/references/styles/lego-brick.md
 create mode 100644 skills/creative/baoyu-infographic/references/styles/morandi-journal.md
 create mode 100644 skills/creative/baoyu-infographic/references/styles/origami.md
 create mode 100644 skills/creative/baoyu-infographic/references/styles/pixel-art.md
 create mode 100644 skills/creative/baoyu-infographic/references/styles/pop-laboratory.md
 create mode 100644 skills/creative/baoyu-infographic/references/styles/retro-pop-grid.md
 create mode 100644 skills/creative/baoyu-infographic/references/styles/storybook-watercolor.md
 create mode 100644 skills/creative/baoyu-infographic/references/styles/subway-map.md
 create mode 100644 skills/creative/baoyu-infographic/references/styles/technical-schematic.md
 create mode 100644 skills/creative/baoyu-infographic/references/styles/ui-wireframe.md

diff --git a/skills/creative/baoyu-infographic/SKILL.md b/skills/creative/baoyu-infographic/SKILL.md
new file mode 100644
index 00000000000..fea3499cbf4
--- /dev/null
+++ b/skills/creative/baoyu-infographic/SKILL.md
@@ -0,0 +1,236 @@
+---
+name: baoyu-infographic
+description: Generate professional infographics with 21 layout types and 21 visual styles. Analyzes content, recommends layout×style combinations, and generates publication-ready infographics. Use when user asks to create "infographic", "visual summary", "信息图", "可视化", or "高密度信息大图".
+version: 1.56.1
+author: 宝玉 (JimLiu)
+license: MIT
+metadata:
+  hermes:
+    tags: [infographic, visual-summary, creative, image-generation]
+    homepage: https://github.com/JimLiu/baoyu-skills#baoyu-infographic
+---
+
+# Infographic Generator
+
+Adapted from [baoyu-infographic](https://github.com/JimLiu/baoyu-skills) for Hermes Agent's tool ecosystem.
+
+Two dimensions: **layout** (information structure) × **style** (visual aesthetics). Freely combine any layout with any style.
+
+## When to Use
+
+Trigger this skill when the user asks to create an infographic, visual summary, information graphic, or uses terms like "信息图", "可视化", or "高密度信息大图". The user provides content (text, file path, URL, or topic) and optionally specifies layout, style, aspect ratio, or language.
+
+## Options
+
+| Option | Values |
+|--------|--------|
+| Layout | 21 options (see Layout Gallery), default: bento-grid |
+| Style | 21 options (see Style Gallery), default: craft-handmade |
+| Aspect | Named: landscape (16:9), portrait (9:16), square (1:1). Custom: any W:H ratio (e.g., 3:4, 4:3, 2.35:1) |
+| Language | en, zh, ja, etc. |
+
+## Layout Gallery
+
+| Layout | Best For |
+|--------|----------|
+| `linear-progression` | Timelines, processes, tutorials |
+| `binary-comparison` | A vs B, before-after, pros-cons |
+| `comparison-matrix` | Multi-factor comparisons |
+| `hierarchical-layers` | Pyramids, priority levels |
+| `tree-branching` | Categories, taxonomies |
+| `hub-spoke` | Central concept with related items |
+| `structural-breakdown` | Exploded views, cross-sections |
+| `bento-grid` | Multiple topics, overview (default) |
+| `iceberg` | Surface vs hidden aspects |
+| `bridge` | Problem-solution |
+| `funnel` | Conversion, filtering |
+| `isometric-map` | Spatial relationships |
+| `dashboard` | Metrics, KPIs |
+| `periodic-table` | Categorized collections |
+| `comic-strip` | Narratives, sequences |
+| `story-mountain` | Plot structure, tension arcs |
+| `jigsaw` | Interconnected parts |
+| `venn-diagram` | Overlapping concepts |
+| `winding-roadmap` | Journey, milestones |
+| `circular-flow` | Cycles, recurring processes |
+| `dense-modules` | High-density modules, data-rich guides |
+
+Full definitions: `references/layouts/<layout>.md`
+
+## Style Gallery
+
+| Style | Description |
+|-------|-------------|
+| `craft-handmade` | Hand-drawn, paper craft (default) |
+| `claymation` | 3D clay figures, stop-motion |
+| `kawaii` | Japanese cute, pastels |
+| `storybook-watercolor` | Soft painted, whimsical |
+| `chalkboard` | Chalk on black board |
+| `cyberpunk-neon` | Neon glow, futuristic |
+| `bold-graphic` | Comic style, halftone |
+| `aged-academia` | Vintage science, sepia |
+| `corporate-memphis` | Flat vector, vibrant |
+| `technical-schematic` | Blueprint, engineering |
+| `origami` | Folded paper, geometric |
+| `pixel-art` | Retro 8-bit |
+| `ui-wireframe` | Grayscale interface mockup |
+| `subway-map` | Transit diagram |
+| `ikea-manual` | Minimal line art |
+| `knolling` | Organized flat-lay |
+| `lego-brick` | Toy brick construction |
+| `pop-laboratory` | Blueprint grid, coordinate markers, lab precision |
+| `morandi-journal` | Hand-drawn doodle, warm Morandi tones |
+| `retro-pop-grid` | 1970s retro pop art, Swiss grid, thick outlines |
+| `hand-drawn-edu` | Macaron pastels, hand-drawn wobble, stick figures |
+
+Full definitions: `references/styles/<style>.md`
+
+## Recommended Combinations
+
+| Content Type | Layout + Style |
+|--------------|----------------|
+| Timeline/History | `linear-progression` + `craft-handmade` |
+| Step-by-step | `linear-progression` + `ikea-manual` |
+| A vs B | `binary-comparison` + `corporate-memphis` |
+| Hierarchy | `hierarchical-layers` + `craft-handmade` |
+| Overlap | `venn-diagram` + `craft-handmade` |
+| Conversion | `funnel` + `corporate-memphis` |
+| Cycles | `circular-flow` + `craft-handmade` |
+| Technical | `structural-breakdown` + `technical-schematic` |
+| Metrics | `dashboard` + `corporate-memphis` |
+| Educational | `bento-grid` + `chalkboard` |
+| Journey | `winding-roadmap` + `storybook-watercolor` |
+| Categories | `periodic-table` + `bold-graphic` |
+| Product Guide | `dense-modules` + `morandi-journal` |
+| Technical Guide | `dense-modules` + `pop-laboratory` |
+| Trendy Guide | `dense-modules` + `retro-pop-grid` |
+| Educational Diagram | `hub-spoke` + `hand-drawn-edu` |
+| Process Tutorial | `linear-progression` + `hand-drawn-edu` |
+
+Default: `bento-grid` + `craft-handmade`
+
+## Keyword Shortcuts
+
+When user input contains these keywords, **auto-select** the associated layout and offer associated styles as top recommendations in Step 3. Skip content-based layout inference for matched keywords.
+
+If a shortcut has **Prompt Notes**, append them to the generated prompt (Step 5) as additional style instructions.
+
+| User Keyword | Layout | Recommended Styles | Default Aspect | Prompt Notes |
+|--------------|--------|--------------------|----------------|--------------|
+| 高密度信息大图 / high-density-info | `dense-modules` | `morandi-journal`, `pop-laboratory`, `retro-pop-grid` | portrait | — |
+| 信息图 / infographic | `bento-grid` | `craft-handmade` | landscape | Minimalist: clean canvas, ample whitespace, no complex background textures. Simple cartoon elements and icons only. |
+
+## Output Structure
+
+```
+infographic/{topic-slug}/
+├── source-{slug}.{ext}
+├── analysis.md
+├── structured-content.md
+├── prompts/infographic.md
+└── infographic.png
+```
+
+Slug: 2-4 words kebab-case from topic. Conflict: append `-YYYYMMDD-HHMMSS`.
+
+## Core Principles
+
+- Preserve source data faithfully — no summarization or rephrasing (but **strip any credentials, API keys, tokens, or secrets** before including in outputs)
+- Define learning objectives before structuring content
+- Structure for visual communication (headlines, labels, visual elements)
+
+## Workflow
+
+### Step 1: Analyze Content
+
+**Load references**: Read `references/analysis-framework.md` from this skill.
+
+1. Save source content (file path or paste → `source.md` using `write_file`)
+   - **Backup rule**: If `source.md` exists, rename to `source-backup-YYYYMMDD-HHMMSS.md`
+2. Analyze: topic, data type, complexity, tone, audience
+3. Detect source language and user language
+4. Extract design instructions from user input
+5. Save analysis to `analysis.md`
+   - **Backup rule**: If `analysis.md` exists, rename to `analysis-backup-YYYYMMDD-HHMMSS.md`
+
+See `references/analysis-framework.md` for detailed format.
+
+### Step 2: Generate Structured Content → `structured-content.md`
+
+Transform content into infographic structure:
+1. Title and learning objectives
+2. Sections with: key concept, content (verbatim), visual element, text labels
+3. Data points (all statistics/quotes copied exactly)
+4. Design instructions from user
+
+**Rules**: Markdown only. No new information. Preserve data faithfully. Strip any credentials or secrets from output.
+
+See `references/structured-content-template.md` for detailed format.
+
+### Step 3: Recommend Combinations
+
+**3.1 Check Keyword Shortcuts first**: If user input matches a keyword from the **Keyword Shortcuts** table, auto-select the associated layout and prioritize associated styles as top recommendations. Skip content-based layout inference.
+
+**3.2 Otherwise**, recommend 3-5 layout×style combinations based on:
+- Data structure → matching layout
+- Content tone → matching style
+- Audience expectations
+- User design instructions
+
+### Step 4: Confirm Options
+
+Use the `clarify` tool to confirm options with the user. Since `clarify` handles one question at a time, ask the most important question first:
+
+**Q1 — Combination**: Present 3+ layout×style combos with rationale. Ask user to pick one.
+
+**Q2 — Aspect**: Ask for aspect ratio preference (landscape/portrait/square or custom W:H).
+
+**Q3 — Language** (only if source ≠ user language): Ask which language the text content should use.
+
+### Step 5: Generate Prompt → `prompts/infographic.md`
+
+**Backup rule**: If `prompts/infographic.md` exists, rename to `prompts/infographic-backup-YYYYMMDD-HHMMSS.md`
+
+**Load references**: Read the selected layout from `references/layouts/<layout>.md` and style from `references/styles/<style>.md`.
+
+Combine:
+1. Layout definition from `references/layouts/<layout>.md`
+2. Style definition from `references/styles/<style>.md`
+3. Base template from `references/base-prompt.md`
+4. Structured content from Step 2
+5. All text in confirmed language
+
+**Aspect ratio resolution** for `{{ASPECT_RATIO}}`:
+- Named presets → ratio string: landscape→`16:9`, portrait→`9:16`, square→`1:1`
+- Custom W:H ratios → use as-is (e.g., `3:4`, `4:3`, `2.35:1`)
+
+Save the assembled prompt to `prompts/infographic.md` using `write_file`.
+
+### Step 6: Generate Image
+
+Use the `image_generate` tool with the assembled prompt from Step 5.
+
+- Map aspect ratio to image_generate's format: `16:9` → `landscape`, `9:16` → `portrait`, `1:1` → `square`
+- For custom ratios, pick the closest named aspect
+- On failure, auto-retry once
+- Save the resulting image URL/path to the output directory
+
+### Step 7: Output Summary
+
+Report: topic, layout, style, aspect, language, output path, files created.
+
+## References
+
+- `references/analysis-framework.md` — Analysis methodology
+- `references/structured-content-template.md` — Content format
+- `references/base-prompt.md` — Prompt template
+- `references/layouts/<layout>.md` — 21 layout definitions
+- `references/styles/<style>.md` — 21 style definitions
+
+## Pitfalls
+
+1. **Data integrity is paramount** — never summarize, paraphrase, or alter source statistics. "73% increase" must stay "73% increase", not "significant increase".
+2. **Strip secrets** — always scan source content for API keys, tokens, or credentials before including in any output file.
+3. **One message per section** — each infographic section should convey one clear concept. Overloading sections reduces readability.
+4. **Style consistency** — the style definition from the references file must be applied consistently across the entire infographic. Don't mix styles.
+5. **image_generate aspect ratios** — the tool only supports `landscape`, `portrait`, and `square`. Custom ratios like `3:4` should map to the nearest option (portrait in that case).
diff --git a/skills/creative/baoyu-infographic/references/analysis-framework.md b/skills/creative/baoyu-infographic/references/analysis-framework.md
new file mode 100644
index 00000000000..702bf7becf8
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/analysis-framework.md
@@ -0,0 +1,182 @@
+# Infographic Content Analysis Framework
+
+Deep analysis framework applying instructional design principles to infographic creation.
+
+## Purpose
+
+Before creating an infographic, thoroughly analyze the source material to:
+- Understand the content at a deep level
+- Identify clear learning objectives for the viewer
+- Structure information for maximum clarity and retention
+- Match content to optimal layout×style combinations
+- Preserve all source data verbatim
+
+## Instructional Design Mindset
+
+Approach content analysis as a **world-class instructional designer**:
+
+| Principle | Application |
+|-----------|-------------|
+| **Deep Understanding** | Read the entire document before analyzing any part |
+| **Learner-Centered** | Focus on what the viewer needs to understand |
+| **Visual Storytelling** | Use visuals to communicate, not just decorate |
+| **Cognitive Load** | Simplify complex ideas without losing accuracy |
+| **Data Integrity** | Never alter, summarize, or paraphrase source facts |
+
+## Analysis Dimensions
+
+### 1. Content Type Classification
+
+| Type | Characteristics | Best Layout | Best Style |
+|------|-----------------|-------------|------------|
+| **Timeline/History** | Sequential events, dates, progression | linear-progression | craft-handmade, aged-academia |
+| **Process/Tutorial** | Step-by-step instructions, how-to | linear-progression, winding-roadmap | ikea-manual, technical-schematic |
+| **Comparison** | A vs B, pros/cons, before-after | binary-comparison, comparison-matrix | corporate-memphis, bold-graphic |
+| **Hierarchy** | Levels, priorities, pyramids | hierarchical-layers, tree-branching | craft-handmade, corporate-memphis |
+| **Relationships** | Connections, overlaps, influences | venn-diagram, hub-spoke, jigsaw | craft-handmade, subway-map |
+| **Data/Metrics** | Statistics, KPIs, measurements | dashboard, periodic-table | corporate-memphis, technical-schematic |
+| **Cycle/Loop** | Recurring processes, feedback loops | circular-flow | craft-handmade, technical-schematic |
+| **System/Structure** | Components, architecture, anatomy | structural-breakdown, bento-grid | technical-schematic, ikea-manual |
+| **Journey/Narrative** | Stories, user flows, milestones | winding-roadmap, story-mountain | storybook-watercolor, bold-graphic |
+| **Overview/Summary** | Multiple topics, feature highlights | bento-grid, periodic-table, dense-modules | chalkboard, bold-graphic |
+| **Product/Buying Guide** | Multi-dimension comparisons, specs, pitfalls | dense-modules | morandi-journal, pop-laboratory, retro-pop-grid |
+
+### 2. Learning Objective Identification
+
+Every infographic should have 1-3 clear learning objectives.
+
+**Good Learning Objectives**:
+- Specific and measurable
+- Focus on what the viewer will understand, not just see
+- Written from the viewer's perspective
+
+**Format**: "After viewing this infographic, the viewer will understand..."
+
+| Content Aspect | Objective Type |
+|----------------|----------------|
+| Core concept | "...what [topic] is and why it matters" |
+| Process | "...how to [accomplish something]" |
+| Comparison | "...the key differences between [A] and [B]" |
+| Relationships | "...how [elements] connect to each other" |
+| Data | "...the significance of [key statistics]" |
+
+### 3. Audience Analysis
+
+| Factor | Questions | Impact |
+|--------|-----------|--------|
+| **Knowledge Level** | What do they already know? | Determines complexity depth |
+| **Context** | Why are they viewing this? | Determines emphasis points |
+| **Expectations** | What do they hope to learn? | Determines success criteria |
+| **Visual Preferences** | Professional, playful, technical? | Influences style choice |
+
+### 4. Complexity Assessment
+
+| Level | Indicators | Layout Recommendation |
+|-------|------------|----------------------|
+| **Simple** (3-5 points) | Few main concepts, clear relationships | sparse layouts, single focus |
+| **Moderate** (6-8 points) | Multiple concepts, some relationships | balanced layouts, clear sections |
+| **Complex** (9+ points) | Many concepts, intricate relationships | dense layouts, multiple sections |
+
+### 5. Visual Opportunity Mapping
+
+Identify what can be shown rather than told:
+
+| Content Element | Visual Treatment |
+|-----------------|------------------|
+| Numbers/Statistics | Large, highlighted numerals |
+| Comparisons | Side-by-side, split screen |
+| Processes | Arrows, numbered steps, flow |
+| Hierarchies | Pyramids, layers, size differences |
+| Relationships | Lines, connections, overlapping shapes |
+| Categories | Color coding, grouping, sections |
+| Timelines | Horizontal/vertical progression |
+| Quotes | Callout boxes, quotation marks |
+
+### 6. Data Verbatim Extraction
+
+**Critical**: All factual information must be preserved exactly as written in the source.
+
+| Data Type | Handling Rule |
+|-----------|---------------|
+| **Statistics** | Copy exactly: "73%" not "about 70%" |
+| **Quotes** | Copy word-for-word with attribution |
+| **Names** | Preserve exact spelling |
+| **Dates** | Keep original format |
+| **Technical Terms** | Do not simplify or substitute |
+| **Lists** | Preserve order and wording |
+
+**Never**:
+- Round numbers
+- Paraphrase quotes
+- Substitute simpler words
+- Add implied information
+- Remove context that affects meaning
+
+## Output Format
+
+Save analysis results to `analysis.md`:
+
+```markdown
+---
+title: "[Main topic title]"
+topic: "[educational/technical/business/creative/etc.]"
+data_type: "[timeline/hierarchy/comparison/process/etc.]"
+complexity: "[simple/moderate/complex]"
+point_count: [number of main points]
+source_language: "[detected language]"
+user_language: "[user's language]"
+---
+
+## Main Topic
+[1-2 sentence summary of what this content is about]
+
+## Learning Objectives
+After viewing this infographic, the viewer should understand:
+1. [Primary objective]
+2. [Secondary objective]
+3. [Tertiary objective if applicable]
+
+## Target Audience
+- **Knowledge Level**: [Beginner/Intermediate/Expert]
+- **Context**: [Why they're viewing this]
+- **Expectations**: [What they hope to learn]
+
+## Content Type Analysis
+- **Data Structure**: [How information relates to itself]
+- **Key Relationships**: [What connects to what]
+- **Visual Opportunities**: [What can be shown rather than told]
+
+## Key Data Points (Verbatim)
+[All statistics, quotes, and critical facts exactly as they appear in source]
+- "[Exact data point 1]"
+- "[Exact data point 2]"
+- "[Exact quote with attribution]"
+
+## Layout × Style Signals
+- Content type: [type] → suggests [layout]
+- Tone: [tone] → suggests [style]
+- Audience: [audience] → suggests [style]
+- Complexity: [level] → suggests [layout density]
+
+## Design Instructions (from user input)
+[Any style, color, layout, or visual preferences extracted from user's steering prompt]
+
+## Recommended Combinations
+1. **[Layout] + [Style]** (Recommended): [Brief rationale]
+2. **[Layout] + [Style]**: [Brief rationale]
+3. **[Layout] + [Style]**: [Brief rationale]
+```
+
+## Analysis Checklist
+
+Before proceeding to structured content generation:
+
+- [ ] Have I read the entire source document?
+- [ ] Can I summarize the main topic in 1-2 sentences?
+- [ ] Have I identified 1-3 clear learning objectives?
+- [ ] Do I understand the target audience?
+- [ ] Have I classified the content type correctly?
+- [ ] Have I extracted all data points verbatim?
+- [ ] Have I identified visual opportunities?
+- [ ] Have I extracted design instructions from user input?
+- [ ] Have I recommended at least 3 layout×style combinations?
diff --git a/skills/creative/baoyu-infographic/references/base-prompt.md b/skills/creative/baoyu-infographic/references/base-prompt.md
new file mode 100644
index 00000000000..b65f972289d
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/base-prompt.md
@@ -0,0 +1,43 @@
+Create a professional infographic following these specifications:
+
+## Image Specifications
+
+- **Type**: Infographic
+- **Layout**: {{LAYOUT}}
+- **Style**: {{STYLE}}
+- **Aspect Ratio**: {{ASPECT_RATIO}}
+- **Language**: {{LANGUAGE}}
+
+## Core Principles
+
+- Follow the layout structure precisely for information architecture
+- Apply style aesthetics consistently throughout
+- If content involves sensitive or copyrighted figures, create stylistically similar alternatives
+- Keep information concise, highlight keywords and core concepts
+- Use ample whitespace for visual clarity
+- Maintain clear visual hierarchy
+
+## Text Requirements
+
+- All text must match the specified style treatment
+- Main titles should be prominent and readable
+- Key concepts should be visually emphasized
+- Labels should be clear and appropriately sized
+- Use the specified language for all text content
+
+## Layout Guidelines
+
+{{LAYOUT_GUIDELINES}}
+
+## Style Guidelines
+
+{{STYLE_GUIDELINES}}
+
+---
+
+Generate the infographic based on the content below:
+
+{{CONTENT}}
+
+Text labels (in {{LANGUAGE}}):
+{{TEXT_LABELS}}
diff --git a/skills/creative/baoyu-infographic/references/layouts/bento-grid.md b/skills/creative/baoyu-infographic/references/layouts/bento-grid.md
new file mode 100644
index 00000000000..8ce8bb890a4
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/bento-grid.md
@@ -0,0 +1,41 @@
+# bento-grid
+
+Modular grid layout with varied cell sizes, like a bento box.
+
+## Structure
+
+- Grid of rectangular cells
+- Mixed cell sizes (1x1, 2x1, 1x2, 2x2)
+- No strict symmetry required
+- Hero cell for main point
+- Supporting cells around it
+
+## Best For
+
+- Multiple topic overview
+- Feature highlights
+- Dashboard summaries
+- Portfolio displays
+- Mixed content types
+
+## Visual Elements
+
+- Clear cell boundaries
+- Varied cell backgrounds
+- Icons or illustrations per cell
+- Consistent padding/margins
+- Visual hierarchy through size
+
+## Text Placement
+
+- Main title at top
+- Cell titles within each cell
+- Brief content per cell
+- Minimal text, maximum visual
+- CTA or summary in prominent cell
+
+## Recommended Pairings
+
+- `craft-handmade`: Friendly overviews (default)
+- `corporate-memphis`: Business summaries
+- `pixel-art`: Retro feature grids
diff --git a/skills/creative/baoyu-infographic/references/layouts/binary-comparison.md b/skills/creative/baoyu-infographic/references/layouts/binary-comparison.md
new file mode 100644
index 00000000000..cf06980b065
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/binary-comparison.md
@@ -0,0 +1,48 @@
+# binary-comparison
+
+Side-by-side comparison of two items, states, or concepts.
+
+## Structure
+
+- Vertical divider splitting image in half
+- Left side: Item A / Before / Pro
+- Right side: Item B / After / Con
+- Mirrored layout for easy comparison
+- Clear visual distinction between sides
+
+## Variants
+
+| Variant | Focus | Visual Emphasis |
+|---------|-------|-----------------|
+| **Before-After** | Transformation over time | Temporal change, improvement |
+| **A vs B** | Feature comparison | Direct contrast, differences |
+| **Pro-Con** | Advantages/disadvantages | Balanced evaluation |
+
+## Best For
+
+- Before/after transformations
+- Product or option comparisons
+- Pros and cons analysis
+- Old vs new comparisons
+- Two perspectives on a topic
+
+## Visual Elements
+
+- Strong vertical dividing line or gradient
+- Contrasting colors per side
+- Matching element positions for comparison
+- VS symbol or divider decoration
+- Transformation arrow for before-after
+
+## Text Placement
+
+- Main title centered at top
+- Side labels (A/B, Before/After)
+- Corresponding points aligned horizontally
+- Summary at bottom if needed
+
+## Recommended Pairings
+
+- `corporate-memphis`: Business comparisons
+- `bold-graphic`: High-contrast dramatic comparisons
+- `craft-handmade`: Friendly explainers
diff --git a/skills/creative/baoyu-infographic/references/layouts/bridge.md b/skills/creative/baoyu-infographic/references/layouts/bridge.md
new file mode 100644
index 00000000000..116bf97f213
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/bridge.md
@@ -0,0 +1,41 @@
+# bridge
+
+Gap-crossing structure connecting problem to solution or current to future state.
+
+## Structure
+
+- Left side: current state/problem
+- Right side: desired state/solution
+- Bridge element spanning the gap
+- Gap representing challenge/obstacle
+- Bridge elements as steps/methods
+
+## Best For
+
+- Problem to solution journeys
+- Current vs future state
+- Gap analysis
+- Transformation bridges
+- Strategic initiatives
+
+## Visual Elements
+
+- Two distinct platforms/sides
+- Visible gap or chasm
+- Bridge structure with supports
+- Icons representing each side
+- Stepping stones or bridge planks
+
+## Text Placement
+
+- Title at top
+- Left label (From/Problem/Current)
+- Right label (To/Solution/Future)
+- Bridge elements labeled
+- Gap description below
+
+## Recommended Pairings
+
+- `craft-handmade`: Friendly journeys
+- `corporate-memphis`: Business transformations
+- `technical-schematic`: Technical transitions
diff --git a/skills/creative/baoyu-infographic/references/layouts/circular-flow.md b/skills/creative/baoyu-infographic/references/layouts/circular-flow.md
new file mode 100644
index 00000000000..068c5fd79d5
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/circular-flow.md
@@ -0,0 +1,41 @@
+# circular-flow
+
+Cyclic process showing continuous or recurring steps.
+
+## Structure
+
+- Circular arrangement
+- Steps around the circle
+- Arrows showing direction
+- No clear start/end (continuous)
+- Center can hold main concept
+
+## Best For
+
+- Recurring processes
+- Feedback loops
+- Lifecycle stages
+- Continuous improvement
+- Natural cycles
+
+## Visual Elements
+
+- Circle or ring shape
+- Directional arrows
+- Step nodes evenly spaced
+- Icons per step
+- Optional center element
+
+## Text Placement
+
+- Title at top
+- Step labels at each node
+- Brief descriptions near nodes
+- Center concept if applicable
+- Cycle name
+
+## Recommended Pairings
+
+- `craft-handmade`: Friendly cycles
+- `corporate-memphis`: Business processes
+- `subway-map`: Transit-style cycles
diff --git a/skills/creative/baoyu-infographic/references/layouts/comic-strip.md b/skills/creative/baoyu-infographic/references/layouts/comic-strip.md
new file mode 100644
index 00000000000..b760f57a4fa
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/comic-strip.md
@@ -0,0 +1,41 @@
+# comic-strip
+
+Sequential narrative panels telling a story or explaining a concept.
+
+## Structure
+
+- Multiple panels in sequence
+- Left-to-right, top-to-bottom reading
+- Characters or subjects in scenes
+- Speech/thought bubbles
+- Panel borders clearly defined
+
+## Best For
+
+- Storytelling explanations
+- User journey narratives
+- Scenario illustrations
+- Step sequences with context
+- Before/during/after stories
+
+## Visual Elements
+
+- Panel frames
+- Speech and thought bubbles
+- Sound effects (optional)
+- Characters with expressions
+- Scene backgrounds
+
+## Text Placement
+
+- Title at top
+- Dialogue in speech bubbles
+- Narration in caption boxes
+- Sound effects integrated
+- Panel numbers if needed
+
+## Recommended Pairings
+
+- `bold-graphic`: Dramatic narratives
+- `kawaii`: Cute character stories
+- `craft-handmade`: Friendly explanations
diff --git a/skills/creative/baoyu-infographic/references/layouts/comparison-matrix.md b/skills/creative/baoyu-infographic/references/layouts/comparison-matrix.md
new file mode 100644
index 00000000000..16606281f76
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/comparison-matrix.md
@@ -0,0 +1,41 @@
+# comparison-matrix
+
+Grid-based multi-factor comparison across multiple items.
+
+## Structure
+
+- Table/grid layout
+- Rows: items being compared
+- Columns: comparison criteria
+- Cells: scores, checks, or values
+- Header row and column clearly marked
+
+## Best For
+
+- Product feature comparisons
+- Tool/software evaluations
+- Multi-criteria decisions
+- Specification sheets
+- Rating comparisons
+
+## Visual Elements
+
+- Clear grid lines or cell boundaries
+- Checkmarks, X marks, or scores in cells
+- Color coding for quick scanning
+- Icons for criteria categories
+- Highlight for recommended option
+
+## Text Placement
+
+- Title at top
+- Item names in first column
+- Criteria in header row
+- Brief values in cells
+- Legend if using symbols
+
+## Recommended Pairings
+
+- `corporate-memphis`: Business tool comparisons
+- `ui-wireframe`: Technical feature matrices
+- `technical-schematic`: Specification comparisons
diff --git a/skills/creative/baoyu-infographic/references/layouts/dashboard.md b/skills/creative/baoyu-infographic/references/layouts/dashboard.md
new file mode 100644
index 00000000000..70635ae1c97
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/dashboard.md
@@ -0,0 +1,41 @@
+# dashboard
+
+Multi-metric display with charts, numbers, and KPI indicators.
+
+## Structure
+
+- Multiple data widgets
+- Charts, graphs, numbers
+- Grid or modular layout
+- Key metrics prominent
+- Status indicators
+
+## Best For
+
+- KPI summaries
+- Performance metrics
+- Analytics overviews
+- Status reports
+- Data snapshots
+
+## Visual Elements
+
+- Chart types (bar, line, pie, gauge)
+- Big numbers for KPIs
+- Trend arrows (up/down)
+- Color-coded status (green/red)
+- Clean data visualization
+
+## Text Placement
+
+- Title at top
+- Widget titles above each section
+- Metric labels and values
+- Units clearly shown
+- Time period indicated
+
+## Recommended Pairings
+
+- `corporate-memphis`: Business dashboards
+- `ui-wireframe`: Technical dashboards
+- `cyberpunk-neon`: Futuristic displays
diff --git a/skills/creative/baoyu-infographic/references/layouts/dense-modules.md b/skills/creative/baoyu-infographic/references/layouts/dense-modules.md
new file mode 100644
index 00000000000..5e294e72355
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/dense-modules.md
@@ -0,0 +1,72 @@
+# dense-modules
+
+High-density modular layout with 6-7 typed information modules packed with concrete data.
+
+## Structure
+
+- 6-7 distinct modules per image, each serving a specific information function
+- Every module contains concrete data: brand names, numbers, percentages, parameters
+- Minimal whitespace—compact spacing prioritized over breathing room
+- Smaller text acceptable to maximize information density
+- Each module identified by coordinate label or section marker (e.g., MOD-1, SEC-A)
+
+## Module Archetypes
+
+| Module | Purpose | Content Requirements |
+|--------|---------|---------------------|
+| **Brand/Selection Array** | Grid of options with recommendations | 4-8 items with icons, names, brief descriptions; highlight "best choice" |
+| **Specification Scale** | Quality/measurement gauge | 3-5 levels with precise numerical increments, quality indicators (emoji faces, checkmarks) |
+| **Deep Dive/Detail** | Technical breakdown of key item | Zoom-in callouts, internal components, cross-section or exploded view |
+| **Scenario Comparison** | Side-by-side use cases | 3-6 scenarios with specific recommendations and data per scenario |
+| **Identification Tips** | How-to checklist | 3-5 inspection methods: look/test/check/ask format |
+| **Warning/Pitfall Zone** | Critical mistakes to avoid | 3-5 pitfalls with consequences, 1-2 correct approaches; high visual contrast |
+| **Quick Reference** | Compact summary | Dense table, one-line summaries, decision flowchart, or key takeaways |
+
+## Variants
+
+| Variant | Focus | Visual Emphasis |
+|---------|-------|-----------------|
+| **Coordinate-labeled** | Precision and systematicity | Each module has alphanumeric coordinate (A-01, B-05, C-12), ruler/axis markers |
+| **Grid-cell** | Order and structure | Modules in strict rectangular cells divided by thick lines, Swiss grid feel |
+| **Free-flowing** | Organic density | Magazine-style layout with dotted frames, varying module sizes, connected by arrows |
+
+## Best For
+
+- Product selection guides and buying guides
+- Multi-dimensional comparison content
+- Data-rich educational materials
+- "Avoid pitfalls" / "complete guide" formats
+- Content targeting platforms like Xiaohongshu with high-density visual requirements
+
+## Visual Elements
+
+- Module boundary markers (thick lines, dotted frames, or coordinate grids)
+- Quality indicators per module (emoji faces, checkmarks, crosses, crowns)
+- Data callout boxes with highlighted numbers
+- Comparison arrows and progression indicators
+- Warning/alert visual markers for pitfall modules
+- Metadata in corners (page numbers, timestamps, small barcodes)
+
+## Text Placement
+
+- Main title at top, prominent and impactful
+- Subtitle with module count ("X大维度全面解析...")
+- Module headers inside colored badges or labeled frames
+- Body text compact, multiple columns within modules
+- Numbers highlighted with accent colors, slightly larger than body text
+
+## Information Density Rules
+
+- Every corner should contain useful information or metadata
+- No decorative-only empty space
+- Text size may be reduced to fit more content—information over font size
+- Each module must have specific data points, not generic descriptions
+- Balance between density and readability: dense but organized
+
+## Recommended Pairings
+
+- `pop-laboratory`: Technical precision with coordinate markers and blueprint grid
+- `morandi-journal`: Hand-drawn warmth with doodle illustrations and organic frames
+- `retro-pop-grid`: 1970s pop art with strict grid cells and bold contrast
+- `corporate-memphis`: Clean business feel for product comparisons
+- `technical-schematic`: Engineering precision for technical product guides
diff --git a/skills/creative/baoyu-infographic/references/layouts/funnel.md b/skills/creative/baoyu-infographic/references/layouts/funnel.md
new file mode 100644
index 00000000000..9c5eb6dff3f
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/funnel.md
@@ -0,0 +1,41 @@
+# funnel
+
+Narrowing stages showing conversion, filtering, or refinement process.
+
+## Structure
+
+- Wide top (input/start)
+- Narrow bottom (output/result)
+- Horizontal layers for stages
+- Progressive narrowing
+- 3-6 stages typically
+
+## Best For
+
+- Sales/marketing funnels
+- Conversion processes
+- Filtering/selection
+- Recruitment pipelines
+- Decision processes
+
+## Visual Elements
+
+- Funnel shape clearly defined
+- Distinct colors per stage
+- Width indicates volume/quantity
+- Stage icons or symbols
+- Numbers/percentages per stage
+
+## Text Placement
+
+- Title at top
+- Stage names inside or beside
+- Metrics/numbers per stage
+- Input label at top
+- Output label at bottom
+
+## Recommended Pairings
+
+- `corporate-memphis`: Marketing funnels
+- `isometric-3d`: Technical pipelines
+- `craft-handmade` (Hand-drawn variant): Educational funnels
diff --git a/skills/creative/baoyu-infographic/references/layouts/hierarchical-layers.md b/skills/creative/baoyu-infographic/references/layouts/hierarchical-layers.md
new file mode 100644
index 00000000000..dfa5da83577
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/hierarchical-layers.md
@@ -0,0 +1,48 @@
+# hierarchical-layers
+
+Nested layers showing levels of importance, influence, or proximity.
+
+## Structure
+
+- Multiple layers from core to periphery
+- Core/top: most important/central
+- Outer/bottom: decreasing importance
+- 3-7 levels typically
+- Clear boundaries between levels
+
+## Variants
+
+| Variant | Shape | Visual Emphasis |
+|---------|-------|-----------------|
+| **Pyramid** | Triangle, vertical | Top-down hierarchy, quantity |
+| **Concentric** | Rings, radial | Center-out influence, proximity |
+
+## Best For
+
+- Maslow's hierarchy style concepts
+- Priority and importance levels
+- Spheres of influence
+- Organizational structures
+- Stakeholder analysis
+
+## Visual Elements
+
+- Distinct color per level
+- Icons or illustrations per tier
+- Size indicates importance/quantity
+- Labels inside or beside layers
+- Decorative apex/center element
+
+## Text Placement
+
+- Title at top or side
+- Level names inside each tier
+- Brief descriptions outside
+- Quantities or percentages if relevant
+- Legend for color meanings
+
+## Recommended Pairings
+
+- `craft-handmade`: Playful layered concepts
+- `corporate-memphis`: Business hierarchies
+- `technical-schematic`: Technical 3D pyramids
diff --git a/skills/creative/baoyu-infographic/references/layouts/hub-spoke.md b/skills/creative/baoyu-infographic/references/layouts/hub-spoke.md
new file mode 100644
index 00000000000..372e53e0781
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/hub-spoke.md
@@ -0,0 +1,41 @@
+# hub-spoke
+
+Central concept with radiating connections to related items.
+
+## Structure
+
+- Central hub (main concept)
+- Spokes radiating outward
+- Nodes at spoke ends (related concepts)
+- Even or weighted distribution
+- Optional secondary connections
+
+## Best For
+
+- Central theme with components
+- Product features around core
+- Team roles around project
+- Ecosystem mapping
+- Mind maps
+
+## Visual Elements
+
+- Prominent central hub
+- Clear spoke lines
+- Consistent node styling
+- Icons representing each spoke item
+- Optional grouping colors
+
+## Text Placement
+
+- Title at top
+- Core concept in center hub
+- Spoke item labels at nodes
+- Brief descriptions near nodes
+- Connection labels on spokes if needed
+
+## Recommended Pairings
+
+- `craft-handmade` (Hand-drawn variant): Friendly concept maps
+- `corporate-memphis`: Business ecosystems
+- `subway-map`: Network-style connections
diff --git a/skills/creative/baoyu-infographic/references/layouts/iceberg.md b/skills/creative/baoyu-infographic/references/layouts/iceberg.md
new file mode 100644
index 00000000000..237c53ff8bf
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/iceberg.md
@@ -0,0 +1,41 @@
+# iceberg
+
+Surface vs hidden depths, visible vs underlying factors.
+
+## Structure
+
+- Waterline dividing visible/hidden
+- Tip above water (obvious/surface)
+- Larger mass below (hidden/deep)
+- Proportions chosen to emphasize hidden depth
+- Optional layers within underwater section
+
+## Best For
+
+- Surface vs root causes
+- Visible vs invisible work
+- Symptoms vs underlying issues
+- Public vs private aspects
+- Known vs unknown factors
+
+## Visual Elements
+
+- Clear water/surface line
+- Above: smaller, brighter
+- Below: larger, darker/deeper
+- Wave or water texture
+- Gradient showing depth
+
+## Text Placement
+
+- Title at top
+- Surface items above waterline
+- Hidden items below, larger
+- Waterline label optional
+- Depth indicators for layers
+
+## Recommended Pairings
+
+- `craft-handmade` (Hand-drawn variant): Friendly metaphor
+- `storybook-watercolor`: Artistic depth
+- `bold-graphic` (Graphic-novel variant): Dramatic revelation
diff --git a/skills/creative/baoyu-infographic/references/layouts/isometric-map.md b/skills/creative/baoyu-infographic/references/layouts/isometric-map.md
new file mode 100644
index 00000000000..f8c3a8e5174
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/isometric-map.md
@@ -0,0 +1,41 @@
+# isometric-map
+
+3D-style spatial layout showing locations, relationships, or journey through space.
+
+## Structure
+
+- Isometric 3D perspective
+- Locations as buildings/landmarks
+- Paths connecting locations
+- Spatial relationships visible
+- Bird's-eye view angle
+
+## Best For
+
+- Office/campus layouts
+- City/ecosystem maps
+- User journey maps
+- System architecture
+- Process landscapes
+
+## Visual Elements
+
+- Consistent isometric angle (30°)
+- 3D buildings or objects
+- Pathways and roads
+- Labels floating above
+- Mini scenes at locations
+
+## Text Placement
+
+- Title at top corner
+- Location labels above objects
+- Path labels along routes
+- Legend for symbols
+- Scale indicator if relevant
+
+## Recommended Pairings
+
+- `isometric-3d`: Clean technical maps
+- `pixel-art`: Retro game-style maps
+- `lego-brick`: Playful location maps
diff --git a/skills/creative/baoyu-infographic/references/layouts/jigsaw.md b/skills/creative/baoyu-infographic/references/layouts/jigsaw.md
new file mode 100644
index 00000000000..7ea6e35413b
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/jigsaw.md
@@ -0,0 +1,41 @@
+# jigsaw
+
+Interlocking puzzle pieces showing how parts fit together.
+
+## Structure
+
+- Puzzle pieces that interlock
+- Each piece represents a component
+- Connections show relationships
+- Can be shown assembled or as an exploded view
+- Missing piece highlights gaps
+
+## Best For
+
+- Component relationships
+- Team/skill fit
+- Strategy pieces
+- Integration concepts
+- Completeness assessments
+
+## Visual Elements
+
+- Classic puzzle piece shapes
+- Distinct colors per piece
+- Interlocking edges visible
+- Icons or labels per piece
+- Optional missing piece
+
+## Text Placement
+
+- Title at top
+- Piece labels inside or beside
+- Connection descriptions
+- Missing piece explanation
+- Assembly context
+
+## Recommended Pairings
+
+- `craft-handmade` (Hand-drawn variant): Friendly integration concepts
+- `paper-cutout`: Tactile puzzle feel
+- `corporate-memphis`: Business strategy pieces
diff --git a/skills/creative/baoyu-infographic/references/layouts/linear-progression.md b/skills/creative/baoyu-infographic/references/layouts/linear-progression.md
new file mode 100644
index 00000000000..427cb52a83e
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/linear-progression.md
@@ -0,0 +1,48 @@
+# linear-progression
+
+Sequential progression showing steps, timeline, or chronological events.
+
+## Structure
+
+- Linear arrangement (horizontal or vertical)
+- Nodes/markers at key points
+- Connecting line or path between nodes
+- Clear start and end points
+- Directional flow indicators
+
+## Variants
+
+| Variant | Focus | Visual Emphasis |
+|---------|-------|-----------------|
+| **Timeline** | Chronological events, dates | Time markers, period labels |
+| **Process** | Action steps, numbered sequence | Step numbers, action icons |
+
+## Best For
+
+- Step-by-step tutorials and how-tos
+- Historical timelines and evolution
+- Project milestones and roadmaps
+- Workflow documentation
+- Onboarding processes
+
+## Visual Elements
+
+- Numbered steps or date markers
+- Arrows or connectors showing direction
+- Icons representing each step/event
+- Consistent node spacing
+- Progress indicators optional
+
+## Text Placement
+
+- Title at top
+- Step/event titles at each node
+- Brief descriptions below nodes
+- Dates or numbers clearly visible
+
+## Recommended Pairings
+
+- `craft-handmade`: Friendly tutorials and timelines
+- `ikea-manual`: Clean assembly instructions
+- `corporate-memphis`: Business process flows
+- `aged-academia`: Historical discoveries
diff --git a/skills/creative/baoyu-infographic/references/layouts/periodic-table.md b/skills/creative/baoyu-infographic/references/layouts/periodic-table.md
new file mode 100644
index 00000000000..7287d111df0
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/periodic-table.md
@@ -0,0 +1,41 @@
+# periodic-table
+
+Grid of categorized elements with consistent cell formatting.
+
+## Structure
+
+- Rectangular grid
+- Each cell is one element
+- Color-coded categories
+- Consistent cell format
+- Optional grouping gaps
+
+## Best For
+
+- Categorized collections
+- Tool/resource catalogs
+- Skill matrices
+- Element collections
+- Reference guides
+
+## Visual Elements
+
+- Uniform cell sizes
+- Category colors
+- Symbol/abbreviation prominent
+- Small icon per cell
+- Category legend
+
+## Text Placement
+
+- Title at top
+- Cell: symbol, name, brief info
+- Category names in legend
+- Optional row/column headers
+- Footnotes for special cases
+
+## Recommended Pairings
+
+- `pop-art`: Vibrant element grids
+- `pixel-art`: Retro collection displays
+- `corporate-memphis`: Business tool catalogs
diff --git a/skills/creative/baoyu-infographic/references/layouts/story-mountain.md b/skills/creative/baoyu-infographic/references/layouts/story-mountain.md
new file mode 100644
index 00000000000..ac28521970f
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/story-mountain.md
@@ -0,0 +1,41 @@
+# story-mountain
+
+Plot structure visualization showing rising action, climax, and resolution.
+
+## Structure
+
+- Mountain/arc shape
+- Rising slope (build-up)
+- Peak (climax)
+- Falling slope (resolution)
+- Start and end at base level
+
+## Best For
+
+- Narrative structures
+- Project lifecycles
+- Tension/release patterns
+- Emotional journeys
+- Campaign arcs
+
+## Visual Elements
+
+- Mountain or arc curve
+- Points along the path
+- Climax visually emphasized
+- Slope steepness meaningful
+- Base camps or milestones
+
+## Text Placement
+
+- Title at top
+- Stage labels along path
+- Climax prominently labeled
+- Brief descriptions at points
+- Start/end clearly marked
+
+## Recommended Pairings
+
+- `storybook-watercolor`: Narrative journeys
+- `craft-handmade` (Hand-drawn variant): Educational plot diagrams
+- `bold-graphic` (Graphic-novel variant): Dramatic story arcs
diff --git a/skills/creative/baoyu-infographic/references/layouts/structural-breakdown.md b/skills/creative/baoyu-infographic/references/layouts/structural-breakdown.md
new file mode 100644
index 00000000000..86f01a69024
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/structural-breakdown.md
@@ -0,0 +1,48 @@
+# structural-breakdown
+
+Internal structure visualization with labeled parts or layers.
+
+## Structure
+
+- Central subject (object, system, body)
+- Parts or layers clearly shown
+- Labels with callout lines
+- Exploded or cutaway view
+- Optional zoomed detail sections
+
+## Variants
+
+| Variant | View Type | Visual Emphasis |
+|---------|-----------|-----------------|
+| **Exploded** | Parts separated outward | Component relationships |
+| **Cross-section** | Sliced/cutaway view | Internal layers, composition |
+
+## Best For
+
+- Product part breakdowns
+- Anatomy explanations
+- System components
+- Device teardowns
+- Material composition
+
+## Visual Elements
+
+- Main subject clearly rendered
+- Callout lines with dots/arrows
+- Label boxes at endpoints
+- Optional numbered parts
+- Layer boundaries or separation
+
+## Text Placement
+
+- Title at top
+- Part/layer labels at callouts
+- Brief descriptions in boxes
+- Legend for numbered systems
+- Depth/thickness if relevant
+
+## Recommended Pairings
+
+- `technical-schematic`: Technical schematics
+- `aged-academia`: Classic anatomical style
+- `craft-handmade`: Friendly breakdowns
diff --git a/skills/creative/baoyu-infographic/references/layouts/tree-branching.md b/skills/creative/baoyu-infographic/references/layouts/tree-branching.md
new file mode 100644
index 00000000000..520ef3bef78
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/tree-branching.md
@@ -0,0 +1,41 @@
+# tree-branching
+
+Hierarchical structure branching from root to leaves, showing categories and subcategories.
+
+## Structure
+
+- Root/trunk at top or left
+- Branches splitting into sub-branches
+- Leaves as terminal nodes
+- Clear parent-child relationships
+- Balanced or organic branching
+
+## Best For
+
+- Taxonomies and classifications
+- Decision trees
+- Organizational charts
+- File/folder structures
+- Family trees
+
+## Visual Elements
+
+- Connecting lines showing relationships
+- Nodes at branch points
+- Icons or labels at each node
+- Color coding by branch
+- Visual weight decreasing toward leaves
+
+## Text Placement
+
+- Title at top
+- Root concept prominently labeled
+- Branch and leaf labels
+- Optional descriptions at key nodes
+- Legend for categories
+
+## Recommended Pairings
+
+- `craft-handmade` (Hand-drawn variant): Friendly taxonomies
+- `aged-academia`: Scientific classifications
+- `origami`: Geometric tree structures
diff --git a/skills/creative/baoyu-infographic/references/layouts/venn-diagram.md b/skills/creative/baoyu-infographic/references/layouts/venn-diagram.md
new file mode 100644
index 00000000000..9b3e0a8a7c4
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/venn-diagram.md
@@ -0,0 +1,41 @@
+# venn-diagram
+
+Overlapping circles showing relationships, commonalities, and differences.
+
+## Structure
+
+- 2-3 overlapping circles
+- Each circle is a category/concept
+- Overlaps show shared elements
+- Center shows what is common to all
+- Unique areas for exclusives
+
+## Best For
+
+- Concept relationships
+- Skill overlaps
+- Market segments
+- Comparative analysis
+- Finding common ground
+
+## Visual Elements
+
+- Translucent circle fills
+- Clear overlap regions
+- Distinct colors per circle
+- Icons in regions
+- Boundary labels
+
+## Text Placement
+
+- Title at top
+- Circle labels outside or on edge
+- Items in appropriate regions
+- Overlap region labels
+- Legend if needed
+
+## Recommended Pairings
+
+- `craft-handmade` (Hand-drawn variant): Friendly concept overlaps
+- `corporate-memphis`: Business segment analysis
+- `pop-art`: High-contrast comparisons
diff --git a/skills/creative/baoyu-infographic/references/layouts/winding-roadmap.md b/skills/creative/baoyu-infographic/references/layouts/winding-roadmap.md
new file mode 100644
index 00000000000..aad5543ad97
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/winding-roadmap.md
@@ -0,0 +1,41 @@
+# winding-roadmap
+
+Curved path showing journey with milestones and checkpoints.
+
+## Structure
+
+- S-curve or winding path
+- Milestones along the path
+- Start and destination points
+- Side elements (obstacles, helpers)
+- Progress indicators
+
+## Best For
+
+- Project roadmaps
+- Career paths
+- Customer journeys
+- Learning paths
+- Strategy timelines
+
+## Visual Elements
+
+- Curving road or river
+- Milestone markers/flags
+- Scene elements along path
+- Vehicle/character on journey
+- Destination landmark
+
+## Text Placement
+
+- Title at top
+- Milestone labels at each point
+- Path section names
+- Destination description
+- Optional timeline indicators
+
+## Recommended Pairings
+
+- `storybook-watercolor`: Whimsical journeys
+- `craft-handmade` (Hand-drawn variant): Friendly roadmaps
+- `isometric-3d`: Technical project paths
diff --git a/skills/creative/baoyu-infographic/references/structured-content-template.md b/skills/creative/baoyu-infographic/references/structured-content-template.md
new file mode 100644
index 00000000000..32d527ff99e
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/structured-content-template.md
@@ -0,0 +1,244 @@
+# Structured Content Template
+
+Template for generating structured infographic content that informs the visual designer.
+
+## Purpose
+
+This document bridges content analysis and visual design:
+- Transforms source material into designer-ready format
+- Organizes learning objectives into visual sections
+- Preserves all source data verbatim
+- Separates content from design instructions
+
+## Instructional Design Process
+
+### Phase 1: High-Level Outline
+
+1. **Title**: Capture the essence in a compelling headline
+2. **Overview**: Brief description (1-2 sentences)
+3. **Learning Objectives**: List what the viewer will understand
+
+### Phase 2: Section Development
+
+For each learning objective:
+
+1. **Key Concept**: One-sentence summary of the section
+2. **Content**: Points extracted verbatim from source
+3. **Visual Element**: What should be shown visually
+4. **Text Labels**: Exact text for headlines, subheads, labels
+
+### Phase 3: Data Integrity Check
+
+Verify all source data is:
+- Copied exactly (no paraphrasing)
+- Attributed correctly (for quotes)
+- Formatted consistently
+
+## Critical Rules
+
+| Rule | Requirement | Example |
+|------|-------------|---------|
+| **Output format** | Markdown only | Use proper headers, lists, code blocks |
+| **Tone** | Expert trainer | Knowledgeable, clear, encouraging |
+| **No new information** | Only source content | Don't add examples not in source |
+| **Verbatim data** | Exact copies | "73% increase" not "significant increase" |
+
+## Structured Content Format
+
+```markdown
+# [Infographic Title]
+
+## Overview
+[Brief description of what this infographic conveys - 1-2 sentences]
+
+## Learning Objectives
+The viewer will understand:
+1. [Primary objective]
+2. [Secondary objective]
+3. [Tertiary objective if applicable]
+
+---
+
+## Section 1: [Section Title]
+
+**Key Concept**: [One-sentence summary of this section]
+
+**Content**:
+- [Point 1 - verbatim from source]
+- [Point 2 - verbatim from source]
+- [Point 3 - verbatim from source]
+
+**Visual Element**: [Description of what to show visually]
+- Type: [icon/chart/illustration/diagram/photo]
+- Subject: [what it depicts]
+- Treatment: [how it should be presented]
+
+**Text Labels**:
+- Headline: "[Exact text for headline]"
+- Subhead: "[Exact text for subhead]"
+- Labels: "[Label 1]", "[Label 2]", "[Label 3]"
+
+---
+
+## Section 2: [Section Title]
+
+**Key Concept**: [One-sentence summary]
+
+**Content**:
+- [Point 1]
+- [Point 2]
+
+**Visual Element**: [Description]
+
+**Text Labels**:
+- Headline: "[text]"
+- Labels: "[Label 1]", "[Label 2]"
+
+---
+
+[Continue for each section...]
+
+---
+
+## Data Points (Verbatim)
+
+All statistics, numbers, and quotes exactly as they appear in source:
+
+### Statistics
+- "[Exact statistic 1]"
+- "[Exact statistic 2]"
+- "[Exact statistic 3]"
+
+### Quotes
+- "[Exact quote]" — [Attribution]
+
+### Key Terms
+- **[Term 1]**: [Definition from source]
+- **[Term 2]**: [Definition from source]
+
+---
+
+## Design Instructions
+
+Extracted from user's steering prompt:
+
+### Style Preferences
+- [Any color preferences]
+- [Any mood/aesthetic preferences]
+- [Any artistic style preferences]
+
+### Layout Preferences
+- [Any structure preferences]
+- [Any organization preferences]
+
+### Other Requirements
+- [Any other visual requirements from user]
+- [Target platform if specified]
+- [Brand guidelines if any]
+```
+
+## Section Types by Content
+
+### For Process/Steps
+
+```markdown
+## Section N: Step N - [Step Title]
+
+**Key Concept**: [What this step accomplishes]
+
+**Content**:
+- Action: [What to do]
+- Details: [How to do it]
+- Note: [Important consideration]
+
+**Visual Element**:
+- Type: numbered step icon
+- Subject: [visual representing the action]
+- Arrow: leads to next step
+
+**Text Labels**:
+- Headline: "Step N: [Title]"
+- Action: "[Imperative verb + object]"
+```
+
+### For Comparison
+
+```markdown
+## Section N: [Item A] vs [Item B]
+
+**Key Concept**: [What distinguishes them]
+
+**Content**:
+| Aspect | [Item A] | [Item B] |
+|--------|----------|----------|
+| [Factor 1] | [Value] | [Value] |
+| [Factor 2] | [Value] | [Value] |
+
+**Visual Element**:
+- Type: split comparison
+- Left: [Item A representation]
+- Right: [Item B representation]
+
+**Text Labels**:
+- Headline: "[Item A] vs [Item B]"
+- Left label: "[Item A name]"
+- Right label: "[Item B name]"
+```
+
+### For Hierarchy
+
+```markdown
+## Section N: [Level Name]
+
+**Key Concept**: [What this level represents]
+
+**Content**:
+- Position: [Top/Middle/Bottom]
+- Priority: [Importance level]
+- Contains: [Elements at this level]
+
+**Visual Element**:
+- Type: layer/tier
+- Size: [relative to other levels]
+- Position: [where in hierarchy]
+
+**Text Labels**:
+- Level title: "[Name]"
+- Description: "[Brief description]"
+```
+
+### For Data/Statistics
+
+```markdown
+## Section N: [Metric Name]
+
+**Key Concept**: [What this data shows]
+
+**Content**:
+- Value: [Exact number/percentage]
+- Context: [What it means]
+- Comparison: [Benchmark if any]
+
+**Visual Element**:
+- Type: [chart/number highlight/gauge]
+- Emphasis: [how to draw attention]
+
+**Text Labels**:
+- Main number: "[Exact value]"
+- Label: "[Metric name]"
+- Context: "[Brief context]"
+```
+
+## Quality Checklist
+
+Before finalizing structured content:
+
+- [ ] Title captures the main message
+- [ ] Learning objectives are clear and measurable
+- [ ] Each section maps to an objective
+- [ ] All content is verbatim from source
+- [ ] Visual elements are clearly described
+- [ ] Text labels are specified exactly
+- [ ] Data points are collected and verified
+- [ ] Design instructions are separated
+- [ ] No new information has been added
diff --git a/skills/creative/baoyu-infographic/references/styles/aged-academia.md b/skills/creative/baoyu-infographic/references/styles/aged-academia.md
new file mode 100644
index 00000000000..d2094ccd45f
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/aged-academia.md
@@ -0,0 +1,36 @@
+# aged-academia
+
+Historical scientific illustration with aged paper aesthetic.
+
+## Color Palette
+
+- Primary: Sepia brown (#704214), aged ink, muted earth tones
+- Background: Parchment (#F4E4BC), yellowed paper texture
+- Accents: Faded red annotations, iron gall ink spots
+
+## Variants
+
+| Variant | Focus | Visual Emphasis |
+|---------|-------|-----------------|
+| **Notebook** | Personal sketches, inventions | Cursive notes, margin annotations |
+| **Specimen** | Scientific classification | Numbered diagrams, Latin labels |
+
+## Visual Elements
+
+- Aged paper texture overlay
+- Detailed cross-hatching and line work
+- Scientific illustration precision
+- Study notes and annotations
+- Specimen plate or sketch aesthetic
+- Numbered diagram elements
+
+## Typography
+
+- Handwritten cursive or serif fonts
+- Scientific annotations
+- Small caps for labels
+- Italics for scientific names
+
+## Best For
+
+Scientific education, biology topics, historical explanations, inventions, nature documentation
diff --git a/skills/creative/baoyu-infographic/references/styles/bold-graphic.md b/skills/creative/baoyu-infographic/references/styles/bold-graphic.md
new file mode 100644
index 00000000000..3f94bf600b6
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/bold-graphic.md
@@ -0,0 +1,36 @@
+# bold-graphic
+
+High-contrast comic style with bold outlines and dramatic visuals.
+
+## Color Palette
+
+- Primary: Bold primaries - red, yellow, blue, black
+- Background: White, halftone patterns, dramatic shadows
+- Accents: Spot colors, neon highlights
+
+## Variants
+
+| Variant | Focus | Visual Emphasis |
+|---------|-------|-----------------|
+| **Graphic-novel** | Dramatic narratives | Action lines, hatching, panels |
+| **Pop-art** | High-energy impact | Halftone dots, Warhol repetition |
+
+## Visual Elements
+
+- Bold black outlines
+- High contrast compositions
+- Halftone dot patterns
+- Comic panel borders optional
+- Action lines and motion
+- Speech bubbles and sound effects
+
+## Typography
+
+- Comic book lettering
+- Impact fonts for emphasis
+- POW/BANG effects for pop-art
+- Caption boxes for narrative
+
+## Best For
+
+Attention-grabbing content, dramatic narratives, pop culture, marketing, high-energy presentations
diff --git a/skills/creative/baoyu-infographic/references/styles/chalkboard.md b/skills/creative/baoyu-infographic/references/styles/chalkboard.md
new file mode 100644
index 00000000000..96b91b43f7c
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/chalkboard.md
@@ -0,0 +1,61 @@
+# chalkboard
+
+Black chalkboard background with colorful chalk drawing style.
+
+## Design Aesthetic
+
+Classic classroom chalkboard aesthetic with hand-drawn chalk illustrations. Nostalgic educational feel with imperfect, sketchy lines that capture the warmth of traditional teaching. Colorful chalk creates visual hierarchy while maintaining the authentic chalkboard experience.
+
+## Background
+
+- Color: Chalkboard Black (#1A1A1A) or Dark Green-Black (#1C2B1C)
+- Texture: Realistic chalkboard texture with subtle scratches, dust particles, and faint eraser marks
+
+## Typography
+
+Hand-drawn chalk lettering style with visible chalk texture. Imperfect baseline adds authenticity. White or bright colored chalk for emphasis.
+
+## Color Palette
+
+| Role | Color | Hex | Usage |
+|------|-------|-----|-------|
+| Background | Chalkboard Black | #1A1A1A | Primary background |
+| Alt Background | Green-Black | #1C2B1C | Traditional green board |
+| Primary Text | Chalk White | #F5F5F5 | Main text, outlines |
+| Accent 1 | Chalk Yellow | #FFE566 | Highlights, emphasis |
+| Accent 2 | Chalk Pink | #FF9999 | Secondary highlights |
+| Accent 3 | Chalk Blue | #66B3FF | Diagrams, links |
+| Accent 4 | Chalk Green | #90EE90 | Success, nature |
+| Accent 5 | Chalk Orange | #FFB366 | Warnings, energy |
+
+## Visual Elements
+
+- Hand-drawn chalk illustrations with sketchy, imperfect lines
+- Chalk dust effects around text and key elements
+- Doodles: stars, arrows, underlines, circles, checkmarks
+- Mathematical formulas and simple diagrams
+- Eraser smudges and chalk residue textures
+- Wooden frame border optional
+- Stick figures and simple icons
+- Connection lines with hand-drawn feel
+
+## Style Rules
+
+### Do
+
+- Maintain authentic chalk texture on all elements
+- Use imperfect, hand-drawn quality throughout
+- Add subtle chalk dust and smudge effects
+- Create visual hierarchy with color variety
+- Include playful doodles and annotations
+
+### Don't
+
+- Use perfect geometric shapes
+- Create clean digital-looking lines
+- Add photorealistic elements
+- Use gradients or glossy effects
+
+## Best For
+
+Educational content, tutorials, classroom themes, teaching materials, workshops, informal learning sessions, knowledge sharing
diff --git a/skills/creative/baoyu-infographic/references/styles/claymation.md b/skills/creative/baoyu-infographic/references/styles/claymation.md
new file mode 100644
index 00000000000..d9c2b7e5f0a
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/claymation.md
@@ -0,0 +1,29 @@
+# claymation
+
+3D clay figure aesthetic with stop-motion charm.
+
+## Color Palette
+
+- Primary: Saturated clay colors - bright but slightly muted
+- Background: Neutral studio backdrop, soft gradients
+- Accents: Complementary clay colors, shiny highlights
+
+## Visual Elements
+
+- Clay/plasticine texture on all objects
+- Fingerprint marks and imperfections
+- Rounded, sculpted forms
+- Soft shadows
+- Stop-motion staging
+- Miniature set aesthetic
+
+## Typography
+
+- Extruded clay letters
+- Dimensional, rounded text
+- Playful and chunky
+- Embedded in clay scenes
+
+## Best For
+
+Playful explanations, children's content, stop-motion narratives, friendly processes
diff --git a/skills/creative/baoyu-infographic/references/styles/corporate-memphis.md b/skills/creative/baoyu-infographic/references/styles/corporate-memphis.md
new file mode 100644
index 00000000000..1e7d5874852
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/corporate-memphis.md
@@ -0,0 +1,29 @@
+# corporate-memphis
+
+Flat vector people with vibrant geometric fills.
+
+## Color Palette
+
+- Primary: Bright, saturated - purple, orange, teal, yellow
+- Background: White or light pastels
+- Accents: Gradient fills, geometric patterns
+
+## Visual Elements
+
+- Flat vector illustration
+- Disproportionate human figures
+- Abstract body shapes
+- Floating geometric elements
+- No outlines, solid fills
+- Plant and object accents
+
+## Typography
+
+- Clean sans-serif
+- Bold headings
+- Professional but friendly
+- Minimal decoration
+
+## Best For
+
+Business presentations, tech products, marketing materials, corporate training
diff --git a/skills/creative/baoyu-infographic/references/styles/craft-handmade.md b/skills/creative/baoyu-infographic/references/styles/craft-handmade.md
new file mode 100644
index 00000000000..86354111a57
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/craft-handmade.md
@@ -0,0 +1,44 @@
+# craft-handmade (DEFAULT)
+
+Hand-drawn and paper craft aesthetic with warm, organic feel.
+
+## Color Palette
+
+- Primary: Warm pastels, soft saturated colors, craft paper tones
+- Background: Light cream (#FFF8F0), textured paper (#F5F0E6)
+- Accents: Bold highlights, construction paper colors
+
+## Variants
+
+| Variant | Focus | Visual Emphasis |
+|---------|-------|-----------------|
+| **Hand-drawn** | Cartoon illustration | Simple icons, slightly imperfect lines |
+| **Paper-cutout** | Layered paper craft | Drop shadows, torn edges, texture |
+
+## Visual Elements
+
+- Hand-drawn or cut-paper quality
+- Organic, slightly imperfect shapes
+- Layered depth with shadows (paper variant)
+- Simple cartoon elements and icons
+- Character illustrations (people, personalities in cartoon form)
+- Ample whitespace, clean composition
+- Keywords and core concepts highlighted
+- **Strictly hand-drawn—no realistic or photographic elements**
+
+## Style Enforcement
+
+- All imagery must maintain cartoon/illustrated aesthetic
+- Replace real photos or realistic figures with hand-drawn equivalents
+- Maintain consistent line weight and illustration style throughout
+
+## Typography
+
+- Hand-drawn or casual font style
+- Clear, readable labels
+- Keywords emphasized with larger/bolder text
+- Cut-out letter style for paper variant
+
+## Best For
+
+Educational content, general explanations, friendly infographics, children's content, playful hierarchies
diff --git a/skills/creative/baoyu-infographic/references/styles/cyberpunk-neon.md b/skills/creative/baoyu-infographic/references/styles/cyberpunk-neon.md
new file mode 100644
index 00000000000..5a8681355ef
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/cyberpunk-neon.md
@@ -0,0 +1,29 @@
+# cyberpunk-neon
+
+Neon glow on dark backgrounds, futuristic aesthetic.
+
+## Color Palette
+
+- Primary: Neon pink (#FF00FF), cyan (#00FFFF), electric blue
+- Background: Deep black (#0A0A0A), dark purple gradients
+- Accents: Neon glow effects, chrome reflections
+
+## Visual Elements
+
+- Glowing neon outlines
+- Dark atmospheric backgrounds
+- Digital glitch effects
+- Circuit patterns
+- Holographic elements
+- Rain and reflections
+
+## Typography
+
+- Glowing neon text
+- Digital/tech fonts
+- Flickering effects
+- Outlined glow letters
+
+## Best For
+
+Tech futures, gaming content, digital culture, futuristic concepts, night aesthetics
diff --git a/skills/creative/baoyu-infographic/references/styles/hand-drawn-edu.md b/skills/creative/baoyu-infographic/references/styles/hand-drawn-edu.md
new file mode 100644
index 00000000000..64cdf7eb37b
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/hand-drawn-edu.md
@@ -0,0 +1,63 @@
+# hand-drawn-edu
+
+Hand-drawn educational infographic with macaron pastel color blocks on warm cream paper texture.
+
+## Color Palette
+
+- Background: Warm cream (#F5F0E8) with subtle paper grain texture
+- Primary text: Deep charcoal (#2D2D2D) for headlines, outlines
+- Macaron Blue: #A8D8EA for cool-toned information zones
+- Macaron Mint: #B5E5CF for growth/positive zones
+- Macaron Lavender: #D5C6E0 for abstract/concept zones
+- Macaron Peach: #FFD5C2 for warm-toned zones
+- Accent: Coral Red (#E8655A) for key data, warnings, emphasis
+- Muted annotations: Warm gray (#6B6B6B) for secondary labels
+
+## Visual Elements
+
+- Macaron pastel rounded cards as distinct information zones
+- Hand-drawn wavy connection lines and arrows with small text labels
+- Simple stick-figure characters and cartoon icons to humanize concepts
+- Doodle decorations: small stars, underlines, spirals, sparkles
+- Color fills don't completely fill outlines — preserve casual hand-drawn feel
+- Dashed borders for secondary or contained zones
+- Small icon doodles (clipboard, lock, checkmark, lightbulb) to reinforce concepts
+- Bold centered quote or takeaway at the bottom
+- Slight hand-drawn wobble on all lines and shapes
+
+## Variants
+
+| Variant | Focus | Visual Emphasis |
+|---------|-------|-----------------|
+| **Sketch-notes** | Concept mapping | More stick figures, thought bubbles, connecting arrows |
+| **Pastel cards** | Structured info | Cleaner macaron blocks, less doodle, more white space |
+
+## Typography
+
+- Main title: Bold hand-drawn lettering with organic strokes, large confident letterforms with slight wobble
+- Section headers: Hand-lettered text on or inside macaron color blocks
+- Body text: Clear handwritten print style, legible but not mechanical
+- Annotations: Warm gray (#6B6B6B), smaller, neat handwritten labels
+- Keywords: Bold emphasis within body text
+
+## Style Enforcement
+
+- All lines must have slight hand-drawn wobble — no perfect geometry
+- Each information zone uses a distinct macaron color block
+- Maintain consistent wobble quality across all shapes and lines
+- Include at least one simple cartoon character or stick figure
+- Generous white space between zones — each zone should breathe
+- Maximum 4 macaron colors per infographic
+
+## Avoid
+
+- Perfect geometric shapes or straight lines
+- Photorealistic elements or stock illustration style
+- Pure white backgrounds
+- Flat vector icons or digital-precision graphics
+- Overcrowded layouts — let zones breathe
+- Corporate or clinical aesthetic
+
+## Best For
+
+Educational diagrams, process explainers, concept maps, knowledge summaries, tutorial walkthroughs, onboarding visuals
diff --git a/skills/creative/baoyu-infographic/references/styles/ikea-manual.md b/skills/creative/baoyu-infographic/references/styles/ikea-manual.md
new file mode 100644
index 00000000000..d859828d143
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/ikea-manual.md
@@ -0,0 +1,29 @@
+# ikea-manual
+
+Minimal line art assembly instruction style
+
+## Color Palette
+
+- Primary: Black lines, minimal fills
+- Background: White or cream paper
+- Accents: Red for warnings, blue for highlights
+
+## Visual Elements
+
+- Simple line drawings
+- Numbered step sequences
+- Arrow indicators
+- Exploded assembly views
+- Wordless communication
+- Stick figures for scale
+
+## Typography
+
+- Minimal text
+- Step numbers prominent
+- Universal symbols
+- Simple sans-serif when needed
+
+## Best For
+
+Step-by-step instructions, assembly guides, how-to content, universal communication
diff --git a/skills/creative/baoyu-infographic/references/styles/kawaii.md b/skills/creative/baoyu-infographic/references/styles/kawaii.md
new file mode 100644
index 00000000000..a7531a614b6
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/kawaii.md
@@ -0,0 +1,29 @@
+# kawaii
+
+Japanese cute style with big eyes and pastel colors
+
+## Color Palette
+
+- Primary: Soft pastels - pink (#FFB6C1), mint (#98D8C8), lavender (#E6E6FA)
+- Background: Light pink or cream, sparkle overlays
+- Accents: Bright pops, star and heart shapes
+
+## Visual Elements
+
+- Big sparkly eyes on characters
+- Rounded, soft shapes
+- Blushing cheeks
+- Sparkles and stars scattered
+- Cute animal characters
+- Chibi proportions
+
+## Typography
+
+- Rounded, bubbly fonts
+- Cute decorations on letters
+- Hearts and stars in text
+- Soft, friendly appearance
+
+## Best For
+
+Cute tutorials, children's education, lifestyle content, character-driven explanations
diff --git a/skills/creative/baoyu-infographic/references/styles/knolling.md b/skills/creative/baoyu-infographic/references/styles/knolling.md
new file mode 100644
index 00000000000..cd55c99377c
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/knolling.md
@@ -0,0 +1,29 @@
+# knolling
+
+Organized flat-lay with top-down arrangement
+
+## Color Palette
+
+- Primary: Object's natural colors
+- Background: Solid color - black, white, or colored surface
+- Accents: Shadows, subtle highlights
+
+## Visual Elements
+
+- Top-down camera angle
+- Objects arranged at 90° angles
+- Equal spacing between items
+- Clean organization
+- Symmetry and order
+- No overlapping items
+
+## Typography
+
+- Clean labels
+- Positioned outside objects
+- Connecting lines to items
+- Minimal, catalog-style
+
+## Best For
+
+Product collections, tool inventories, gear layouts, organized overviews
diff --git a/skills/creative/baoyu-infographic/references/styles/lego-brick.md b/skills/creative/baoyu-infographic/references/styles/lego-brick.md
new file mode 100644
index 00000000000..582cc654aad
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/lego-brick.md
@@ -0,0 +1,29 @@
+# lego-brick
+
+Toy brick construction with playful aesthetic
+
+## Color Palette
+
+- Primary: Classic LEGO colors - red, blue, yellow, green, white
+- Background: Light gray baseplate or white
+- Accents: Bright primary pops, shiny studs
+
+## Visual Elements
+
+- Visible brick studs
+- Modular construction
+- Minifigure characters
+- Building instruction style
+- Stackable elements
+- Plastic sheen
+
+## Typography
+
+- Blocky, bold fonts
+- LEGO instruction style
+- Step numbers
+- Playful appearance
+
+## Best For
+
+Building concepts, modular systems, playful education, children's content
diff --git a/skills/creative/baoyu-infographic/references/styles/morandi-journal.md b/skills/creative/baoyu-infographic/references/styles/morandi-journal.md
new file mode 100644
index 00000000000..951f725b102
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/morandi-journal.md
@@ -0,0 +1,60 @@
+# morandi-journal
+
+Hand-drawn doodle illustration with warm Morandi color tones and cozy bullet journal aesthetic.
+
+## Color Palette
+
+- Background: Warm cream/beige with subtle paper texture (#F5F0E6)
+- Primary: Muted teal/sage green (#7BA3A8) for headers and frames
+- Secondary: Warm terracotta/orange (#D4956A) for highlights and numbers
+- Line art: Dark charcoal brown (#4A4540)
+- Soft highlights: Pale yellow (#F5E6C8)
+
+## Visual Elements
+
+- Hand-drawn doodle illustrations with organic, slightly imperfect ink lines
+- Washi tape strip decorations (diagonal stripes pattern, beige and brown)
+- Rounded card containers for brand/option items
+- Hand-drawn rulers, scales, and progress bars with emoji quality indicators
+- Smiley/frowny faces as quality markers (😊✓ 😐 ☹️✗)
+- Dotted line frames around sections
+- Connecting arrows and dotted lines between modules
+- Corner decorations: tiny houses, stars, sparkles, clouds
+- Wavy line dividers between sections
+- Callout bubbles for tips
+- Magnifying glass icons for identification tips
+- Thumbs up/down icons (hand-drawn style)
+
+## Variants
+
+| Variant | Focus | Visual Emphasis |
+|---------|-------|-----------------|
+| **Cozy journal** | Maximum warmth | More washi tape, stickers, decorative doodles |
+| **Clean sketch** | Readability | Cleaner lines, less decoration, more structured |
+
+## Typography
+
+- Main title: Bold hand-lettered calligraphy style with decorative flourishes
+- Module headers: Clean handwritten text in white on dark teal rounded badge (#6B9080)
+- Body text: Neat handwritten print style, easy to read
+- Numbers: Highlighted in terracotta (#D4956A), slightly larger than body
+
+## Style Enforcement
+
+- All imagery must maintain hand-drawn/doodle aesthetic—no digital precision
+- Organic, slightly imperfect shapes throughout
+- Sketch-like quality with visible line weight variations
+- Warm and cozy journal feel, not clinical or corporate
+
+## Avoid
+
+- Flat vector icons or emoji
+- Clean geometric shapes
+- Stock illustration style
+- Strict grid layout
+- Pure white background
+- Digital/corporate look
+
+## Best For
+
+Product selection guides, lifestyle content, educational overviews, consumer-facing comparison content, Xiaohongshu-style posts
diff --git a/skills/creative/baoyu-infographic/references/styles/origami.md b/skills/creative/baoyu-infographic/references/styles/origami.md
new file mode 100644
index 00000000000..7a0bf5e2201
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/origami.md
@@ -0,0 +1,29 @@
+# origami
+
+Folded paper forms with geometric precision
+
+## Color Palette
+
+- Primary: Solid origami paper colors - red, blue, green, gold
+- Background: White or soft gray, subtle shadows
+- Accents: Paper fold highlights, crisp shadows
+
+## Visual Elements
+
+- Geometric folded shapes
+- Visible fold lines
+- Cast shadows showing depth
+- Paper texture
+- Angular, faceted forms
+- Low-poly aesthetic
+
+## Typography
+
+- Clean geometric fonts
+- Angular letterforms
+- Folded paper text effect
+- Minimal, precise labels
+
+## Best For
+
+Geometric concepts, transformation topics, Japanese themes, abstract representations
diff --git a/skills/creative/baoyu-infographic/references/styles/pixel-art.md b/skills/creative/baoyu-infographic/references/styles/pixel-art.md
new file mode 100644
index 00000000000..1fab4184876
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/pixel-art.md
@@ -0,0 +1,29 @@
+# pixel-art
+
+Retro 8-bit gaming aesthetic
+
+## Color Palette
+
+- Primary: Limited palette - NES/SNES colors
+- Background: Black or dark blue, scanlines optional
+- Accents: Bright pixel highlights, CRT glow
+
+## Visual Elements
+
+- Visible pixel grid
+- Limited color count per sprite
+- 8-bit or 16-bit style
+- Retro game UI elements
+- Pixel-perfect edges
+- Dithering for gradients
+
+## Typography
+
+- Pixel fonts
+- Blocky letterforms
+- Game UI style text
+- Score/stat display style
+
+## Best For
+
+Gaming topics, nostalgia content, developer audiences, retro tech themes
diff --git a/skills/creative/baoyu-infographic/references/styles/pop-laboratory.md b/skills/creative/baoyu-infographic/references/styles/pop-laboratory.md
new file mode 100644
index 00000000000..f53014dd96a
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/pop-laboratory.md
@@ -0,0 +1,48 @@
+# pop-laboratory
+
+Lab manual precision meets pop art color impact—coordinate systems, technical diagrams, and fluorescent accents on blueprint grid.
+
+## Color Palette
+
+- Background: Professional grayish-white with faint blueprint grid texture (#F2F2F2)
+- Primary: Muted teal/sage green (#B8D8BE) for major functional blocks and data zones
+- High-alert accent: Vibrant fluorescent pink (#E91E63) strictly for warnings, critical data, or "winner" highlights
+- Marker highlights: Vivid lemon yellow (#FFF200) as translucent highlighter effect for keywords
+- Line art: Ultra-fine charcoal brown (#2D2926) for technical grids, coordinates, and hairlines
+
+## Visual Elements
+
+- Coordinate-style labels on every module (e.g., R-20, G-02, SEC-08)
+- Technical diagrams: exploded views, cross-sections with anchor points, architectural skeletal lines
+- Vertical/horizontal rulers with precise markers (0.5mm, 1.8mm, 45°)
+- "Marker-over-print" effect: color blocks slightly offset from text, postmodern print feel
+- Cross-hair targets, mathematical symbols (Σ, Δ, ∞), directional arrows (X/Y axis)
+- Microscopic detail annotations alongside macroscopic bold headers
+- Corner metadata: tiny barcodes, timestamps, technical parameters
+- High contrast between massive bold headers and tiny 8pt-style annotations
+
+## Typography
+
+- Headers: Bold brutalist characters, high visual impact
+- Body: Professional sans-serif or crisp technical print
+- Numbers: Large, highlighted with yellow or blue to stand out
+- Annotations: Ultra-crisp, small technical labels
+
+## Style Enforcement
+
+- Strictly systematic color usage: only teal, pink, yellow, charcoal—no rainbow palette
+- Sufficient fine grid lines and coordinate annotations throughout
+- Maintain tension between large impactful headers and small precise parameters
+- Lab manual aesthetic: mix of microscopic details and macroscopic data
+
+## Avoid
+
+- Cute or cartoonish doodles
+- Soft pastels or generic textures
+- Empty white space
+- Flat vector stock icons
+- Organic or hand-drawn imperfections
+
+## Best For
+
+Technical product guides, specification comparisons, precision-focused data visualization, engineering-adjacent content
diff --git a/skills/creative/baoyu-infographic/references/styles/retro-pop-grid.md b/skills/creative/baoyu-infographic/references/styles/retro-pop-grid.md
new file mode 100644
index 00000000000..08c34d358bf
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/retro-pop-grid.md
@@ -0,0 +1,47 @@
+# retro-pop-grid
+
+1970s retro pop art with strict Swiss international grid, thick black outlines, and flat color blocks.
+
+## Color Palette
+
+- Background: Warm vintage cream/beige (#F5F0E6)
+- Flat accents: Salmon pink, sky blue, mustard yellow, mint green—all muted retro tones
+- Contrast blocks: Solid pure black (#000000) and solid pure white (#FFFFFF) used strategically for extreme contrast
+- Line art and outlines: Solid thick black
+
+## Visual Elements
+
+- Uniform thick black outlines on all illustrations, text boxes, and grid dividers
+- Pure 2D flat vector aesthetic with subtle screen print texture
+- Strict Swiss international grid: poster divided into square and rectangular cells by thick black lines
+- Black-background cells with white text for warnings or key categories (inverted contrast)
+- Geometric fill patterns in empty cells: checkerboards, diagonal lines, dots
+- Flat abstract symbols, warning signs, keyholes, stars, arrows
+- Vintage comic-style smiley/frowny faces for quality indicators
+- Colored cells used for breathing room—some with minimal/no content
+
+## Typography
+
+- Headers: Bold brutalist or retro thick display fonts, high legibility
+- Body: Clean sans-serif, structured typographic alignment
+- Decorative English text acceptable for stylistic labels ("WARNING", "INFO", "BEST")
+- All content text in specified language
+
+## Style Enforcement
+
+- Absolutely no gradients, shading, drop shadows, or 3D effects
+- Everything anchored in grid cells—no floating or unorganized elements
+- Maintain 1970s retro pop art and underground comic illustration feel
+- Visual density balanced with rhythmic grid—some cells intentionally sparse for contrast
+
+## Avoid
+
+- 3D rendering, realistic details, gradients, soft shadows
+- Soft, thin, or sketch-like pencil lines
+- Free-flowing, unorganized, or floating layouts (everything must be grid-anchored)
+- Pure white background canvas
+- Organic or hand-drawn imperfections
+
+## Best For
+
+Trendy product guides, design-conscious content, visually striking comparisons, content targeting design-savvy audiences, bold social media posts
diff --git a/skills/creative/baoyu-infographic/references/styles/storybook-watercolor.md b/skills/creative/baoyu-infographic/references/styles/storybook-watercolor.md
new file mode 100644
index 00000000000..01828f3baad
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/storybook-watercolor.md
@@ -0,0 +1,29 @@
+# storybook-watercolor
+
+Soft hand-painted illustration with whimsical charm
+
+## Color Palette
+
+- Primary: Soft watercolor washes - muted blues, greens, warm earth
+- Background: Watercolor paper texture, white or cream
+- Accents: Deeper pigment pools, splatter effects
+
+## Visual Elements
+
+- Visible brushstrokes
+- Soft color bleeds and gradients
+- White space as design element
+- Delicate line work over washes
+- Natural, organic shapes
+- Dreamy, atmospheric quality
+
+## Typography
+
+- Elegant hand-lettering
+- Watercolor-style text
+- Flowing, organic letterforms
+- Integrated with illustrations
+
+## Best For
+
+Storytelling, emotional journeys, nature topics, children's education, artistic presentations
diff --git a/skills/creative/baoyu-infographic/references/styles/subway-map.md b/skills/creative/baoyu-infographic/references/styles/subway-map.md
new file mode 100644
index 00000000000..de908576474
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/subway-map.md
@@ -0,0 +1,29 @@
+# subway-map
+
+Transit diagram style with colored lines and stations
+
+## Color Palette
+
+- Primary: Transit line colors - red, blue, green, yellow, orange
+- Background: White or light gray
+- Accents: Station dots, interchange markers
+
+## Visual Elements
+
+- Colored route lines
+- 45° and 90° angles only
+- Station circle markers
+- Interchange symbols
+- Simplified geography
+- Line thickness hierarchy
+
+## Typography
+
+- Clean sans-serif
+- Station name labels
+- Line number/name badges
+- Horizontal or angled text
+
+## Best For
+
+Journey maps, process flows, network diagrams, route explanations
diff --git a/skills/creative/baoyu-infographic/references/styles/technical-schematic.md b/skills/creative/baoyu-infographic/references/styles/technical-schematic.md
new file mode 100644
index 00000000000..5de34c576e9
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/technical-schematic.md
@@ -0,0 +1,36 @@
+# technical-schematic
+
+Technical diagrams with engineering precision and clean geometry.
+
+## Color Palette
+
+- Primary: Blues (#2563EB), teals, grays, white lines
+- Background: Deep blue (#1E3A5F), white, or light gray with grid
+- Accents: Amber highlights (#F59E0B), cyan callouts
+
+## Variants
+
+| Variant | Focus | Visual Emphasis |
+|---------|-------|-----------------|
+| **Blueprint** | Engineering schematics | White on blue, measurements, grid |
+| **Isometric** | 3D spatial representation | 30° angle blocks, clean fills |
+
+## Visual Elements
+
+- Geometric precision throughout
+- Grid pattern or isometric angle
+- Dimension lines and measurements
+- Technical symbols and annotations
+- Clean vector shapes
+- Consistent stroke weights
+
+## Typography
+
+- Technical stencil or clean sans-serif
+- All-caps labels
+- Measurement annotations
+- Floating labels for isometric
+
+## Best For
+
+Technical architecture, system diagrams, engineering specs, product breakdowns, data visualization
diff --git a/skills/creative/baoyu-infographic/references/styles/ui-wireframe.md b/skills/creative/baoyu-infographic/references/styles/ui-wireframe.md
new file mode 100644
index 00000000000..397a436ff75
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/ui-wireframe.md
@@ -0,0 +1,29 @@
+# ui-wireframe
+
+Grayscale interface mockup style
+
+## Color Palette
+
+- Primary: Grays - light (#E5E5E5), medium (#9CA3AF), dark (#374151)
+- Background: White (#FFFFFF), light gray
+- Accents: Blue for interactive (#3B82F6), red for emphasis
+
+## Visual Elements
+
+- Wireframe boxes and placeholders
+- X marks for image placeholders
+- Simple line icons
+- Grid-based layout
+- Annotation callouts
+- Redline specifications
+
+## Typography
+
+- System fonts
+- Placeholder "Lorem ipsum"
+- UI label style
+- Sans-serif throughout
+
+## Best For
+
+Product designs, UI explanations, app concepts, user flow diagrams

From ade7958f1f53f5afe4c1fbd6042b1888bed5a0ee Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Tue, 14 Apr 2026 22:48:49 -0700
Subject: [PATCH 047/143] docs: add PORT_NOTES.md for baoyu-infographic

Documents what changed from upstream and how to sync future updates.
---
 .../creative/baoyu-infographic/PORT_NOTES.md  | 43 +++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 skills/creative/baoyu-infographic/PORT_NOTES.md

diff --git a/skills/creative/baoyu-infographic/PORT_NOTES.md b/skills/creative/baoyu-infographic/PORT_NOTES.md
new file mode 100644
index 00000000000..0a2d86d89ca
--- /dev/null
+++ b/skills/creative/baoyu-infographic/PORT_NOTES.md
@@ -0,0 +1,43 @@
+# Port Notes — baoyu-infographic
+
+Ported from [JimLiu/baoyu-skills](https://github.com/JimLiu/baoyu-skills) v1.56.1.
+
+## Changes from upstream
+
+Only `SKILL.md` was modified. All 45 reference files are verbatim copies.
+
+### SKILL.md adaptations
+
+| Change | Upstream | Hermes |
+|--------|----------|--------|
+| Metadata namespace | `openclaw` | `hermes` |
+| Trigger | `/baoyu-infographic` slash command | Natural language skill matching |
+| User config | EXTEND.md file (project/user/XDG paths) | Removed — not part of Hermes infra |
+| User prompts | `AskUserQuestion` (batched) | `clarify` tool (one at a time) |
+| Image generation | baoyu-imagine (Bun/TypeScript) | `image_generate` tool |
+| Platform support | Linux/macOS/Windows/WSL/PowerShell | Linux/macOS only |
+| File operations | Bash commands | Hermes file tools (write_file, read_file) |
+
+### What was preserved
+
+- All layout definitions (21 files)
+- All style definitions (21 files)
+- Core reference files (analysis-framework, base-prompt, structured-content-template)
+- Recommended combinations table
+- Keyword shortcuts table
+- Core principles and workflow structure
+- Author, version, homepage attribution
+
+## Syncing with upstream
+
+To pull upstream updates:
+```bash
+# Compare versions
+curl -sL https://raw.githubusercontent.com/JimLiu/baoyu-skills/main/skills/baoyu-infographic/SKILL.md | head -5
+# Look for version: line
+
+# Diff reference files
+diff <(curl -sL https://raw.githubusercontent.com/JimLiu/baoyu-skills/main/skills/baoyu-infographic/references/layouts/bento-grid.md) references/layouts/bento-grid.md
+```
+
+Reference files can be overwritten directly (they're unchanged from upstream). SKILL.md must be manually merged since it contains Hermes-specific adaptations.

From b73ebfee302a59a5892b0e521fd7eb8b6ae1d7ba Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Sat, 18 Apr 2026 12:27:22 -0700
Subject: [PATCH 048/143] chore(attribution): add AUTHOR_MAP entry for Jim Liu
 (JimLiu)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Maps junminliu@gmail.com → JimLiu for the baoyu-infographic skill port
co-author attribution.
---
 scripts/release.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/release.py b/scripts/release.py
index 372a4802ba7..bc0496d863b 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -264,6 +264,7 @@ AUTHOR_MAP = {
     "asurla@nvidia.com": "anniesurla",
     "limkuan24@gmail.com": "WideLee",
     "aviralarora002@gmail.com": "AviArora02-commits",
+    "junminliu@gmail.com": "JimLiu",
 }
 
 

From a7f4d756b7048e4ece779ad44737ef060716b1b1 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 14:36:34 -0500
Subject: [PATCH 049/143] fix(tui): cap approval prompt command preview at 10
 lines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Large inline scripts (e.g. Python code_execution bodies) rendered as a single
unbounded <Text> block, pushing the Allow/Deny options below the visible
viewport. Users had to scroll the terminal to vote.

Preview now shows the first 10 lines with truncate-end wrap per line and a
dim "… +N more lines" indicator. Full text remains in the transcript above.
---
 ui-tui/src/components/prompts.tsx | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/ui-tui/src/components/prompts.tsx b/ui-tui/src/components/prompts.tsx
index 98aba0789b5..bfc603c51c6 100644
--- a/ui-tui/src/components/prompts.tsx
+++ b/ui-tui/src/components/prompts.tsx
@@ -8,6 +8,7 @@ import { TextInput } from './textInput.js'
 
 const OPTS = ['once', 'session', 'always', 'deny'] as const
 const LABELS = { always: 'Always allow', deny: 'Deny', once: 'Allow once', session: 'Allow this session' } as const
+const CMD_PREVIEW_LINES = 10
 
 export function ApprovalPrompt({ onChoice, req, t }: ApprovalPromptProps) {
   const [sel, setSel] = useState(0)
@@ -34,13 +35,28 @@ export function ApprovalPrompt({ onChoice, req, t }: ApprovalPromptProps) {
     }
   })
 
+  const rawLines = req.command.split('\n')
+  const shown = rawLines.slice(0, CMD_PREVIEW_LINES)
+  const overflow = rawLines.length - shown.length
+
   return (
     <Box borderColor={t.color.warn} borderStyle="double" flexDirection="column" paddingX={1}>
       <Text bold color={t.color.warn}>
         ⚠ approval required · {req.description}
       </Text>
 
-      <Text color={t.color.cornsilk}> {req.command}</Text>
+      <Box flexDirection="column" paddingLeft={1}>
+        {shown.map((line, i) => (
+          <Text color={t.color.cornsilk} key={i} wrap="truncate-end">
+            {line || ' '}
+          </Text>
+        ))}
+
+        {overflow > 0 ? (
+          <Text color={t.color.dim}>… +{overflow} more line{overflow === 1 ? '' : 's'} (full text above)</Text>
+        ) : null}
+      </Box>
+
       <Text />
 
       {OPTS.map((o, i) => (

From 5c8b291607e23eb11d5df70f560490bdd0b3dd6e Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 14:39:24 -0500
Subject: [PATCH 050/143] fix(tui): wrap markdown links in Link so
 Ghostty/iTerm/kitty get real OSC 8 hyperlinks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

renderLink was discarding the URL entirely — it rendered the label as amber
underlined text and dropped the href. Result: Cmd+Click / Ctrl+Click did
nothing in any terminal, including Ghostty.

Now both markdown links `[label](url)` and bare `https://…` URLs are wrapped
in @hermes/ink's Link component, which emits OSC 8 (\x1b]8;;url\x07label\x1b]8;;\x07)
when supportsHyperlinks() returns true. ADDITIONAL_HYPERLINK_TERMINALS already
includes ghostty, iTerm2, kitty, alacritty, Hyper.

Autolinks that look like bare emails (foo@bar.com) now prepend mailto: in the
href so they open the mail client correctly.

Also adds a typed declaration for Link in hermes-ink.d.ts.
---
 ui-tui/src/components/markdown.tsx | 30 +++++++++++++++++++-----------
 ui-tui/src/types/hermes-ink.d.ts   |  5 +++++
 2 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/ui-tui/src/components/markdown.tsx b/ui-tui/src/components/markdown.tsx
index d43357b6918..5e1063837b9 100644
--- a/ui-tui/src/components/markdown.tsx
+++ b/ui-tui/src/components/markdown.tsx
@@ -1,4 +1,4 @@
-import { Box, Text } from '@hermes/ink'
+import { Box, Link, Text } from '@hermes/ink'
 import { memo, type ReactNode, useMemo } from 'react'
 
 import { highlightLine, isHighlightable } from '../lib/syntax.js'
@@ -23,10 +23,12 @@ type Fence = {
   len: number
 }
 
-const renderLink = (key: number, t: Theme, label: string) => (
-  <Text color={t.color.amber} key={key} underline>
-    {label}
-  </Text>
+const renderLink = (key: number, t: Theme, label: string, url: string) => (
+  <Link key={key} url={url}>
+    <Text color={t.color.amber} underline>
+      {label}
+    </Text>
+  </Link>
 )
 
 const trimBareUrl = (value: string) => {
@@ -38,11 +40,17 @@ const trimBareUrl = (value: string) => {
   }
 }
 
-const renderAutolink = (key: number, t: Theme, raw: string) => (
-  <Text color={t.color.amber} key={key} underline>
-    {raw.replace(/^mailto:/, '')}
-  </Text>
-)
+const renderAutolink = (key: number, t: Theme, raw: string) => {
+  const url = raw.startsWith('mailto:') ? raw : raw.includes('@') && !raw.startsWith('http') ? `mailto:${raw}` : raw
+
+  return (
+    <Link key={key} url={url}>
+      <Text color={t.color.amber} underline>
+        {raw.replace(/^mailto:/, '')}
+      </Text>
+    </Link>
+  )
+}
 
 const indentDepth = (indent: string) => Math.floor(indent.replace(/\t/g, '  ').length / 2)
 
@@ -142,7 +150,7 @@ function MdInline({ t, text }: { t: Theme; text: string }) {
         </Text>
       )
     } else if (m[4] && m[5]) {
-      parts.push(renderLink(parts.length, t, m[4]))
+      parts.push(renderLink(parts.length, t, m[4], m[5]))
     } else if (m[6]) {
       parts.push(renderAutolink(parts.length, t, m[6]))
     } else if (m[7]) {
diff --git a/ui-tui/src/types/hermes-ink.d.ts b/ui-tui/src/types/hermes-ink.d.ts
index 9b2deec35ff..051451d4196 100644
--- a/ui-tui/src/types/hermes-ink.d.ts
+++ b/ui-tui/src/types/hermes-ink.d.ts
@@ -63,6 +63,11 @@ declare module '@hermes/ink' {
   export const Box: React.ComponentType<any>
   export const AlternateScreen: React.ComponentType<any>
   export const Ansi: React.ComponentType<any>
+  export const Link: React.ComponentType<{
+    readonly children?: React.ReactNode
+    readonly fallback?: React.ReactNode
+    readonly url: string
+  }>
   export const NoSelect: React.ComponentType<any>
   export const ScrollBox: React.ComponentType<any>
   export const Text: React.ComponentType<any>

From 3128d9fcd24ff06b1cb9e7bb7f300d7be4054d05 Mon Sep 17 00:00:00 2001
From: Honghua Yang <2500400+honghua@users.noreply.github.com>
Date: Fri, 17 Apr 2026 13:32:10 -0700
Subject: [PATCH 051/143] fix(context_compressor): keep tool-call arguments
 JSON valid when shrinking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pass 3 of `_prune_old_tool_results` previously shrunk long `function.arguments`
blobs by slicing the raw JSON string at byte 200 and appending the literal
text `...[truncated]`. That routinely produced payloads like::

    {"path": "/foo.md", "content": "# Long markdown
    ...[truncated]

— an unterminated string with no closing brace. Strict providers (observed
on MiniMax) reject this as `invalid function arguments json string` with a
non-retryable 400. Because the broken call survives in the session history,
every subsequent turn re-sends the same malformed payload and gets the same
400, locking the session into a re-send loop until the call falls out of
the window.

Fix: parse the arguments first, shrink long string leaves inside the parsed
structure, and re-serialise. Short strings (e.g. paths) and non-string scalars
(ints, booleans, null) pass through intact; dicts and lists are walked
recursively. Arguments that are not valid JSON to begin with (rare,
some backends use non-JSON tool args) are returned unchanged rather than
replaced with something neither we nor the provider can parse.

Observed in the wild: a `write_file` with ~800 chars of markdown `content`
triggered this on a real session against MiniMax-M2.7; every turn after
compression got rejected until the session was manually reset.

Tests:
- 7 direct tests of `_truncate_tool_call_args_json` covering valid-JSON
  output, non-JSON pass-through, nested structures, non-string leaves,
  scalar JSON, and Unicode preservation
- 1 end-to-end test through `_prune_old_tool_results` Pass 3 that
  reproduces the exact failure payload shape from the incident

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 agent/context_compressor.py            |  57 +++++++++++-
 tests/agent/test_context_compressor.py | 124 +++++++++++++++++++++++++
 2 files changed, 179 insertions(+), 2 deletions(-)

diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index 34ec5091b1c..ae8c2c0bd31 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -63,6 +63,52 @@ _CHARS_PER_TOKEN = 4
 _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
 
 
+def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
+    """Shrink long string values inside a tool-call arguments JSON blob while
+    preserving JSON validity.
+
+    The ``function.arguments`` field on a tool call is a JSON-encoded string
+    passed through to the LLM provider; downstream providers strictly
+    validate it and return a non-retryable 400 when it is not well-formed.
+    An earlier implementation sliced the raw JSON at a fixed character offset
+    and appended ``...[truncated]`` — which routinely produced strings like::
+
+        {"path": "/foo/bar", "content": "# long markdown
+        ...[truncated]
+
+    i.e. an unterminated string and a missing closing brace. MiniMax, for
+    example, rejects this with ``invalid function arguments json string``
+    and the session gets stuck re-sending the same broken history on every
+    turn. See issue #11762 for the observed loop.
+
+    This helper parses the arguments, truncates every string leaf longer
+    than ``head_chars`` and re-serialises. Short strings (e.g. paths) and
+    non-string leaves (ints, booleans, null) are preserved intact. The
+    original string is returned unchanged when it is not valid JSON (some
+    backends use non-JSON tool arguments) or when no leaf needed shrinking,
+    so re-serialisation never rewrites — or lengthens — an untouched blob.
+    """
+    try:
+        parsed = json.loads(args)
+    except (ValueError, TypeError):
+        return args
+
+    def _shrink(obj: Any) -> Any:
+        if isinstance(obj, str):
+            return obj[:head_chars] + "...[truncated]" if len(obj) > head_chars else obj
+        if isinstance(obj, dict):
+            return {k: _shrink(v) for k, v in obj.items()}
+        if isinstance(obj, list):
+            return [_shrink(v) for v in obj]
+        return obj
+
+    shrunken = _shrink(parsed)
+    if shrunken == parsed:
+        return args  # nothing was truncated; keep the caller's exact string
+    # ensure_ascii=False preserves CJK/emoji instead of bloating with \uXXXX
+    return json.dumps(shrunken, ensure_ascii=False)
+
+
 def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) -> str:
     """Create an informative 1-line summary of a tool call + result.
 
@@ -449,6 +495,11 @@ class ContextCompressor(ContextEngine):
         # Pass 3: Truncate large tool_call arguments in assistant messages
         # outside the protected tail. write_file with 50KB content, for
         # example, survives pruning entirely without this.
+        #
+        # The shrinking is done inside the parsed JSON structure so the
+        # result remains valid JSON — otherwise downstream providers 400
+        # on every subsequent turn until the broken call falls out of
+        # the window. See ``_truncate_tool_call_args_json`` docstring.
         for i in range(prune_boundary):
             msg = result[i]
             if msg.get("role") != "assistant" or not msg.get("tool_calls"):
@@ -459,8 +510,10 @@ class ContextCompressor(ContextEngine):
                 if isinstance(tc, dict):
                     args = tc.get("function", {}).get("arguments", "")
                     if len(args) > 500:
-                        tc = {**tc, "function": {**tc["function"], "arguments": args[:200] + "...[truncated]"}}
-                        modified = True
+                        new_args = _truncate_tool_call_args_json(args)
+                        if new_args != args:
+                            tc = {**tc, "function": {**tc["function"], "arguments": new_args}}
+                            modified = True
                 new_tcs.append(tc)
             if modified:
                 result[i] = {**msg, "tool_calls": new_tcs}
diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py
index 6164d812f6b..0c20dddcd7c 100644
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@@ -781,3 +781,127 @@ class TestTokenBudgetTailProtection:
         # Tool at index 2 is outside the protected tail (last 3 = indices 2,3,4)
         # so it might or might not be pruned depending on boundary
         assert isinstance(pruned, int)
+
+
+class TestTruncateToolCallArgsJson:
+    """Regression tests for #11762.
+
+    The previous implementation produced invalid JSON by slicing
+    ``function.arguments`` mid-string, which caused non-retryable 400s from
+    strict providers (observed on MiniMax) and stuck long sessions in a
+    re-send loop. The helper here must always emit parseable JSON whose
+    shape matches the original — shrunken, not corrupted.
+    """
+
+    def _helper(self):
+        from agent.context_compressor import _truncate_tool_call_args_json
+        return _truncate_tool_call_args_json
+
+    def test_shrunken_args_remain_valid_json(self):
+        import json as _json
+        shrink = self._helper()
+        original = _json.dumps({
+            "path": "~/.hermes/skills/shopping/browser-setup-notes.md",
+            "content": "# Shopping Browser Setup Notes\n\n" + "abc " * 400,
+        })
+        assert len(original) > 500
+        shrunk = shrink(original)
+        parsed = _json.loads(shrunk)  # must not raise
+        assert parsed["path"] == "~/.hermes/skills/shopping/browser-setup-notes.md"
+        assert parsed["content"].endswith("...[truncated]")
+        assert len(shrunk) < len(original)
+
+    def test_non_json_arguments_pass_through(self):
+        shrink = self._helper()
+        not_json = "this is not json at all, " * 50
+        assert shrink(not_json) == not_json
+
+    def test_short_string_leaves_unchanged(self):
+        import json as _json
+        shrink = self._helper()
+        payload = _json.dumps({"command": "ls -la", "cwd": "/tmp"})
+        assert _json.loads(shrink(payload)) == {"command": "ls -la", "cwd": "/tmp"}
+
+    def test_nested_structures_are_walked(self):
+        import json as _json
+        shrink = self._helper()
+        payload = _json.dumps({
+            "messages": [
+                {"role": "user", "content": "x" * 500},
+                {"role": "assistant", "content": "ok"},
+            ],
+            "meta": {"note": "y" * 500},
+        })
+        parsed = _json.loads(shrink(payload))
+        assert parsed["messages"][0]["content"].endswith("...[truncated]")
+        assert parsed["messages"][1]["content"] == "ok"
+        assert parsed["meta"]["note"].endswith("...[truncated]")
+
+    def test_non_string_leaves_preserved(self):
+        import json as _json
+        shrink = self._helper()
+        payload = _json.dumps({
+            "retries": 3,
+            "enabled": True,
+            "timeout": None,
+            "items": [1, 2, 3],
+            "note": "z" * 500,
+        })
+        parsed = _json.loads(shrink(payload))
+        assert parsed["retries"] == 3
+        assert parsed["enabled"] is True
+        assert parsed["timeout"] is None
+        assert parsed["items"] == [1, 2, 3]
+        assert parsed["note"].endswith("...[truncated]")
+
+    def test_scalar_json_string_gets_shrunk(self):
+        import json as _json
+        shrink = self._helper()
+        payload = _json.dumps("q" * 500)
+        parsed = _json.loads(shrink(payload))
+        assert isinstance(parsed, str)
+        assert parsed.endswith("...[truncated]")
+
+    def test_unicode_preserved(self):
+        import json as _json
+        shrink = self._helper()
+        payload = _json.dumps({"content": "非德满" + ("a" * 500)})
+        out = shrink(payload)
+        # ensure_ascii=False keeps CJK intact rather than emitting \uXXXX
+        assert "非德满" in out
+
+    def test_pass3_emits_valid_json_for_downstream_provider(self):
+        """End-to-end: Pass 3 must never produce the exact failure payload
+        that caused the 400 loop (unterminated string, missing brace)."""
+        import json as _json
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(
+                model="test/model",
+                threshold_percent=0.85,
+                protect_first_n=1,
+                protect_last_n=1,
+                quiet_mode=True,
+            )
+        huge_content = "# Shopping Browser Setup Notes\n\n## Overview\n" + "x " * 400
+        args_payload = _json.dumps({
+            "path": "~/.hermes/skills/shopping/browser-setup-notes.md",
+            "content": huge_content,
+        })
+        assert len(args_payload) > 500  # triggers the Pass-3 shrink
+        messages = [
+            {"role": "user", "content": "please write two files"},
+            {"role": "assistant", "content": None, "tool_calls": [
+                {"id": "call_1", "type": "function",
+                 "function": {"name": "write_file", "arguments": args_payload}},
+            ]},
+            {"role": "tool", "tool_call_id": "call_1",
+             "content": '{"bytes_written": 727}'},
+            {"role": "user", "content": "ok"},
+            {"role": "assistant", "content": "done"},
+        ]
+        result, _ = c._prune_old_tool_results(messages, protect_tail_count=2)
+        shrunk = result[1]["tool_calls"][0]["function"]["arguments"]
+        # Must parse — otherwise downstream provider returns 400
+        parsed = _json.loads(shrunk)
+        assert parsed["path"] == "~/.hermes/skills/shopping/browser-setup-notes.md"
+        assert parsed["content"].endswith("...[truncated]")

From 0bebf5b948b1a64030da56d7ad0f9ec9cb875981 Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Sat, 18 Apr 2026 12:35:00 -0700
Subject: [PATCH 052/143] chore(attribution): add AUTHOR_MAP entry for Honghua
 Yang (honghua)

---
 scripts/release.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/release.py b/scripts/release.py
index bc0496d863b..4c32dccfdb7 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -74,6 +74,7 @@ AUTHOR_MAP = {
     "109555139+davetist@users.noreply.github.com": "davetist",
     "39405770+yyq4193@users.noreply.github.com": "yyq4193",
     "Asunfly@users.noreply.github.com": "Asunfly",
+    "2500400+honghua@users.noreply.github.com": "honghua",
     # contributors (manual mapping from git names)
     "ahmedsherif95@gmail.com": "asheriif",
     "liujinkun@bytedance.com": "liujinkun2025",

From 37cba82bfcf84a64be145fcc7ee9d69a8867dc5d Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 14:42:03 -0500
Subject: [PATCH 053/143] fix(tui): Ctrl+C on in-input selection copies to
 clipboard instead of clearing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Before: textInput explicitly ignored Ctrl+C so the app-level handler took
over — with no knowledge of the TextInput's own selection — and fell through
to clearIn() whenever input had text. Selecting part of the composer and
pressing Ctrl+C silently nuked everything you typed.

Now: Ctrl+C with an active in-input selection writes the selected substring
to the clipboard via OSC 52 and clears the selection. The original semantics
(Ctrl+C with no selection → app-level interrupt/clear/die chain) are
preserved by still returning early in that case.
---
 ui-tui/src/components/textInput.tsx | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/ui-tui/src/components/textInput.tsx b/ui-tui/src/components/textInput.tsx
index 6503da4dbff..a0f7c42f3b0 100644
--- a/ui-tui/src/components/textInput.tsx
+++ b/ui-tui/src/components/textInput.tsx
@@ -2,6 +2,8 @@ import type { InputEvent, Key } from '@hermes/ink'
 import * as Ink from '@hermes/ink'
 import { useEffect, useMemo, useRef, useState } from 'react'
 
+import { writeOsc52Clipboard } from '../lib/osc52.js'
+
 type InkExt = typeof Ink & {
   stringWidth: (s: string) => number
   useDeclaredCursor: (a: { line: number; column: number; active: boolean }) => (el: any) => void
@@ -468,10 +470,22 @@ export function TextInput({
         return void emitPaste({ cursor: curRef.current, hotkey: true, text: '', value: vRef.current })
       }
 
+      if (k.ctrl && inp === 'c') {
+        const range = selRange()
+
+        if (range) {
+          writeOsc52Clipboard(vRef.current.slice(range.start, range.end))
+          clearSel()
+
+          return
+        }
+
+        return
+      }
+
       if (
         k.upArrow ||
         k.downArrow ||
-        (k.ctrl && inp === 'c') ||
         k.tab ||
         (k.shift && k.tab) ||
         k.pageUp ||

From 4caf6c23dd8233effe9d39e65fb7160553f917c4 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 14:46:38 -0500
Subject: [PATCH 054/143] =?UTF-8?q?fix(tui):=20strip=20<think>=E2=80=A6</t?=
 =?UTF-8?q?hink>=20tags=20from=20assistant=20content=20and=20route=20to=20?=
 =?UTF-8?q?reasoning=20panel?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Models that emit reasoning inline as <think>/<reasoning>/<thinking>/<thought>/
<REASONING_SCRATCHPAD> tags in the content field (rather than a separate API
reasoning channel) had the raw tags + inner content shown twice: once as body
text with literal <think> markers, and again in the thinking panel when the
reasoning field was populated.

Port v1's tag set to lib/reasoning.ts with a splitReasoning(text) helper that
returns { reasoning, text }. Applied in three spots:

  - scheduleStreaming: strips tags from the live streaming view so the user
    never sees <think> mid-turn.
  - flushStreamingSegment: when a tool interrupts assistant output mid-turn,
    the saved segment is the stripped text; extracted reasoning promotes to
    reasoningText if the API channel hasn't already populated it.
  - recordMessageComplete: final message text is split; the extracted
    reasoning is kept only when no reasoning was captured already (streamed
    or API-channel reasoning wins, so we don't double-count when both are
    present).
---
 ui-tui/src/__tests__/reasoning.test.ts | 50 ++++++++++++++++++++++++++
 ui-tui/src/app/turnController.ts       | 35 +++++++++++++-----
 ui-tui/src/lib/reasoning.ts            | 50 ++++++++++++++++++++++++++
 3 files changed, 127 insertions(+), 8 deletions(-)
 create mode 100644 ui-tui/src/__tests__/reasoning.test.ts
 create mode 100644 ui-tui/src/lib/reasoning.ts

diff --git a/ui-tui/src/__tests__/reasoning.test.ts b/ui-tui/src/__tests__/reasoning.test.ts
new file mode 100644
index 00000000000..c961ea7a0c2
--- /dev/null
+++ b/ui-tui/src/__tests__/reasoning.test.ts
@@ -0,0 +1,50 @@
+import { describe, expect, it } from 'vitest'
+
+import { hasReasoningTag, splitReasoning } from '../lib/reasoning.js'
+
+describe('splitReasoning', () => {
+  it('extracts <think>…</think> and strips it from text', () => {
+    const { reasoning, text } = splitReasoning('<think>plotting</think>\n\nhere is the answer')
+
+    expect(reasoning).toBe('plotting')
+    expect(text).toBe('here is the answer')
+  })
+
+  it('handles multiple tag shapes', () => {
+    const input = '<reasoning>a</reasoning> <THINKING>b</THINKING> <thought>c</thought> body'
+    const { reasoning, text } = splitReasoning(input)
+
+    expect(reasoning).toContain('a')
+    expect(reasoning).toContain('b')
+    expect(reasoning).toContain('c')
+    expect(text).toBe('body')
+  })
+
+  it('treats unclosed trailing <think>… as reasoning', () => {
+    const { reasoning, text } = splitReasoning('answer start <think>still deciding')
+
+    expect(reasoning).toBe('still deciding')
+    expect(text).toBe('answer start')
+  })
+
+  it('returns empty reasoning and untouched text when no tags present', () => {
+    const { reasoning, text } = splitReasoning('plain body with no tags')
+
+    expect(reasoning).toBe('')
+    expect(text).toBe('plain body with no tags')
+  })
+
+  it('preserves text when reasoning block is empty', () => {
+    const { reasoning, text } = splitReasoning('<think></think>only body')
+
+    expect(reasoning).toBe('')
+    expect(text).toBe('only body')
+  })
+
+  it('detects presence of any supported tag', () => {
+    expect(hasReasoningTag('pre <think>x</think> post')).toBe(true)
+    expect(hasReasoningTag('pre <reasoning>x</reasoning>')).toBe(true)
+    expect(hasReasoningTag('<REASONING_SCRATCHPAD>x</REASONING_SCRATCHPAD>')).toBe(true)
+    expect(hasReasoningTag('no tags at all')).toBe(false)
+  })
+})
diff --git a/ui-tui/src/app/turnController.ts b/ui-tui/src/app/turnController.ts
index de57b2dd053..236324ffb98 100644
--- a/ui-tui/src/app/turnController.ts
+++ b/ui-tui/src/app/turnController.ts
@@ -1,5 +1,6 @@
 import { REASONING_PULSE_MS, STREAM_BATCH_MS } from '../config/timing.js'
 import type { SessionInterruptResponse, SubagentEventPayload } from '../gatewayTypes.js'
+import { hasReasoningTag, splitReasoning } from '../lib/reasoning.js'
 import {
   buildToolTrailLine,
   estimateTokensRough,
@@ -121,18 +122,31 @@ class TurnController {
   }
 
   flushStreamingSegment() {
-    const text = this.bufRef.trimStart()
+    const raw = this.bufRef.trimStart()
 
-    if (!text) {
+    if (!raw) {
       return
     }
 
-    const tools = this.pendingSegmentTools
+    const split = hasReasoningTag(raw) ? splitReasoning(raw) : { reasoning: '', text: raw }
+
+    if (split.reasoning && !this.reasoningText.trim()) {
+      this.reasoningText = split.reasoning
+      patchTurnState({ reasoning: this.reasoningText, reasoningTokens: estimateTokensRough(this.reasoningText) })
+    }
+
+    const text = split.text
 
     this.streamTimer = clear(this.streamTimer)
-    this.segmentMessages = [...this.segmentMessages, { role: 'assistant', text, ...(tools.length && { tools }) }]
+
+    if (text) {
+      const tools = this.pendingSegmentTools
+
+      this.segmentMessages = [...this.segmentMessages, { role: 'assistant', text, ...(tools.length && { tools }) }]
+      this.pendingSegmentTools = []
+    }
+
     this.bufRef = ''
-    this.pendingSegmentTools = []
     patchTurnState({ streamPendingTools: [], streamSegments: this.segmentMessages, streaming: '' })
   }
 
@@ -187,8 +201,11 @@ class TurnController {
   }
 
   recordMessageComplete(payload: { rendered?: string; reasoning?: string; text?: string }) {
-    const finalText = (payload.rendered ?? payload.text ?? this.bufRef).trimStart()
-    const savedReasoning = this.reasoningText.trim() || String(payload.reasoning ?? '').trim()
+    const rawText = (payload.rendered ?? payload.text ?? this.bufRef).trimStart()
+    const split = splitReasoning(rawText)
+    const finalText = split.text
+    const existingReasoning = this.reasoningText.trim() || String(payload.reasoning ?? '').trim()
+    const savedReasoning = [existingReasoning, existingReasoning ? '' : split.reasoning].filter(Boolean).join('\n\n')
     const savedReasoningTokens = savedReasoning ? estimateTokensRough(savedReasoning) : 0
     const savedToolTokens = this.toolTokenAcc
     const tools = this.pendingSegmentTools
@@ -355,7 +372,9 @@ class TurnController {
 
     this.streamTimer = setTimeout(() => {
       this.streamTimer = null
-      patchTurnState({ streaming: this.bufRef.trimStart() })
+      const raw = this.bufRef.trimStart()
+      const visible = hasReasoningTag(raw) ? splitReasoning(raw).text : raw
+      patchTurnState({ streaming: visible })
     }, STREAM_BATCH_MS)
   }
 
diff --git a/ui-tui/src/lib/reasoning.ts b/ui-tui/src/lib/reasoning.ts
new file mode 100644
index 00000000000..eba63918c41
--- /dev/null
+++ b/ui-tui/src/lib/reasoning.ts
@@ -0,0 +1,50 @@
+const TAGS = ['think', 'reasoning', 'thinking', 'thought', 'REASONING_SCRATCHPAD'] as const
+
+export interface SplitReasoning {
+  reasoning: string
+  text: string
+}
+
+/**
+ * Separates inline reasoning markup from the assistant text around it.
+ *
+ * Each tag in TAGS is matched case-insensitively. Extracted bodies are
+ * collected tag by tag (in TAGS order) and joined with blank lines; what
+ * remains of the input is trimmed and returned as the visible text.
+ */
+export function splitReasoning(input: string): SplitReasoning {
+  const collected: string[] = []
+
+  // Shared replacer: remember non-blank inner text, drop the whole match.
+  const extract = (_match: string, inner: string) => {
+    const body = inner.trim()
+
+    if (body) {
+      collected.push(body)
+    }
+
+    return ''
+  }
+
+  let visible = input
+
+  for (const tag of TAGS) {
+    // Closed blocks first; the match also eats whitespace after the tag.
+    visible = visible.replace(new RegExp(`<${tag}>([\\s\\S]*?)</${tag}>\\s*`, 'gi'), extract)
+
+    // Then a tag left open, which captures everything up to end of input.
+    visible = visible.replace(new RegExp(`<${tag}>([\\s\\S]*)$`, 'i'), extract)
+  }
+
+  return { reasoning: collected.join('\n\n').trim(), text: visible.trim() }
+}
+
+/**
+ * Cheap pre-check for splitReasoning: true when `input` contains an opening
+ * tag from TAGS. Case-insensitive, like the `i` flag splitReasoning uses.
+ */
+export const hasReasoningTag = (input: string) => {
+  const lowered = input.toLowerCase()
+
+  return TAGS.some(tag => lowered.includes(`<${tag.toLowerCase()}>`))
+}

From 0f778f776877cd452cf7475f06a6044cf07ebfe8 Mon Sep 17 00:00:00 2001
From: jarvischer <jarvischer@gmail.com>
Date: Sat, 18 Apr 2026 22:46:36 +0530
Subject: [PATCH 055/143] fix: prevent tool name duplication in streaming
 accumulator (MiniMax/NVIDIA NIM)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Based on #11984 by @maxchernin.  Fixes #8259.

Some providers (MiniMax M2.7 via NVIDIA NIM) resend the full function
name in every streaming chunk instead of only the first.  The old
accumulator used += which concatenated them into 'read_fileread_file'.

Changed to simple assignment (=), matching the OpenAI Node SDK, LiteLLM,
and Vercel AI SDK patterns.  Function names are atomic identifiers
delivered complete — no provider splits them across chunks, so
concatenation was never correct semantics.
---
 run_agent.py                      | 10 ++++++-
 scripts/release.py                |  1 +
 tests/run_agent/test_streaming.py | 44 +++++++++++++++++++++++++++++++
 3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/run_agent.py b/run_agent.py
index a47455e5345..e88096a603a 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -5868,7 +5868,15 @@ class AIAgent:
                             entry["id"] = tc_delta.id
                         if tc_delta.function:
                             if tc_delta.function.name:
-                                entry["function"]["name"] += tc_delta.function.name
+                                # Use assignment, not +=.  Function names are
+                                # atomic identifiers delivered complete in the
+                                # first chunk (OpenAI spec).  Some providers
+                                # (MiniMax M2.7 via NVIDIA NIM) resend the full
+                                # name in every chunk; concatenation would
+                                # produce "read_fileread_file".  Assignment
+                                # (matching the OpenAI Node SDK / LiteLLM /
+                                # Vercel AI patterns) is immune to this.
+                                entry["function"]["name"] = tc_delta.function.name
                             if tc_delta.function.arguments:
                                 entry["function"]["arguments"] += tc_delta.function.arguments
                         extra = getattr(tc_delta, "extra_content", None)
diff --git a/scripts/release.py b/scripts/release.py
index 4c32dccfdb7..88ddb2f4343 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -266,6 +266,7 @@ AUTHOR_MAP = {
     "limkuan24@gmail.com": "WideLee",
     "aviralarora002@gmail.com": "AviArora02-commits",
     "junminliu@gmail.com": "JimLiu",
+    "jarvischer@gmail.com": "maxchernin",
 }
 
 
diff --git a/tests/run_agent/test_streaming.py b/tests/run_agent/test_streaming.py
index 6afe36ee3ad..e4825599af8 100644
--- a/tests/run_agent/test_streaming.py
+++ b/tests/run_agent/test_streaming.py
@@ -141,6 +141,50 @@ class TestStreamingAccumulator:
         assert tc[0].function.name == "terminal"
         assert tc[0].function.arguments == '{"command": "ls"}'
 
+    @patch("run_agent.AIAgent._create_request_openai_client")
+    @patch("run_agent.AIAgent._close_request_openai_client")
+    def test_tool_name_not_duplicated_when_resent_per_chunk(self, mock_close, mock_create):
+        """MiniMax M2.7 via NVIDIA NIM resends the full name in every chunk.
+
+        Bug #8259: the old += accumulation produced "read_fileread_file".
+        Assignment (matching OpenAI Node SDK / LiteLLM) prevents this.
+        """
+        from run_agent import AIAgent
+
+        chunks = [
+            _make_stream_chunk(tool_calls=[
+                _make_tool_call_delta(index=0, tc_id="call_nim", name="read_file")
+            ]),
+            _make_stream_chunk(tool_calls=[
+                _make_tool_call_delta(index=0, tc_id="call_nim", name="read_file", arguments='{"path":')
+            ]),
+            _make_stream_chunk(tool_calls=[
+                _make_tool_call_delta(index=0, tc_id="call_nim", name="read_file", arguments=' "x.py"}')
+            ]),
+            _make_stream_chunk(finish_reason="tool_calls"),
+        ]
+
+        mock_client = MagicMock()
+        mock_client.chat.completions.create.return_value = iter(chunks)
+        mock_create.return_value = mock_client
+
+        agent = AIAgent(
+            model="test/model",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+        agent.api_mode = "chat_completions"
+        agent._interrupt_requested = False
+
+        response = agent._interruptible_streaming_api_call({})
+
+        tc = response.choices[0].message.tool_calls
+        assert tc is not None
+        assert len(tc) == 1
+        assert tc[0].function.name == "read_file"
+        assert tc[0].function.arguments == '{"path": "x.py"}'
+
     @patch("run_agent.AIAgent._create_request_openai_client")
     @patch("run_agent.AIAgent._close_request_openai_client")
     def test_tool_call_extra_content_preserved(self, mock_close, mock_create):

From f7af90e2daf2e2a11262ff3152bb3f08ff13ca37 Mon Sep 17 00:00:00 2001
From: LVT382009 <levantam.98.2324@gmail.com>
Date: Sat, 18 Apr 2026 22:49:30 +0530
Subject: [PATCH 056/143] fix: wire _ephemeral_max_output_tokens into
 chat_completions and add NVIDIA NIM default

Based on #12152 by @LVT382009.

Three changes to run_agent.py:

1. _ephemeral_max_output_tokens consumption in chat_completions path:
   The error-recovery ephemeral override was only consumed in the
   anthropic_messages branch of _build_api_kwargs.  All chat_completions
   providers (OpenRouter, NVIDIA NIM, Qwen, Alibaba, custom, etc.)
   silently ignored it.  Now consumed at highest priority, matching the
   anthropic pattern.

2. NVIDIA NIM max_tokens default (16384):
   NVIDIA NIM falls back to a very low internal default when max_tokens
   is omitted, causing models like GLM-4.7 to truncate immediately
   (thinking tokens exhaust the budget before the response starts).

3. Progressive length-continuation boost:
   When finish_reason='length' triggers a continuation retry, the output
   budget now grows progressively (2x base on retry 1, 3x on retry 2,
   capped at 32768) via _ephemeral_max_output_tokens.  Previously the
   retry loop just re-sent the same token limit on all 3 attempts.
---
 run_agent.py       | 20 +++++++++++++++++++-
 scripts/release.py |  1 +
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/run_agent.py b/run_agent.py
index e88096a603a..a0f4db54853 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -7061,8 +7061,20 @@ class AIAgent:
         if self.tools:
             api_kwargs["tools"] = self.tools
 
-        if self.max_tokens is not None:
+        # ── max_tokens for chat_completions ──────────────────────────────
+        # Priority: ephemeral override (error recovery / length-continuation
+        # boost) > user-configured max_tokens > provider-specific defaults.
+        _ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None)
+        if _ephemeral_out is not None:
+            self._ephemeral_max_output_tokens = None  # consume immediately
+            api_kwargs.update(self._max_tokens_param(_ephemeral_out))
+        elif self.max_tokens is not None:
             api_kwargs.update(self._max_tokens_param(self.max_tokens))
+        elif "integrate.api.nvidia.com" in self._base_url_lower:
+            # NVIDIA NIM defaults to a very low max_tokens when omitted,
+            # causing models like GLM-4.7 to truncate immediately (thinking
+            # tokens alone exhaust the budget).  16384 provides adequate room.
+            api_kwargs.update(self._max_tokens_param(16384))
         elif self._is_qwen_portal():
             # Qwen Portal defaults to a very low max_tokens when omitted.
             # Reasoning models (qwen3-coder-plus) exhaust that budget on
@@ -10804,6 +10816,12 @@ class AIAgent:
                 continue
 
             if restart_with_length_continuation:
+                # Progressively boost the output token budget on each retry.
+                # Retry 1 → 2× base, retry 2 → 3× base, capped at 32 768.
+                # Applies to all providers via _ephemeral_max_output_tokens.
+                _boost_base = self.max_tokens if self.max_tokens else 4096
+                _boost = _boost_base * (length_continue_retries + 1)
+                self._ephemeral_max_output_tokens = min(_boost, 32768)
                 continue
 
             # Guard: if all retries exhausted without a successful response
diff --git a/scripts/release.py b/scripts/release.py
index 88ddb2f4343..94ebef5d345 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -267,6 +267,7 @@ AUTHOR_MAP = {
     "aviralarora002@gmail.com": "AviArora02-commits",
     "junminliu@gmail.com": "JimLiu",
     "jarvischer@gmail.com": "maxchernin",
+    "levantam.98.2324@gmail.com": "LVT382009",
 }
 
 

From 2eab7ee15f9f0283ae1a6c466b0400caa44defbb Mon Sep 17 00:00:00 2001
From: helix4u <4317663+helix4u@users.noreply.github.com>
Date: Sat, 18 Apr 2026 13:00:04 -0600
Subject: [PATCH 057/143] fix(gemini): hide low-TPM Gemma models from exposed
 lists

---
 agent/models_dev.py                      | 32 +++++++++++++++++++++---
 hermes_cli/models.py                     |  2 --
 hermes_cli/setup.py                      |  1 -
 tests/hermes_cli/test_gemini_provider.py | 23 +++++++++++++++--
 4 files changed, 50 insertions(+), 8 deletions(-)

diff --git a/agent/models_dev.py b/agent/models_dev.py
index 42c8925ffe7..cc4dbf0be47 100644
--- a/agent/models_dev.py
+++ b/agent/models_dev.py
@@ -420,7 +420,10 @@ def list_provider_models(provider: str) -> List[str]:
     models = _get_provider_models(provider)
     if models is None:
         return []
-    return list(models.keys())
+    return [
+        mid for mid in models.keys()
+        if not _should_hide_from_provider_catalog(provider, mid)
+    ]
 
 
 # Patterns that indicate non-agentic or noise models (TTS, embedding,
@@ -432,6 +435,29 @@ _NOISE_PATTERNS: re.Pattern = re.compile(
     re.IGNORECASE,
 )
 
+# Google-hosted Gemma models currently have very low TPM quotas for agent-style
+# traffic (for example 15K/16K TPM tiers in AI Studio) and are not practical as
+# normal Hermes picks even though they advertise large context windows. Keep the
+# capability metadata available for direct/manual use, but hide them from the
+# Gemini model catalogs we surface in setup and model selection.
+_GOOGLE_GEMMA_HIDDEN_MODELS = frozenset({
+    "gemma-4-31b-it",
+    "gemma-4-26b-a4b-it",
+    "gemma-3-1b",
+    "gemma-3-2b",
+    "gemma-3-4b",
+    "gemma-3-12b",
+    "gemma-3-27b",
+})
+
+
+def _should_hide_from_provider_catalog(provider: str, model_id: str) -> bool:
+    """Tell whether ``model_id`` is left out of ``provider``'s model catalogs."""
+    # None/empty inputs normalise to "" and therefore never match.
+    provider_key = (provider or "").strip().lower()
+    model_key = (model_id or "").strip().lower()
+    return provider_key in {"gemini", "google"} and model_key in _GOOGLE_GEMMA_HIDDEN_MODELS
+
 
 def list_agentic_models(provider: str) -> List[str]:
     """Return model IDs suitable for agentic use from models.dev.
@@ -448,6 +474,8 @@ def list_agentic_models(provider: str) -> List[str]:
     for mid, entry in models.items():
         if not isinstance(entry, dict):
             continue
+        if _should_hide_from_provider_catalog(provider, mid):
+            continue
         if not entry.get("tool_call", False):
             continue
         if _NOISE_PATTERNS.search(mid):
@@ -582,5 +610,3 @@ def get_model_info(
             return _parse_model_info(mid, mdata, mdev_id)
 
     return None
-
-
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index cbbeef62d44..a0d7c2220c1 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -133,8 +133,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
         "gemini-2.5-pro",
         "gemini-2.5-flash",
         "gemini-2.5-flash-lite",
-        # Gemma open models (also served via AI Studio)
-        "gemma-4-31b-it",
     ],
     "google-gemini-cli": [
         "gemini-2.5-pro",
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index 8770386b73e..8f6b633c6ac 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -91,7 +91,6 @@ _DEFAULT_PROVIDER_MODELS = {
     "gemini": [
         "gemini-3.1-pro-preview", "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview",
         "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite",
-        "gemma-4-31b-it",
     ],
     "zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
     "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
diff --git a/tests/hermes_cli/test_gemini_provider.py b/tests/hermes_cli/test_gemini_provider.py
index fd16e825d14..7632f7691c3 100644
--- a/tests/hermes_cli/test_gemini_provider.py
+++ b/tests/hermes_cli/test_gemini_provider.py
@@ -130,7 +130,7 @@ class TestGeminiModelCatalog:
         models = _PROVIDER_MODELS["gemini"]
         assert "gemini-2.5-pro" in models
         assert "gemini-2.5-flash" in models
-        assert "gemma-4-31b-it" in models
+        assert "gemma-4-31b-it" not in models
 
     def test_provider_models_has_3x(self):
         models = _PROVIDER_MODELS["gemini"]
@@ -313,9 +313,28 @@ class TestGeminiModelsDev:
             result = list_agentic_models("gemini")
         assert "gemini-3-flash-preview" in result
         assert "gemini-2.5-pro" in result
-        assert "gemma-4-31b-it" in result
+        assert "gemma-4-31b-it" not in result
         # Filtered out:
         assert "gemini-embedding-001" not in result      # no tool_call
         assert "gemini-2.5-flash-preview-tts" not in result  # no tool_call
         assert "gemini-live-2.5-flash" not in result     # noise: live-
         assert "gemini-2.5-flash-preview-04-17" not in result  # noise: dated preview
+
+    def test_list_provider_models_hides_low_tpm_google_gemmas(self):
+        mock_data = {
+            "google": {
+                "models": {
+                    "gemini-2.5-pro": {},
+                    "gemma-4-31b-it": {},
+                    "gemma-3-1b": {},
+                }
+            }
+        }
+        with patch("agent.models_dev.fetch_models_dev", return_value=mock_data):
+            from agent.models_dev import list_provider_models
+
+            result = list_provider_models("gemini")
+
+        assert "gemini-2.5-pro" in result
+        assert "gemma-4-31b-it" not in result
+        assert "gemma-3-1b" not in result

From a7dd6a34499cb8fa91579b8943d251a8c2d42021 Mon Sep 17 00:00:00 2001
From: helix4u <4317663+helix4u@users.noreply.github.com>
Date: Sat, 18 Apr 2026 13:08:57 -0600
Subject: [PATCH 058/143] fix(gemini): hide stale and low-TPM Google models

---
 agent/models_dev.py                      | 28 ++++++++++++++++++------
 tests/hermes_cli/test_gemini_provider.py |  8 +++++--
 2 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/agent/models_dev.py b/agent/models_dev.py
index cc4dbf0be47..3e5c911e7ee 100644
--- a/agent/models_dev.py
+++ b/agent/models_dev.py
@@ -435,26 +435,40 @@ _NOISE_PATTERNS: re.Pattern = re.compile(
     re.IGNORECASE,
 )
 
-# Google-hosted Gemma models currently have very low TPM quotas for agent-style
-# traffic (for example 15K/16K TPM tiers in AI Studio) and are not practical as
-# normal Hermes picks even though they advertise large context windows. Keep the
-# capability metadata available for direct/manual use, but hide them from the
-# Gemini model catalogs we surface in setup and model selection.
-_GOOGLE_GEMMA_HIDDEN_MODELS = frozenset({
+# Google's live Gemini catalogs currently include a mix of stale slugs and
+# Gemma models whose TPM quotas are too small for normal Hermes agent traffic.
+# Keep capability metadata available for direct/manual use, but hide these from
+# the Gemini model catalogs we surface in setup and model selection.
+_GOOGLE_HIDDEN_MODELS = frozenset({
+    # Low-TPM Gemma models that trip Google input-token quota walls under
+    # agent-style traffic despite advertising large context windows.
     "gemma-4-31b-it",
+    "gemma-4-26b-it",
     "gemma-4-26b-a4b-it",
     "gemma-3-1b",
+    "gemma-3-1b-it",
     "gemma-3-2b",
+    "gemma-3-2b-it",
     "gemma-3-4b",
+    "gemma-3-4b-it",
     "gemma-3-12b",
+    "gemma-3-12b-it",
     "gemma-3-27b",
+    "gemma-3-27b-it",
+    # Stale/retired Google slugs that still surface through models.dev-backed
+    # Gemini selection but 404 on the current Google endpoints.
+    "gemini-1.5-flash",
+    "gemini-1.5-pro",
+    "gemini-1.5-flash-8b",
+    "gemini-2.0-flash",
+    "gemini-2.0-flash-lite",
 })
 
 
 def _should_hide_from_provider_catalog(provider: str, model_id: str) -> bool:
     provider_lower = (provider or "").strip().lower()
     model_lower = (model_id or "").strip().lower()
-    if provider_lower in {"gemini", "google"} and model_lower in _GOOGLE_GEMMA_HIDDEN_MODELS:
+    if provider_lower in {"gemini", "google"} and model_lower in _GOOGLE_HIDDEN_MODELS:
         return True
     return False
 
diff --git a/tests/hermes_cli/test_gemini_provider.py b/tests/hermes_cli/test_gemini_provider.py
index 7632f7691c3..9c6ee70aa91 100644
--- a/tests/hermes_cli/test_gemini_provider.py
+++ b/tests/hermes_cli/test_gemini_provider.py
@@ -326,7 +326,9 @@ class TestGeminiModelsDev:
                 "models": {
                     "gemini-2.5-pro": {},
                     "gemma-4-31b-it": {},
-                    "gemma-3-1b": {},
+                    "gemma-3-27b-it": {},
+                    "gemini-1.5-pro": {},
+                    "gemini-2.0-flash": {},
                 }
             }
         }
@@ -337,4 +339,6 @@ class TestGeminiModelsDev:
 
         assert "gemini-2.5-pro" in result
         assert "gemma-4-31b-it" not in result
-        assert "gemma-3-1b" not in result
+        assert "gemma-3-27b-it" not in result
+        assert "gemini-1.5-pro" not in result
+        assert "gemini-2.0-flash" not in result

From ca32a2a60bd8655c001b96394e68309ba53b4550 Mon Sep 17 00:00:00 2001
From: helix4u <4317663+helix4u@users.noreply.github.com>
Date: Sat, 18 Apr 2026 13:15:27 -0600
Subject: [PATCH 059/143] fix(gemini): restore bearer auth on openai route

---
 agent/auxiliary_client.py                | 28 -------------------
 run_agent.py                             | 21 --------------
 tests/hermes_cli/test_gemini_provider.py | 35 +++++-------------------
 3 files changed, 7 insertions(+), 77 deletions(-)

diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 126f4615ddb..19bde946ee3 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -782,15 +782,6 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
                 from hermes_cli.models import copilot_default_headers
 
                 extra["default_headers"] = copilot_default_headers()
-            elif "generativelanguage.googleapis.com" in base_url.lower():
-                # Google's OpenAI-compatible endpoint only accepts x-goog-api-key.
-                # Passing api_key= causes the SDK to inject Authorization: Bearer,
-                # which Google rejects with HTTP 400 "Multiple authentication
-                # credentials received". Use a placeholder for api_key and pass
-                # the real key via x-goog-api-key header instead.
-                # Fixes: https://github.com/NousResearch/hermes-agent/issues/7893
-                extra["default_headers"] = {"x-goog-api-key": api_key}
-                api_key = "not-used"
             return OpenAI(api_key=api_key, base_url=base_url, **extra), model
 
         creds = resolve_api_key_provider_credentials(provider_id)
@@ -812,15 +803,6 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
             from hermes_cli.models import copilot_default_headers
 
             extra["default_headers"] = copilot_default_headers()
-        elif "generativelanguage.googleapis.com" in base_url.lower():
-            # Google's OpenAI-compatible endpoint only accepts x-goog-api-key.
-            # Passing api_key= causes the SDK to inject Authorization: Bearer,
-            # which Google rejects with HTTP 400 "Multiple authentication
-            # credentials received". Use a placeholder for api_key and pass
-            # the real key via x-goog-api-key header instead.
-            # Fixes: https://github.com/NousResearch/hermes-agent/issues/7893
-            extra["default_headers"] = {"x-goog-api-key": api_key}
-            api_key = "not-used"
         return OpenAI(api_key=api_key, base_url=base_url, **extra), model
 
     return None, None
@@ -1666,16 +1648,6 @@ def resolve_provider_client(
             from hermes_cli.models import copilot_default_headers
 
             headers.update(copilot_default_headers())
-        elif "generativelanguage.googleapis.com" in base_url.lower():
-            # Google's OpenAI-compatible endpoint only accepts x-goog-api-key.
-            # Passing api_key= causes the OpenAI SDK to inject Authorization: Bearer,
-            # which Google rejects with HTTP 400 "Multiple authentication credentials
-            # received". Use a placeholder for api_key and pass the real key via
-            # x-goog-api-key header instead.
-            # Fixes: https://github.com/NousResearch/hermes-agent/issues/7893
-            headers["x-goog-api-key"] = api_key
-            api_key = "not-used"
-
         client = OpenAI(api_key=api_key, base_url=base_url,
                         **({"default_headers": headers} if headers else {}))
 
diff --git a/run_agent.py b/run_agent.py
index a0f4db54853..756bb62eddd 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1054,16 +1054,6 @@ class AIAgent:
                     }
                 elif "portal.qwen.ai" in effective_base.lower():
                     client_kwargs["default_headers"] = _qwen_portal_headers()
-                elif "generativelanguage.googleapis.com" in effective_base.lower():
-                    # Google's OpenAI-compatible endpoint only accepts x-goog-api-key.
-                    # The OpenAI SDK auto-injects Authorization: Bearer when api_key= is
-                    # set to a real value, causing HTTP 400 "Multiple authentication
-                    # credentials received".  Pass a placeholder so the SDK does not
-                    # emit Bearer, and carry the real key via x-goog-api-key instead.
-                    # Fixes: https://github.com/NousResearch/hermes-agent/issues/7893
-                    real_key = client_kwargs["api_key"]
-                    client_kwargs["api_key"] = "not-used"
-                    client_kwargs["default_headers"] = {"x-goog-api-key": real_key}
             else:
                 # No explicit creds — use the centralized provider router
                 from agent.auxiliary_client import resolve_provider_client
@@ -5245,17 +5235,6 @@ class AIAgent:
             self._client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
         elif "portal.qwen.ai" in normalized:
             self._client_kwargs["default_headers"] = _qwen_portal_headers()
-        elif "generativelanguage.googleapis.com" in normalized:
-            # Google's endpoint rejects Bearer tokens; use x-goog-api-key instead.
-            # Swap the real key out of api_key and into the header so the OpenAI
-            # SDK does not emit Authorization: Bearer.
-            # Fixes: https://github.com/NousResearch/hermes-agent/issues/7893
-            real_key = self._client_kwargs.get("api_key", "")
-            if real_key and real_key != "not-used":
-                self._client_kwargs["api_key"] = "not-used"
-            self._client_kwargs["default_headers"] = {
-                "x-goog-api-key": real_key or self._client_kwargs.get("api_key", ""),
-            }
         else:
             self._client_kwargs.pop("default_headers", None)
 
diff --git a/tests/hermes_cli/test_gemini_provider.py b/tests/hermes_cli/test_gemini_provider.py
index 9c6ee70aa91..dbb1111fcf9 100644
--- a/tests/hermes_cli/test_gemini_provider.py
+++ b/tests/hermes_cli/test_gemini_provider.py
@@ -207,14 +207,8 @@ class TestGeminiAgentInit:
             assert agent.api_mode == "chat_completions"
             assert agent.provider == "gemini"
 
-    def test_gemini_uses_x_goog_api_key_not_bearer(self, monkeypatch):
-        """Regression test for issue #7893.
-
-        When provider=gemini, the OpenAI client must be constructed with
-        api_key='not-used' and default_headers={'x-goog-api-key': real_key}.
-        This prevents the SDK from injecting Authorization: Bearer, which
-        Google's endpoint rejects with HTTP 400.
-        """
+    def test_gemini_uses_bearer_auth(self, monkeypatch):
+        """Gemini OpenAI-compatible endpoint should receive the real API key."""
         monkeypatch.setenv("GOOGLE_API_KEY", "AIzaSy_REAL_KEY")
         real_key = "AIzaSy_REAL_KEY"
         with patch("run_agent.OpenAI") as mock_openai:
@@ -227,37 +221,22 @@ class TestGeminiAgentInit:
                 base_url="https://generativelanguage.googleapis.com/v1beta/openai",
             )
         call_kwargs = mock_openai.call_args[1]
-        # The SDK must NOT receive the real key as api_key (which would emit Bearer)
-        assert call_kwargs.get("api_key") == "not-used", (
-            "api_key must be 'not-used' to suppress Authorization: Bearer for Gemini"
-        )
-        # The real key must be in x-goog-api-key header
+        assert call_kwargs.get("api_key") == real_key
         headers = call_kwargs.get("default_headers", {})
-        assert headers.get("x-goog-api-key") == real_key, (
-            "x-goog-api-key header must carry the real Gemini API key"
-        )
+        assert "x-goog-api-key" not in headers
 
     def test_gemini_resolve_provider_client_auth(self, monkeypatch):
-        """Regression test for issue #7893 — resolve_provider_client path.
-
-        When resolve_provider_client('gemini') is called, the returned OpenAI
-        client must use x-goog-api-key header, not Authorization: Bearer.
-        """
+        """resolve_provider_client('gemini') should pass the real API key through."""
         monkeypatch.setenv("GEMINI_API_KEY", "AIzaSy_TEST_KEY")
         real_key = "AIzaSy_TEST_KEY"
         with patch("agent.auxiliary_client.OpenAI") as mock_openai:
             mock_openai.return_value = MagicMock()
-            mock_openai.return_value.api_key = "not-used"
             from agent.auxiliary_client import resolve_provider_client
             resolve_provider_client("gemini")
         call_kwargs = mock_openai.call_args[1]
-        assert call_kwargs.get("api_key") == "not-used", (
-            "api_key must be 'not-used' to prevent Bearer injection for Gemini"
-        )
+        assert call_kwargs.get("api_key") == real_key
         headers = call_kwargs.get("default_headers", {})
-        assert headers.get("x-goog-api-key") == real_key, (
-            "x-goog-api-key header must carry the real Gemini API key"
-        )
+        assert "x-goog-api-key" not in headers
 
 
 # ── models.dev Integration ──

From 450ded98dbbe37de125ef387288aff1a19111ab5 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 15:13:31 -0500
Subject: [PATCH 060/143] chore(tui): prettier whitespace on files touched in
 this branch

---
 ui-tui/src/components/prompts.tsx   |  4 +++-
 ui-tui/src/components/textInput.tsx | 10 +---------
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/ui-tui/src/components/prompts.tsx b/ui-tui/src/components/prompts.tsx
index bfc603c51c6..c7ced5b31d6 100644
--- a/ui-tui/src/components/prompts.tsx
+++ b/ui-tui/src/components/prompts.tsx
@@ -53,7 +53,9 @@ export function ApprovalPrompt({ onChoice, req, t }: ApprovalPromptProps) {
         ))}
 
         {overflow > 0 ? (
-          <Text color={t.color.dim}>… +{overflow} more line{overflow === 1 ? '' : 's'} (full text above)</Text>
+          <Text color={t.color.dim}>
+            … +{overflow} more line{overflow === 1 ? '' : 's'} (full text above)
+          </Text>
         ) : null}
       </Box>
 
diff --git a/ui-tui/src/components/textInput.tsx b/ui-tui/src/components/textInput.tsx
index a0f7c42f3b0..3f456482123 100644
--- a/ui-tui/src/components/textInput.tsx
+++ b/ui-tui/src/components/textInput.tsx
@@ -483,15 +483,7 @@ export function TextInput({
         return
       }
 
-      if (
-        k.upArrow ||
-        k.downArrow ||
-        k.tab ||
-        (k.shift && k.tab) ||
-        k.pageUp ||
-        k.pageDown ||
-        k.escape
-      ) {
+      if (k.upArrow || k.downArrow || k.tab || (k.shift && k.tab) || k.pageUp || k.pageDown || k.escape) {
         return
       }
 

From 7e9a09857426f7acc66546188dd37802dd0a9920 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 15:17:42 -0500
Subject: [PATCH 061/143] chore: uptick

---
 ui-tui/src/app/slash/commands/core.ts | 35 ++++++++++++++++++---------
 ui-tui/src/components/messageLine.tsx |  4 ++-
 2 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/ui-tui/src/app/slash/commands/core.ts b/ui-tui/src/app/slash/commands/core.ts
index a151b2cdc87..dd5a9f58c82 100644
--- a/ui-tui/src/app/slash/commands/core.ts
+++ b/ui-tui/src/app/slash/commands/core.ts
@@ -1,7 +1,12 @@
 import { dailyFortune, randomFortune } from '../../../content/fortunes.js'
 import { HOTKEYS } from '../../../content/hotkeys.js'
 import { nextDetailsMode, parseDetailsMode } from '../../../domain/details.js'
-import type { ConfigGetValueResponse, ConfigSetResponse, SessionSteerResponse, SessionUndoResponse } from '../../../gatewayTypes.js'
+import type {
+  ConfigGetValueResponse,
+  ConfigSetResponse,
+  SessionSteerResponse,
+  SessionUndoResponse
+} from '../../../gatewayTypes.js'
 import { writeOsc52Clipboard } from '../../../lib/osc52.js'
 import type { DetailsMode, Msg, PanelSection } from '../../../types.js'
 import { patchOverlayState } from '../../overlayStore.js'
@@ -259,19 +264,27 @@ export const coreCommands: SlashCommand[] = [
       // message isn't lost — identical semantics to the gateway handler.
       if (!ctx.ui.busy || !ctx.sid) {
         ctx.composer.enqueue(payload)
-        ctx.transcript.sys(`no active turn — queued for next: "${payload.slice(0, 50)}${payload.length > 50 ? '…' : ''}"`)
+        ctx.transcript.sys(
+          `no active turn — queued for next: "${payload.slice(0, 50)}${payload.length > 50 ? '…' : ''}"`
+        )
+
         return
       }
 
-      ctx.gateway.rpc<SessionSteerResponse>('session.steer', { session_id: ctx.sid, text: payload }).then(
-        ctx.guarded<SessionSteerResponse>(r => {
-          if (r?.status === 'queued') {
-            ctx.transcript.sys(`⏩ steer queued — arrives after next tool call: "${payload.slice(0, 50)}${payload.length > 50 ? '…' : ''}"`)
-          } else {
-            ctx.transcript.sys('steer rejected')
-          }
-        })
-      ).catch(ctx.guardedErr)
+      ctx.gateway
+        .rpc<SessionSteerResponse>('session.steer', { session_id: ctx.sid, text: payload })
+        .then(
+          ctx.guarded<SessionSteerResponse>(r => {
+            if (r?.status === 'queued') {
+              ctx.transcript.sys(
+                `⏩ steer queued — arrives after next tool call: "${payload.slice(0, 50)}${payload.length > 50 ? '…' : ''}"`
+              )
+            } else {
+              ctx.transcript.sys('steer rejected')
+            }
+          })
+        )
+        .catch(ctx.guardedErr)
     }
   },
 
diff --git a/ui-tui/src/components/messageLine.tsx b/ui-tui/src/components/messageLine.tsx
index 9de6f2aa12b..8d77a49e573 100644
--- a/ui-tui/src/components/messageLine.tsx
+++ b/ui-tui/src/components/messageLine.tsx
@@ -35,7 +35,9 @@ export const MessageLine = memo(function MessageLine({
     return (
       <Box alignSelf="flex-start" borderColor={t.color.dim} borderStyle="round" marginLeft={3} paddingX={1}>
         {hasAnsi(msg.text) ? (
-          <Text wrap="truncate-end"><Ansi>{msg.text}</Ansi></Text>
+          <Text wrap="truncate-end">
+            <Ansi>{msg.text}</Ansi>
+          </Text>
         ) : (
           <Text color={t.color.dim} wrap="truncate-end">
             {preview}

From 17e95a26b72b1ea296cdda41587b7f38d27fe72d Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 15:22:43 -0500
Subject: [PATCH 062/143] fix(tui): render /skills browse as a formatted Panel
 instead of raw JSON
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous handler dumped the raw skills.manage response into a pager,
which was unreadable and hid the pagination metadata. It also silently
accepted non-numeric page args, falling back to page 1.

Now:
- validates page arg (rejects NaN / <1 with a usage message)
- shows "fetching community skills (scans 6 sources, may take ~15s)…" up
  front so the 10-30s hub fetch isn't a silent hang
- renders items as {name · trust, description (truncated 160 chars)} rows
  in the existing Panel component
- footer shows "page X of Y · N skills total · /skills browse N+1 for more"
  when the server returned pagination metadata

Skills hub's remote fetch latency is a separate upstream issue
(browse_skills hits 6 sources sequentially) — client-side we just stop
misrepresenting it.
---
 ui-tui/src/app/slash/commands/ops.ts | 58 +++++++++++++++++++++++++---
 1 file changed, 53 insertions(+), 5 deletions(-)

diff --git a/ui-tui/src/app/slash/commands/ops.ts b/ui-tui/src/app/slash/commands/ops.ts
index d941c5af410..26318b3fb06 100644
--- a/ui-tui/src/app/slash/commands/ops.ts
+++ b/ui-tui/src/app/slash/commands/ops.ts
@@ -27,6 +27,20 @@ interface SkillsInstallResponse {
   name?: string
 }
 
+interface SkillsBrowseItem {
+  description?: string
+  name: string
+  source?: string
+  trust?: string
+}
+
+interface SkillsBrowseResponse {
+  items?: SkillsBrowseItem[]
+  page?: number
+  total?: number
+  total_pages?: number
+}
+
 export const opsCommands: SlashCommand[] = [
   {
     help: 'browse, inspect, install skills',
@@ -139,13 +153,47 @@ export const opsCommands: SlashCommand[] = [
       }
 
       if (sub === 'browse') {
-        const pageNum = parseInt(query, 10) || 1
+        const pageNum = query ? parseInt(query, 10) : 1
 
-        rpc<Record<string, unknown>>('skills.manage', { action: 'browse', page: pageNum })
+        if (Number.isNaN(pageNum) || pageNum < 1) {
+          return sys('usage: /skills browse [page]  (page must be a positive number)')
+        }
+
+        sys('fetching community skills (scans 6 sources, may take ~15s)…')
+
+        rpc<SkillsBrowseResponse>('skills.manage', { action: 'browse', page: pageNum })
           .then(
-            ctx.guarded<Record<string, unknown>>(r =>
-              page(JSON.stringify(r, null, 2).slice(0, 4000), `Browse Skills — p${pageNum}`)
-            )
+            ctx.guarded<SkillsBrowseResponse>(r => {
+              const items = r.items ?? []
+
+              if (!items.length) {
+                return sys(`no skills on page ${pageNum}${r.total ? ` (total ${r.total})` : ''}`)
+              }
+
+              const rows: [string, string][] = items.map(s => [
+                s.trust ? `${s.name} · ${s.trust}` : s.name,
+                String(s.description ?? '').slice(0, 160)
+              ])
+
+              const footer: string[] = []
+
+              if (r.page && r.total_pages) {
+                footer.push(`page ${r.page} of ${r.total_pages}`)
+              }
+
+              if (r.total) {
+                footer.push(`${r.total} skills total`)
+              }
+
+              if (r.page && r.total_pages && r.page < r.total_pages) {
+                footer.push(`/skills browse ${r.page + 1} for more`)
+              }
+
+              panel(`Browse Skills${pageNum > 1 ? ` — p${pageNum}` : ''}`, [
+                { rows },
+                ...(footer.length ? [{ text: footer.join(' · ') }] : [])
+              ])
+            })
           )
           .catch(ctx.guardedErr)
 

From fb06bc67debf74ba53de7ebc90e2d6755ae0e973 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 16:28:51 -0500
Subject: [PATCH 063/143] fix(tui): Ctrl+C with input selection actually
 preserves input (lift handler to app level)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous fix in 9dbf1ec6 handled Ctrl+C inside textInput, but the
app-level useInputHandlers received the same keypress in a separate React
hook and ran clearIn() regardless. Net effect: the OSC 52 copy succeeded but
the input was wiped right after, so Brooklyn only noticed the wipe.

Lift the selection-aware Ctrl+C to a single place by threading input
selection state through a new nanostore (src/app/inputSelectionStore.ts).
textInput syncs its derived `selected` range + a clear() callback to the
store on every selection change, and the app-level Ctrl+C handler reads
the store before its clear/interrupt/die chain:

  - terminal-level selection (scrollback) → copy, existing behavior
  - in-input selection present → copy + clear selection, preserve input
  - input has text, no selection → clearIn(), existing behavior
  - empty + busy → interrupt turn
  - empty + idle → die

textInput no longer has its own Ctrl+C block; keypress falls through to
app-level like it did before 9dbf1ec6.
---
 ui-tui/src/app/inputSelectionStore.ts | 14 ++++++++
 ui-tui/src/app/useInputHandlers.ts    | 12 +++++++
 ui-tui/src/components/textInput.tsx   | 48 ++++++++++++++++++---------
 3 files changed, 59 insertions(+), 15 deletions(-)
 create mode 100644 ui-tui/src/app/inputSelectionStore.ts

diff --git a/ui-tui/src/app/inputSelectionStore.ts b/ui-tui/src/app/inputSelectionStore.ts
new file mode 100644
index 00000000000..25b67c4283e
--- /dev/null
+++ b/ui-tui/src/app/inputSelectionStore.ts
@@ -0,0 +1,14 @@
+import { atom } from 'nanostores'
+
+export interface InputSelection {
+  clear: () => void
+  end: number
+  start: number
+  value: string
+}
+
+export const $inputSelection = atom<InputSelection | null>(null)
+
+export const setInputSelection = (next: InputSelection | null) => $inputSelection.set(next)
+
+export const getInputSelection = () => $inputSelection.get()
diff --git a/ui-tui/src/app/useInputHandlers.ts b/ui-tui/src/app/useInputHandlers.ts
index 0279a203cac..b71a1dc3924 100644
--- a/ui-tui/src/app/useInputHandlers.ts
+++ b/ui-tui/src/app/useInputHandlers.ts
@@ -8,6 +8,9 @@ import type {
   VoiceRecordResponse
 } from '../gatewayTypes.js'
 
+import { writeOsc52Clipboard } from '../lib/osc52.js'
+
+import { getInputSelection } from './inputSelectionStore.js'
 import type { InputHandlerContext, InputHandlerResult } from './interfaces.js'
 import { $isBlocked, $overlayState, patchOverlayState } from './overlayStore.js'
 import { turnController } from './turnController.js'
@@ -247,6 +250,15 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
         return copySelection()
       }
 
+      const inputSel = getInputSelection()
+
+      if (inputSel && inputSel.end > inputSel.start) {
+        writeOsc52Clipboard(inputSel.value.slice(inputSel.start, inputSel.end))
+        inputSel.clear()
+
+        return
+      }
+
       if (live.busy && live.sid) {
         return turnController.interruptTurn({
           appendMessage: actions.appendMessage,
diff --git a/ui-tui/src/components/textInput.tsx b/ui-tui/src/components/textInput.tsx
index 3f456482123..dff8121b5e9 100644
--- a/ui-tui/src/components/textInput.tsx
+++ b/ui-tui/src/components/textInput.tsx
@@ -2,7 +2,7 @@ import type { InputEvent, Key } from '@hermes/ink'
 import * as Ink from '@hermes/ink'
 import { useEffect, useMemo, useRef, useState } from 'react'
 
-import { writeOsc52Clipboard } from '../lib/osc52.js'
+import { setInputSelection } from '../app/inputSelectionStore.js'
 
 type InkExt = typeof Ink & {
   stringWidth: (s: string) => number
@@ -353,6 +353,28 @@ export function TextInput({
     }
   }, [value])
 
+  useEffect(() => {
+    if (!focus) {
+      return
+    }
+
+    if (selected) {
+      setInputSelection({
+        clear: () => {
+          selRef.current = null
+          setSel(null)
+        },
+        end: selected.end,
+        start: selected.start,
+        value: vRef.current
+      })
+    } else {
+      setInputSelection(null)
+    }
+
+    return () => setInputSelection(null)
+  }, [focus, selected])
+
   useEffect(
     () => () => {
       if (pasteTimer.current) {
@@ -470,20 +492,16 @@ export function TextInput({
         return void emitPaste({ cursor: curRef.current, hotkey: true, text: '', value: vRef.current })
       }
 
-      if (k.ctrl && inp === 'c') {
-        const range = selRange()
-
-        if (range) {
-          writeOsc52Clipboard(vRef.current.slice(range.start, range.end))
-          clearSel()
-
-          return
-        }
-
-        return
-      }
-
-      if (k.upArrow || k.downArrow || k.tab || (k.shift && k.tab) || k.pageUp || k.pageDown || k.escape) {
+      if (
+        k.upArrow ||
+        k.downArrow ||
+        (k.ctrl && inp === 'c') ||
+        k.tab ||
+        (k.shift && k.tab) ||
+        k.pageUp ||
+        k.pageDown ||
+        k.escape
+      ) {
         return
       }
 

From 4e8f60fd110e54db771af507931fd32e855bd880 Mon Sep 17 00:00:00 2001
From: helix4u <4317663+helix4u@users.noreply.github.com>
Date: Fri, 17 Apr 2026 22:19:33 -0600
Subject: [PATCH 064/143] fix(cli): use display width for wrapped spinner
 height

---
 cli.py                           | 44 +++++++++++++++++---------------
 tests/cli/test_cli_status_bar.py |  7 +++++
 2 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/cli.py b/cli.py
index 8aa8bb03f11..02c1a4f7ef6 100644
--- a/cli.py
+++ b/cli.py
@@ -2068,20 +2068,35 @@ class HermesCLI:
 
     def _spinner_widget_height(self, width: Optional[int] = None) -> int:
         """Return the visible height for the spinner/status text line above the status bar."""
-        if not getattr(self, "_spinner_text", ""):
+        spinner_line = self._render_spinner_text()
+        if not spinner_line:
             return 0
         if self._use_minimal_tui_chrome(width=width):
             return 0
-        # Compute how many lines the spinner text needs when wrapped.
-        # The rendered text is "  {emoji} {label}  ({elapsed})" — about
-        # len(_spinner_text) + 16 chars for indent + timer suffix.
         width = width or self._get_tui_terminal_width()
         if width and width > 10:
             import math
-            text_len = len(self._spinner_text) + 16  # indent + timer
-            return max(1, math.ceil(text_len / width))
+            text_width = self._status_bar_display_width(spinner_line)
+            return max(1, math.ceil(text_width / width))
         return 1
 
+    def _render_spinner_text(self) -> str:
+        """Return the live spinner/status text exactly as rendered in the TUI."""
+        txt = getattr(self, "_spinner_text", "")
+        if not txt:
+            return ""
+        t0 = getattr(self, "_tool_start_time", 0) or 0
+        if t0 > 0:
+            import time as _time
+            elapsed = _time.monotonic() - t0
+            if elapsed >= 60:
+                _m, _s = int(elapsed // 60), int(elapsed % 60)
+                elapsed_str = f"{_m}m {_s}s"
+            else:
+                elapsed_str = f"{elapsed:.1f}s"
+            return f"  {txt}  ({elapsed_str})"
+        return f"  {txt}"
+
     def _get_voice_status_fragments(self, width: Optional[int] = None):
         """Return the voice status bar fragments for the interactive TUI."""
         width = width or self._get_tui_terminal_width()
@@ -9375,21 +9390,10 @@ class HermesCLI:
             return cli_ref._agent_spacer_height()
 
         def get_spinner_text():
-            txt = cli_ref._spinner_text
-            if not txt:
+            spinner_line = cli_ref._render_spinner_text()
+            if not spinner_line:
                 return []
-            # Append live elapsed timer when a tool is running
-            t0 = cli_ref._tool_start_time
-            if t0 > 0:
-                import time as _time
-                elapsed = _time.monotonic() - t0
-                if elapsed >= 60:
-                    _m, _s = int(elapsed // 60), int(elapsed % 60)
-                    elapsed_str = f"{_m}m {_s}s"
-                else:
-                    elapsed_str = f"{elapsed:.1f}s"
-                return [('class:hint', f'  {txt}  ({elapsed_str})')]
-            return [('class:hint', f'  {txt}')]
+            return [('class:hint', spinner_line)]
 
         def get_spinner_height():
             return cli_ref._spinner_widget_height()
diff --git a/tests/cli/test_cli_status_bar.py b/tests/cli/test_cli_status_bar.py
index eabcd0f9624..4a65c6e4673 100644
--- a/tests/cli/test_cli_status_bar.py
+++ b/tests/cli/test_cli_status_bar.py
@@ -237,6 +237,13 @@ class TestCLIStatusBar:
         cli_obj._spinner_text = ""
         assert cli_obj._spinner_widget_height(width=90) == 0
 
+    def test_spinner_height_uses_display_width_for_wide_characters(self):
+        cli_obj = _make_cli()
+        cli_obj._spinner_text = "你" * 40
+        cli_obj._tool_start_time = 0
+
+        assert cli_obj._spinner_widget_height(width=64) == 2
+
     def test_voice_status_bar_compacts_on_narrow_terminals(self):
         cli_obj = _make_cli()
         cli_obj._voice_mode = True

From 0175ff7516515f62f2aab42e34c23e920838371d Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 18 Apr 2026 15:11:32 -0700
Subject: [PATCH 065/143] =?UTF-8?q?feat(skills):=20replace=20xitter=20with?=
 =?UTF-8?q?=20xurl=20=E2=80=94=20the=20official=20X=20API=20CLI=20(#12303)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Swap the social-media/xitter skill (third-party wrapper around
Infatoshi/x-cli) for a new social-media/xurl skill wrapping
xdevplatform/xurl — the official X API CLI from the X developer
platform team.

Why:
- xurl is officially maintained by the X dev platform team
- OAuth 2.0 PKCE with auto-refresh + multi-app / multi-user support
  (vs. xitter's 5-env-var OAuth 1.0a + single account)
- Credentials stored in ~/.xurl managed by xurl itself — no manual
  env var juggling for users
- Substantially larger API surface: DMs, follows, blocks, mutes,
  media upload, streaming, and raw v2 endpoint access
- Ships stronger agent-safety guardrails (forbidden-flag list,
  no --verbose in agent mode, never-read-~/.xurl rule)

Adaptation:
- Ported the openclaw SKILL.md (which the xdevplatform team seeded)
  to Hermes frontmatter conventions (prerequisites.commands, platforms,
  metadata.hermes.tags/homepage) — dropped openclaw-specific metadata
- Added a Hermes-oriented one-time user setup section so the agent
  knows to direct the user to run auth commands themselves, never
  execute them with inline secrets
- Preserved the mandatory secret-safety rules verbatim
- Attribution block credits xdevplatform, openclaw, and the Hermes
  port

Docs: updated website/docs/reference/skills-catalog.md to replace
the xitter row with xurl.
---
 skills/social-media/xitter/SKILL.md      | 202 ------------
 skills/social-media/xurl/SKILL.md        | 386 +++++++++++++++++++++++
 website/docs/reference/skills-catalog.md |   2 +-
 3 files changed, 387 insertions(+), 203 deletions(-)
 delete mode 100644 skills/social-media/xitter/SKILL.md
 create mode 100644 skills/social-media/xurl/SKILL.md

diff --git a/skills/social-media/xitter/SKILL.md b/skills/social-media/xitter/SKILL.md
deleted file mode 100644
index 802924dff39..00000000000
--- a/skills/social-media/xitter/SKILL.md
+++ /dev/null
@@ -1,202 +0,0 @@
----
-name: xitter
-description: Interact with X/Twitter via the x-cli terminal client using official X API credentials. Use for posting, reading timelines, searching tweets, liking, retweeting, bookmarks, mentions, and user lookups.
-version: 1.0.0
-author: Siddharth Balyan + Hermes Agent
-license: MIT
-platforms: [linux, macos]
-prerequisites:
-  commands: [uv]
-  env_vars: [X_API_KEY, X_API_SECRET, X_BEARER_TOKEN, X_ACCESS_TOKEN, X_ACCESS_TOKEN_SECRET]
-metadata:
-  hermes:
-    tags: [twitter, x, social-media, x-cli]
-    homepage: https://github.com/Infatoshi/x-cli
----
-
-# Xitter — X/Twitter via x-cli
-
-Use `x-cli` for official X/Twitter API interactions from the terminal.
-
-This skill is for:
-- posting tweets, replies, and quote tweets
-- searching tweets and reading timelines
-- looking up users, followers, and following
-- liking and retweeting
-- checking mentions and bookmarks
-
-This skill intentionally does not vendor a separate CLI implementation into Hermes. Install and use upstream `x-cli` instead.
-
-## Important Cost / Access Note
-
-X API access is not meaningfully free for most real usage. Expect to need paid or prepaid X developer access. If commands fail with permissions or quota errors, check your X developer plan first.
-
-## Install
-
-Install upstream `x-cli` with `uv`:
-
-```bash
-uv tool install git+https://github.com/Infatoshi/x-cli.git
-```
-
-Upgrade later with:
-
-```bash
-uv tool upgrade x-cli
-```
-
-Verify:
-
-```bash
-x-cli --help
-```
-
-## Credentials
-
-You need these five values from the X Developer Portal:
-- `X_API_KEY`
-- `X_API_SECRET`
-- `X_BEARER_TOKEN`
-- `X_ACCESS_TOKEN`
-- `X_ACCESS_TOKEN_SECRET`
-
-Get them from:
-- https://developer.x.com/en/portal/dashboard
-
-### Why does X need 5 secrets?
-
-Unfortunately, the official X API splits auth across both app-level and user-level credentials:
-
-- `X_API_KEY` + `X_API_SECRET` identify your app
-- `X_BEARER_TOKEN` is used for app-level read access
-- `X_ACCESS_TOKEN` + `X_ACCESS_TOKEN_SECRET` let the CLI act as your user account for writes and authenticated actions
-
-So yes — it is a lot of secrets for one integration, but this is the stable official API path and is still preferable to cookie/session scraping.
-
-Setup requirements in the portal:
-1. Create or open your app
-2. In user authentication settings, set permissions to `Read and write`
-3. Generate or regenerate the access token + access token secret after enabling write permissions
-4. Save all five values carefully — missing any one of them will usually produce confusing auth or permission errors
-
-Note: upstream `x-cli` expects the full credential set to be present, so even if you mostly care about read-only commands, it is simplest to configure all five.
-
-## Cost / Friction Reality Check
-
-If this setup feels heavier than it should be, that is because it is. X’s official developer flow is high-friction and often paid. This skill chooses the official API path because it is more stable and maintainable than browser-cookie/session approaches.
-
-If the user wants the least brittle long-term setup, use this skill. If they want a zero-setup or unofficial path, that is a different trade-off and not what this skill is for.
-
-
-## Where to Store Credentials
-
-`x-cli` looks for credentials in `~/.config/x-cli/.env`.
-
-If you already keep your X credentials in `~/.hermes/.env`, the cleanest setup is:
-
-```bash
-mkdir -p ~/.config/x-cli
-ln -sf ~/.hermes/.env ~/.config/x-cli/.env
-```
-
-Or create a dedicated file:
-
-```bash
-mkdir -p ~/.config/x-cli
-cat > ~/.config/x-cli/.env <<'EOF'
-X_API_KEY=your_consumer_key
-X_API_SECRET=your_secret_key
-X_BEARER_TOKEN=your_bearer_token
-X_ACCESS_TOKEN=your_access_token
-X_ACCESS_TOKEN_SECRET=your_access_token_secret
-EOF
-chmod 600 ~/.config/x-cli/.env
-```
-
-## Quick Verification
-
-```bash
-x-cli user get openai
-x-cli tweet search "from:NousResearch" --max 3
-x-cli me mentions --max 5
-```
-
-If reads work but writes fail, regenerate the access token after confirming `Read and write` permissions.
-
-## Common Commands
-
-### Tweets
-
-```bash
-x-cli tweet post "hello world"
-x-cli tweet get https://x.com/user/status/1234567890
-x-cli tweet delete 1234567890
-x-cli tweet reply 1234567890 "nice post"
-x-cli tweet quote 1234567890 "worth reading"
-x-cli tweet search "AI agents" --max 20
-x-cli tweet metrics 1234567890
-```
-
-### Users
-
-```bash
-x-cli user get openai
-x-cli user timeline openai --max 10
-x-cli user followers openai --max 50
-x-cli user following openai --max 50
-```
-
-### Self / Authenticated User
-
-```bash
-x-cli me mentions --max 20
-x-cli me bookmarks --max 20
-x-cli me bookmark 1234567890
-x-cli me unbookmark 1234567890
-```
-
-### Quick Actions
-
-```bash
-x-cli like 1234567890
-x-cli retweet 1234567890
-```
-
-## Output Modes
-
-Use structured output when the agent needs to inspect fields programmatically:
-
-```bash
-x-cli -j tweet search "AI agents" --max 5
-x-cli -p user get openai
-x-cli -md tweet get 1234567890
-x-cli -v -j tweet get 1234567890
-```
-
-Recommended defaults:
-- `-j` for machine-readable output
-- `-v` when you need timestamps, metrics, or metadata
-- plain/default mode for quick human inspection
-
-## Agent Workflow
-
-1. Confirm `x-cli` is installed
-2. Confirm credentials are present
-3. Start with a read command (`user get`, `tweet search`, `me mentions`)
-4. Use `-j` when extracting fields for later steps
-5. Only perform write actions after confirming the target tweet/user and the user's intent
-
-## Pitfalls
-
-- **Paid API access**: many failures are plan/permission problems, not code problems.
-- **403 oauth1-permissions**: regenerate the access token after enabling `Read and write`.
-- **Reply restrictions**: X restricts many programmatic replies. `tweet quote` is often more reliable than `tweet reply`.
-- **Rate limits**: expect per-endpoint limits and cooldown windows.
-- **Credential drift**: if you rotate tokens in `~/.hermes/.env`, make sure `~/.config/x-cli/.env` still points at the current file.
-
-## Notes
-
-- Prefer official API workflows over cookie/session scraping.
-- Use tweet URLs or IDs interchangeably — `x-cli` accepts both.
-- If bookmark behavior changes upstream, check the upstream README first:
-  https://github.com/Infatoshi/x-cli
diff --git a/skills/social-media/xurl/SKILL.md b/skills/social-media/xurl/SKILL.md
new file mode 100644
index 00000000000..2d7a017c9cd
--- /dev/null
+++ b/skills/social-media/xurl/SKILL.md
@@ -0,0 +1,386 @@
+---
+name: xurl
+description: Interact with X/Twitter via xurl, the official X API CLI. Use for posting, replying, quoting, searching, timelines, mentions, likes, reposts, bookmarks, follows, DMs, media upload, and raw v2 endpoint access.
+version: 1.0.0
+author: xdevplatform + openclaw + Hermes Agent
+license: MIT
+platforms: [linux, macos]
+prerequisites:
+  commands: [xurl]
+metadata:
+  hermes:
+    tags: [twitter, x, social-media, xurl, official-api]
+    homepage: https://github.com/xdevplatform/xurl
+    upstream_skill: https://github.com/openclaw/openclaw/blob/main/skills/xurl/SKILL.md
+---
+
+# xurl — X (Twitter) API via the Official CLI
+
+`xurl` is the X developer platform's official CLI for the X API. It supports shortcut commands for common actions AND raw curl-style access to any v2 endpoint. All commands return JSON to stdout.
+
+Use this skill for:
+- posting, replying, quoting, deleting posts
+- searching posts and reading timelines/mentions
+- liking, reposting, bookmarking
+- following, unfollowing, blocking, muting
+- direct messages
+- media uploads (images and video)
+- raw access to any X API v2 endpoint
+- multi-app / multi-account workflows
+
+This skill replaces the older `xitter` skill (which wrapped a third-party Python CLI). `xurl` is maintained by the X developer platform team, supports OAuth 2.0 PKCE with auto-refresh, and covers a substantially larger API surface.
+
+---
+
+## Secret Safety (MANDATORY)
+
+Critical rules when operating inside an agent/LLM session:
+
+- **Never** read, print, parse, summarize, upload, or send `~/.xurl` to LLM context.
+- **Never** ask the user to paste credentials/tokens into chat.
+- The user must fill `~/.xurl` with secrets manually on their own machine.
+- **Never** recommend or execute auth commands with inline secrets in agent sessions.
+- **Never** use `--verbose` / `-v` in agent sessions — it can expose auth headers/tokens.
+- To verify credentials exist, only use: `xurl auth status`.
+
+Forbidden flags in agent commands (they accept inline secrets):
+`--bearer-token`, `--consumer-key`, `--consumer-secret`, `--access-token`, `--token-secret`, `--client-id`, `--client-secret`
+
+App credential registration and credential rotation must be done by the user manually, outside the agent session. After credentials are registered, the user authenticates with `xurl auth oauth2` — also outside the agent session. Tokens persist to `~/.xurl` in YAML. Each app has isolated tokens. OAuth 2.0 tokens auto-refresh.
+
+---
+
+## Installation
+
+Pick ONE method. On Linux, the shell script and `go install` are the easiest options.
+
+```bash
+# Shell script (installs to ~/.local/bin, no sudo, works on Linux + macOS)
+curl -fsSL https://raw.githubusercontent.com/xdevplatform/xurl/main/install.sh | bash
+
+# Homebrew (macOS)
+brew install --cask xdevplatform/tap/xurl
+
+# npm
+npm install -g @xdevplatform/xurl
+
+# Go
+go install github.com/xdevplatform/xurl@latest
+```
+
+Verify:
+
+```bash
+xurl --help
+xurl auth status
+```
+
+If `xurl` is installed but `auth status` shows no apps or tokens, the user needs to complete auth manually — see the next section.
+
+---
+
+## One-Time User Setup (user runs these outside the agent)
+
+These steps must be performed by the user directly, NOT by the agent, because they involve pasting secrets. Direct the user to this block; do not execute it for them.
+
+1. Create or open an app at https://developer.x.com/en/portal/dashboard
+2. Set the redirect URI to `http://localhost:8080/callback`
+3. Copy the app's Client ID and Client Secret
+4. Register the app locally (user runs this):
+   ```bash
+   xurl auth apps add my-app --client-id YOUR_CLIENT_ID --client-secret YOUR_CLIENT_SECRET
+   ```
+5. Authenticate:
+   ```bash
+   xurl auth oauth2
+   ```
+   (This opens a browser for the OAuth 2.0 PKCE flow.)
+6. Verify:
+   ```bash
+   xurl auth status
+   xurl whoami
+   ```
+
+After this, the agent can use any command below without further setup. OAuth 2.0 tokens auto-refresh.
+
+---
+
+## Quick Reference
+
+| Action | Command |
+| --- | --- |
+| Post | `xurl post "Hello world!"` |
+| Reply | `xurl reply POST_ID "Nice post!"` |
+| Quote | `xurl quote POST_ID "My take"` |
+| Delete a post | `xurl delete POST_ID` |
+| Read a post | `xurl read POST_ID` |
+| Search posts | `xurl search "QUERY" -n 10` |
+| Who am I | `xurl whoami` |
+| Look up a user | `xurl user @handle` |
+| Home timeline | `xurl timeline -n 20` |
+| Mentions | `xurl mentions -n 10` |
+| Like / Unlike | `xurl like POST_ID` / `xurl unlike POST_ID` |
+| Repost / Undo | `xurl repost POST_ID` / `xurl unrepost POST_ID` |
+| Bookmark / Remove | `xurl bookmark POST_ID` / `xurl unbookmark POST_ID` |
+| List bookmarks / likes | `xurl bookmarks -n 10` / `xurl likes -n 10` |
+| Follow / Unfollow | `xurl follow @handle` / `xurl unfollow @handle` |
+| Following / Followers | `xurl following -n 20` / `xurl followers -n 20` |
+| Block / Unblock | `xurl block @handle` / `xurl unblock @handle` |
+| Mute / Unmute | `xurl mute @handle` / `xurl unmute @handle` |
+| Send DM | `xurl dm @handle "message"` |
+| List DMs | `xurl dms -n 10` |
+| Upload media | `xurl media upload path/to/file.mp4` |
+| Media status | `xurl media status MEDIA_ID` |
+| List apps | `xurl auth apps list` |
+| Remove app | `xurl auth apps remove NAME` |
+| Set default app | `xurl auth default APP_NAME [USERNAME]` |
+| Per-request app | `xurl --app NAME /2/users/me` |
+| Auth status | `xurl auth status` |
+
+Notes:
+- `POST_ID` accepts full URLs too (e.g. `https://x.com/user/status/1234567890`) — xurl extracts the ID.
+- Usernames work with or without a leading `@`.
+
+---
+
+## Command Details
+
+### Posting
+
+```bash
+xurl post "Hello world!"
+xurl post "Check this out" --media-id MEDIA_ID
+xurl post "Thread pics" --media-id 111 --media-id 222
+
+xurl reply 1234567890 "Great point!"
+xurl reply https://x.com/user/status/1234567890 "Agreed!"
+xurl reply 1234567890 "Look at this" --media-id MEDIA_ID
+
+xurl quote 1234567890 "Adding my thoughts"
+xurl delete 1234567890
+```
+
+### Reading & Search
+
+```bash
+xurl read 1234567890
+xurl read https://x.com/user/status/1234567890
+
+xurl search "golang"
+xurl search "from:elonmusk" -n 20
+xurl search "#buildinpublic lang:en" -n 15
+```
+
+### Users, Timeline, Mentions
+
+```bash
+xurl whoami
+xurl user elonmusk
+xurl user @XDevelopers
+
+xurl timeline -n 25
+xurl mentions -n 20
+```
+
+### Engagement
+
+```bash
+xurl like 1234567890
+xurl unlike 1234567890
+
+xurl repost 1234567890
+xurl unrepost 1234567890
+
+xurl bookmark 1234567890
+xurl unbookmark 1234567890
+
+xurl bookmarks -n 20
+xurl likes -n 20
+```
+
+### Social Graph
+
+```bash
+xurl follow @XDevelopers
+xurl unfollow @XDevelopers
+
+xurl following -n 50
+xurl followers -n 50
+
+# Another user's graph
+xurl following --of elonmusk -n 20
+xurl followers --of elonmusk -n 20
+
+xurl block @spammer
+xurl unblock @spammer
+xurl mute @annoying
+xurl unmute @annoying
+```
+
+### Direct Messages
+
+```bash
+xurl dm @someuser "Hey, saw your post!"
+xurl dms -n 25
+```
+
+### Media Upload
+
+```bash
+# Auto-detect type
+xurl media upload photo.jpg
+xurl media upload video.mp4
+
+# Explicit type/category
+xurl media upload --media-type image/jpeg --category tweet_image photo.jpg
+
+# Videos need server-side processing — check status (or poll)
+xurl media status MEDIA_ID
+xurl media status --wait MEDIA_ID
+
+# Full workflow
+xurl media upload meme.png                  # returns media id
+xurl post "lol" --media-id MEDIA_ID
+```
+
+---
+
+## Raw API Access
+
+The shortcuts cover common operations. For anything else, use raw curl-style mode against any X API v2 endpoint:
+
+```bash
+# GET
+xurl /2/users/me
+
+# POST with JSON body
+xurl -X POST /2/tweets -d '{"text":"Hello world!"}'
+
+# DELETE / PUT / PATCH
+xurl -X DELETE /2/tweets/1234567890
+
+# Custom headers
+xurl -H "Content-Type: application/json" /2/some/endpoint
+
+# Force streaming
+xurl -s /2/tweets/search/stream
+
+# Full URLs also work
+xurl https://api.x.com/2/users/me
+```
+
+---
+
+## Global Flags
+
+| Flag | Short | Description |
+| --- | --- | --- |
+| `--app` | | Use a specific registered app (overrides default) |
+| `--auth` | | Force auth type: `oauth1`, `oauth2`, or `app` |
+| `--username` | `-u` | Which OAuth2 account to use (if multiple exist) |
+| `--verbose` | `-v` | **Forbidden in agent sessions** — leaks auth headers |
+| `--trace` | `-t` | Add `X-B3-Flags: 1` trace header |
+
+---
+
+## Streaming
+
+Streaming endpoints are auto-detected. Known ones include:
+
+- `/2/tweets/search/stream`
+- `/2/tweets/sample/stream`
+- `/2/tweets/sample10/stream`
+
+Force streaming on any endpoint with `-s`.
+
+---
+
+## Output Format
+
+All commands return JSON to stdout. Structure mirrors X API v2:
+
+```json
+{ "data": { "id": "1234567890", "text": "Hello world!" } }
+```
+
+Errors are also JSON:
+
+```json
+{ "errors": [ { "message": "Not authorized", "code": 403 } ] }
+```
+
+---
+
+## Common Workflows
+
+### Post with an image
+```bash
+xurl media upload photo.jpg
+xurl post "Check out this photo!" --media-id MEDIA_ID
+```
+
+### Reply to a conversation
+```bash
+xurl read https://x.com/user/status/1234567890
+xurl reply 1234567890 "Here are my thoughts..."
+```
+
+### Search and engage
+```bash
+xurl search "topic of interest" -n 10
+xurl like POST_ID_FROM_RESULTS
+xurl reply POST_ID_FROM_RESULTS "Great point!"
+```
+
+### Check your activity
+```bash
+xurl whoami
+xurl mentions -n 20
+xurl timeline -n 20
+```
+
+### Multiple apps (credentials pre-configured manually)
+```bash
+xurl auth default prod alice               # prod app, alice user
+xurl --app staging /2/users/me             # one-off against staging
+```
+
+---
+
+## Error Handling
+
+- Non-zero exit code on any error.
+- API errors are still printed as JSON to stdout, so you can parse them.
+- Auth errors → have the user re-run `xurl auth oauth2` outside the agent session.
+- Commands that need the caller's user ID (like, repost, bookmark, follow, etc.) will auto-fetch it via `/2/users/me`. An auth failure there surfaces as an auth error.
+
+---
+
+## Agent Workflow
+
+1. Verify prerequisites: `xurl --help` and `xurl auth status`.
+2. If auth is missing, stop and direct the user to the "One-Time User Setup" section — do NOT attempt to register apps or pass secrets yourself.
+3. Start with a cheap read (`xurl whoami`, `xurl user @handle`, `xurl search ... -n 3`) to confirm reachability.
+4. Confirm the target post/user and the user's intent before any write action (post, reply, like, repost, DM, follow, block, delete).
+5. Use JSON output directly — every response is already structured.
+6. Never paste `~/.xurl` contents back into the conversation.
+
+---
+
+## Notes
+
+- **Rate limits:** X enforces per-endpoint rate limits. A 429 means wait and retry. Write endpoints (post, reply, like, repost) have tighter limits than reads.
+- **Scopes:** OAuth 2.0 tokens use broad scopes. A 403 on a specific action usually means the token is missing a scope — have the user re-run `xurl auth oauth2`.
+- **Token refresh:** OAuth 2.0 tokens auto-refresh. Nothing to do.
+- **Multiple apps:** Each app has isolated credentials/tokens. Switch with `xurl auth default` or `--app`.
+- **Multiple accounts per app:** Select with `-u / --username`, or set a default with `xurl auth default APP USER`.
+- **Token storage:** `~/.xurl` is YAML. Never read or send this file to LLM context.
+- **Cost:** X API access is typically paid for meaningful usage. Many failures are plan/permission problems, not code problems.
+
+---
+
+## Attribution
+
+- Upstream CLI: https://github.com/xdevplatform/xurl (X developer platform team, Chris Park et al.)
+- Upstream agent skill: https://github.com/openclaw/openclaw/blob/main/skills/xurl/SKILL.md
+- Hermes adaptation: reformatted for Hermes skill conventions; safety guardrails preserved verbatim.
diff --git a/website/docs/reference/skills-catalog.md b/website/docs/reference/skills-catalog.md
index e5283ba0154..27fbb8c7655 100644
--- a/website/docs/reference/skills-catalog.md
+++ b/website/docs/reference/skills-catalog.md
@@ -251,7 +251,7 @@ Skills for interacting with social platforms — posting, reading, monitoring, a
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| `xitter` | Interact with X/Twitter via the x-cli terminal client using official X API credentials. Use for posting, reading timelines, searching tweets, liking, retweeting, bookmarks, mentions, and user lookups. | `social-media/xitter` |
+| `xurl` | Interact with X/Twitter via xurl, the official X API CLI. Use for posting, replying, quoting, searching, timelines, mentions, likes, reposts, bookmarks, follows, DMs, media upload, and raw v2 endpoint access. | `social-media/xurl` |
 
 ## software-development
 

From ff2aa7ccd776f8e787644515e7864ed40b0599b3 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 17:14:29 -0500
Subject: [PATCH 066/143] feat(tui): append git branch to cwd label in status
 bar

Adds useGitBranch hook (async, cached, 15s TTL) and fmtCwdBranch
helper so the footer shows `~/repo (main)` instead of just `~/repo`.
Degrades silently (no branch suffix) when git is unavailable, cwd is
outside a repo, or HEAD is detached.

Partial fix for #12267 (TUI portion; #12277 covers the Python side).
---
 ui-tui/src/__tests__/paths.test.ts | 70 +++++++++++++++++++++++++++++
 ui-tui/src/app/useMainApp.ts       | 10 +++--
 ui-tui/src/domain/paths.ts         | 11 +++++
 ui-tui/src/hooks/useGitBranch.ts   | 72 ++++++++++++++++++++++++++++++
 4 files changed, 160 insertions(+), 3 deletions(-)
 create mode 100644 ui-tui/src/__tests__/paths.test.ts
 create mode 100644 ui-tui/src/hooks/useGitBranch.ts

diff --git a/ui-tui/src/__tests__/paths.test.ts b/ui-tui/src/__tests__/paths.test.ts
new file mode 100644
index 00000000000..ef3c31ff36e
--- /dev/null
+++ b/ui-tui/src/__tests__/paths.test.ts
@@ -0,0 +1,70 @@
+import { afterEach, beforeEach, describe, expect, it } from 'vitest'
+
+import { fmtCwdBranch, shortCwd } from '../domain/paths.js'
+
+describe('shortCwd', () => {
+  const origHome = process.env.HOME
+
+  beforeEach(() => {
+    process.env.HOME = '/Users/bb'
+  })
+
+  afterEach(() => {
+    process.env.HOME = origHome
+  })
+
+  it('collapses HOME to ~', () => {
+    expect(shortCwd('/Users/bb/proj/repo')).toBe('~/proj/repo')
+  })
+
+  it('leaves non-HOME paths alone', () => {
+    expect(shortCwd('/tmp/work')).toBe('/tmp/work')
+  })
+
+  it('truncates long paths from the left with ellipsis', () => {
+    const out = shortCwd('/var/long/deeply/nested/workspace/here', 10)
+    expect(out.startsWith('…')).toBe(true)
+    expect(out.length).toBe(10)
+    expect('/var/long/deeply/nested/workspace/here'.endsWith(out.slice(1))).toBe(true)
+  })
+
+  it('keeps paths shorter than max intact', () => {
+    expect(shortCwd('/a/b', 10)).toBe('/a/b')
+  })
+})
+
+describe('fmtCwdBranch', () => {
+  const origHome = process.env.HOME
+
+  beforeEach(() => {
+    process.env.HOME = '/Users/bb'
+  })
+
+  afterEach(() => {
+    process.env.HOME = origHome
+  })
+
+  it('returns bare cwd when branch is null', () => {
+    expect(fmtCwdBranch('/Users/bb/proj', null)).toBe('~/proj')
+  })
+
+  it('returns bare cwd when branch is empty', () => {
+    expect(fmtCwdBranch('/Users/bb/proj', '')).toBe('~/proj')
+  })
+
+  it('appends branch in parens', () => {
+    expect(fmtCwdBranch('/Users/bb/proj', 'main')).toBe('~/proj (main)')
+  })
+
+  it('truncates the path to keep the branch tag readable', () => {
+    const out = fmtCwdBranch('/Users/bb/very/deeply/nested/project/folder', 'feature-branch', 30)
+    expect(out).toMatch(/ \(feature-branch\)$/)
+    expect(out.length).toBeLessThanOrEqual(30)
+  })
+
+  it('truncates very long branch names from the left, keeping the tail', () => {
+    const out = fmtCwdBranch('/Users/bb/p', 'a-very-long-feature-branch-name')
+    expect(out).toMatch(/^~\/p \(…/)
+    expect(out).toContain(')')
+  })
+})
diff --git a/ui-tui/src/app/useMainApp.ts b/ui-tui/src/app/useMainApp.ts
index 46ab21c725a..fb48badea9a 100644
--- a/ui-tui/src/app/useMainApp.ts
+++ b/ui-tui/src/app/useMainApp.ts
@@ -5,7 +5,7 @@ import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
 import { STARTUP_RESUME_ID } from '../config/env.js'
 import { MAX_HISTORY, WHEEL_SCROLL_STEP } from '../config/limits.js'
 import { imageTokenMeta } from '../domain/messages.js'
-import { shortCwd } from '../domain/paths.js'
+import { fmtCwdBranch } from '../domain/paths.js'
 import { type GatewayClient } from '../gatewayClient.js'
 import type {
   ClarifyRespondResponse,
@@ -13,6 +13,7 @@ import type {
   GatewayEvent,
   TerminalResizeResponse
 } from '../gatewayTypes.js'
+import { useGitBranch } from '../hooks/useGitBranch.js'
 import { useVirtualHistory } from '../hooks/useVirtualHistory.js'
 import { asRpcResult, rpcErrorMessage } from '../lib/rpc.js'
 import { buildToolTrailLine, sameToolTrailGroup, toolTrailLabel } from '../lib/text.js'
@@ -620,9 +621,12 @@ export function useMainApp(gw: GatewayClient) {
     [turn, showProgressArea]
   )
 
+  const cwd = ui.info?.cwd || process.env.HERMES_CWD || process.cwd()
+  const gitBranch = useGitBranch(cwd)
+
   const appStatus = useMemo(
     () => ({
-      cwdLabel: shortCwd(ui.info?.cwd || process.env.HERMES_CWD || process.cwd()),
+      cwdLabel: fmtCwdBranch(cwd, gitBranch),
       goodVibesTick,
       sessionStartedAt: ui.sid ? sessionStartedAt : null,
       showStickyPrompt: !!stickyPrompt,
@@ -630,7 +634,7 @@ export function useMainApp(gw: GatewayClient) {
       stickyPrompt,
       voiceLabel: voiceRecording ? 'REC' : voiceProcessing ? 'STT' : `voice ${voiceEnabled ? 'on' : 'off'}`
     }),
-    [goodVibesTick, sessionStartedAt, stickyPrompt, ui, voiceEnabled, voiceProcessing, voiceRecording]
+    [cwd, gitBranch, goodVibesTick, sessionStartedAt, stickyPrompt, ui, voiceEnabled, voiceProcessing, voiceRecording]
   )
 
   const appTranscript = useMemo(
diff --git a/ui-tui/src/domain/paths.ts b/ui-tui/src/domain/paths.ts
index 78daff170a8..6b95dcbac15 100644
--- a/ui-tui/src/domain/paths.ts
+++ b/ui-tui/src/domain/paths.ts
@@ -4,3 +4,14 @@ export const shortCwd = (cwd: string, max = 28) => {
 
   return p.length <= max ? p : `…${p.slice(-(max - 1))}`
 }
+
+export const fmtCwdBranch = (cwd: string, branch: null | string, max = 40) => {
+  if (!branch) {
+    return shortCwd(cwd, max)
+  }
+
+  const b = branch.length > 16 ? `…${branch.slice(-15)}` : branch
+  const tag = ` (${b})`
+
+  return `${shortCwd(cwd, Math.max(8, max - tag.length))}${tag}`
+}
diff --git a/ui-tui/src/hooks/useGitBranch.ts b/ui-tui/src/hooks/useGitBranch.ts
new file mode 100644
index 00000000000..7eb4880177a
--- /dev/null
+++ b/ui-tui/src/hooks/useGitBranch.ts
@@ -0,0 +1,72 @@
+import { execFile } from 'node:child_process'
+import { promisify } from 'node:util'
+
+import { useEffect, useState } from 'react'
+
+const TTL_MS = 15_000
+const TIMEOUT_MS = 500
+
+const pexec = promisify(execFile)
+const cache = new Map<string, { at: number; branch: null | string }>()
+const inflight = new Map<string, Promise<null | string>>()
+
+const resolveBranch = async (cwd: string): Promise<null | string> => {
+  try {
+    const { stdout } = await pexec('git', ['-C', cwd, 'rev-parse', '--abbrev-ref', 'HEAD'], { timeout: TIMEOUT_MS })
+    const b = stdout.trim()
+
+    return !b || b === 'HEAD' ? null : b
+  } catch {
+    return null
+  }
+}
+
+const fetchBranch = (cwd: string): Promise<null | string> => {
+  const pending = inflight.get(cwd)
+
+  if (pending) {
+    return pending
+  }
+
+  const p = resolveBranch(cwd).finally(() => inflight.delete(cwd))
+  inflight.set(cwd, p)
+
+  return p
+}
+
+export function useGitBranch(cwd: string): null | string {
+  const [branch, setBranch] = useState<null | string>(() => cache.get(cwd)?.branch ?? null)
+
+  useEffect(() => {
+    let cancelled = false
+
+    const tick = async () => {
+      const hit = cache.get(cwd)
+
+      if (hit && Date.now() - hit.at < TTL_MS) {
+        if (!cancelled) {
+          setBranch(hit.branch)
+        }
+
+        return
+      }
+
+      const b = await fetchBranch(cwd)
+      cache.set(cwd, { at: Date.now(), branch: b })
+
+      if (!cancelled) {
+        setBranch(b)
+      }
+    }
+
+    void tick()
+    const id = setInterval(() => void tick(), TTL_MS)
+
+    return () => {
+      cancelled = true
+      clearInterval(id)
+    }
+  }, [cwd])
+
+  return branch
+}

From 4aa52590d8f5551c89a9eea3aab06eca497086db Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 17:22:23 -0500
Subject: [PATCH 067/143] fix(tui): disambiguate /model picker rows when
 provider display names collide
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If the gateway returns two providers that resolve to the same display name
(e.g. `kimi-coding` and `kimi-coding-cn` both → "Kimi For Coding"), the
picker now appends the slug so users can tell them apart, in both the
provider list and the selected-provider header. No-op when names are
already unique.

Refs #10526 — the Python backend dedupe from #10599 skips one alias, but
user-defined providers, canonical overlays, and future regressions can
still surface as indistinguishable rows in the picker. This is a
client-side safety net on top of that.
---
 ui-tui/src/__tests__/providers.test.ts | 62 ++++++++++++++++++++++++++
 ui-tui/src/components/modelPicker.tsx  |  8 ++--
 ui-tui/src/domain/providers.ts         | 17 +++++++
 3 files changed, 84 insertions(+), 3 deletions(-)
 create mode 100644 ui-tui/src/__tests__/providers.test.ts
 create mode 100644 ui-tui/src/domain/providers.ts

diff --git a/ui-tui/src/__tests__/providers.test.ts b/ui-tui/src/__tests__/providers.test.ts
new file mode 100644
index 00000000000..a46102e8933
--- /dev/null
+++ b/ui-tui/src/__tests__/providers.test.ts
@@ -0,0 +1,62 @@
+import { describe, expect, it } from 'vitest'
+
+import { providerDisplayNames } from '../domain/providers.js'
+
+describe('providerDisplayNames', () => {
+  it('returns bare names when all are unique', () => {
+    expect(providerDisplayNames([{ name: 'Anthropic', slug: 'anthropic' }, { name: 'OpenAI', slug: 'openai' }])).toEqual(
+      ['Anthropic', 'OpenAI']
+    )
+  })
+
+  it('appends slug to every collision so the disambiguation is symmetric', () => {
+    expect(
+      providerDisplayNames([
+        { name: 'Kimi For Coding', slug: 'kimi-coding' },
+        { name: 'Kimi For Coding', slug: 'kimi-coding-cn' }
+      ])
+    ).toEqual(['Kimi For Coding (kimi-coding)', 'Kimi For Coding (kimi-coding-cn)'])
+  })
+
+  it('only disambiguates the colliding group', () => {
+    expect(
+      providerDisplayNames([
+        { name: 'Anthropic', slug: 'anthropic' },
+        { name: 'Foo', slug: 'foo-a' },
+        { name: 'Foo', slug: 'foo-b' }
+      ])
+    ).toEqual(['Anthropic', 'Foo (foo-a)', 'Foo (foo-b)'])
+  })
+
+  it('falls back to plain name if slug is empty', () => {
+    expect(
+      providerDisplayNames([
+        { name: 'Foo', slug: '' },
+        { name: 'Foo', slug: '' }
+      ])
+    ).toEqual(['Foo', 'Foo'])
+  })
+
+  it('skips disambiguation when slug equals name', () => {
+    expect(
+      providerDisplayNames([
+        { name: 'foo', slug: 'foo' },
+        { name: 'foo', slug: 'foo' }
+      ])
+    ).toEqual(['foo', 'foo'])
+  })
+
+  it('handles empty input', () => {
+    expect(providerDisplayNames([])).toEqual([])
+  })
+
+  it('preserves order', () => {
+    const input = [
+      { name: 'Z', slug: 'z' },
+      { name: 'A', slug: 'a1' },
+      { name: 'A', slug: 'a2' }
+    ]
+
+    expect(providerDisplayNames(input)).toEqual(['Z', 'A (a1)', 'A (a2)'])
+  })
+})
diff --git a/ui-tui/src/components/modelPicker.tsx b/ui-tui/src/components/modelPicker.tsx
index 10a00cdf19e..1bc95481da8 100644
--- a/ui-tui/src/components/modelPicker.tsx
+++ b/ui-tui/src/components/modelPicker.tsx
@@ -1,6 +1,7 @@
 import { Box, Text, useInput } from '@hermes/ink'
-import { useEffect, useState } from 'react'
+import { useEffect, useMemo, useState } from 'react'
 
+import { providerDisplayNames } from '../domain/providers.js'
 import type { GatewayClient } from '../gatewayClient.js'
 import type { ModelOptionProvider, ModelOptionsResponse } from '../gatewayTypes.js'
 import { asRpcResult, rpcErrorMessage } from '../lib/rpc.js'
@@ -59,6 +60,7 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
 
   const provider = providers[providerIdx]
   const models = provider?.models ?? []
+  const names = useMemo(() => providerDisplayNames(providers), [providers])
 
   useInput((ch, key) => {
     if (key.escape) {
@@ -160,7 +162,7 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
 
   if (stage === 'provider') {
     const rows = providers.map(
-      p => `${p.is_current ? '*' : ' '} ${p.name} · ${p.total_models ?? p.models?.length ?? 0} models`
+      (p, i) => `${p.is_current ? '*' : ' '} ${names[i]} · ${p.total_models ?? p.models?.length ?? 0} models`
     )
 
     const { items, off } = visibleItems(rows, providerIdx)
@@ -201,7 +203,7 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
         Select Model
       </Text>
 
-      <Text color={t.color.dim}>{provider?.name || '(unknown provider)'}</Text>
+      <Text color={t.color.dim}>{names[providerIdx] || '(unknown provider)'}</Text>
       {!models.length ? <Text color={t.color.dim}>no models listed for this provider</Text> : null}
       {provider?.warning ? <Text color={t.color.label}>warning: {provider.warning}</Text> : null}
       {off > 0 && <Text color={t.color.dim}> ↑ {off} more</Text>}
diff --git a/ui-tui/src/domain/providers.ts b/ui-tui/src/domain/providers.ts
new file mode 100644
index 00000000000..02cc99b922a
--- /dev/null
+++ b/ui-tui/src/domain/providers.ts
@@ -0,0 +1,17 @@
+export const providerDisplayNames = (providers: readonly { name: string; slug: string }[]): string[] => {
+  const counts = new Map<string, number>()
+
+  for (const p of providers) {
+    counts.set(p.name, (counts.get(p.name) ?? 0) + 1)
+  }
+
+  return providers.map(p => {
+    const dup = (counts.get(p.name) ?? 0) > 1
+
+    if (!dup || !p.slug || p.slug === p.name) {
+      return p.name
+    }
+
+    return `${p.name} (${p.slug})`
+  })
+}

From 52124384de5367585d9644826ccf2da6b3b7c63d Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 17:47:26 -0500
Subject: [PATCH 068/143] fix(tui): stable React keys in /model picker rows

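Use provider.slug (and a composite `slug:index:row` key for model rows)
instead of the rendered string, so entries that render to the same text
can't collapse two rows into one or trigger key-collision warnings.
Provider keys still assume slugs are unique (the `row-${idx}` fallback
only applies when a slug is missing); model keys include the row index,
so they are always distinct.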
---
 ui-tui/src/components/modelPicker.tsx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ui-tui/src/components/modelPicker.tsx b/ui-tui/src/components/modelPicker.tsx
index 1bc95481da8..406047bc11c 100644
--- a/ui-tui/src/components/modelPicker.tsx
+++ b/ui-tui/src/components/modelPicker.tsx
@@ -181,7 +181,7 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
           const idx = off + i
 
           return (
-            <Text color={providerIdx === idx ? t.color.cornsilk : t.color.dim} key={row}>
+            <Text color={providerIdx === idx ? t.color.cornsilk : t.color.dim} key={providers[idx]?.slug ?? `row-${idx}`}>
               {providerIdx === idx ? '▸ ' : '  '}
               {i + 1}. {row}
             </Text>
@@ -212,7 +212,7 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
         const idx = off + i
 
         return (
-          <Text color={modelIdx === idx ? t.color.cornsilk : t.color.dim} key={row}>
+          <Text color={modelIdx === idx ? t.color.cornsilk : t.color.dim} key={`${provider?.slug ?? 'prov'}:${idx}:${row}`}>
             {modelIdx === idx ? '▸ ' : '  '}
             {i + 1}. {row}
           </Text>

From 3366714ba4fb34a2fb933a96180236df488ab01f Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 17:48:34 -0500
Subject: [PATCH 069/143] feat(tui): double-press confirm on /clear and /new

Prevents accidental session loss: the first press prints
"press /clear again within 3s to confirm"; a second press inside
the window actually starts a new session. Outside the window the
gate re-arms.

Opt out with HERMES_TUI_NO_CONFIRM=1 for scripted / muscle-memory
workflows.

Refs #4069.
---
 ui-tui/src/__tests__/destructive.test.ts | 52 ++++++++++++++++++++++++
 ui-tui/src/app/slash/commands/core.ts    | 10 +++++
 ui-tui/src/config/env.ts                 |  3 ++
 ui-tui/src/domain/destructive.ts         | 23 +++++++++++
 4 files changed, 88 insertions(+)
 create mode 100644 ui-tui/src/__tests__/destructive.test.ts
 create mode 100644 ui-tui/src/domain/destructive.ts

diff --git a/ui-tui/src/__tests__/destructive.test.ts b/ui-tui/src/__tests__/destructive.test.ts
new file mode 100644
index 00000000000..3e19066c6e6
--- /dev/null
+++ b/ui-tui/src/__tests__/destructive.test.ts
@@ -0,0 +1,52 @@
+import { describe, expect, it } from 'vitest'
+
+import { CONFIRM_WINDOW_MS, createDestructiveGate } from '../domain/destructive.js'
+
+describe('createDestructiveGate', () => {
+  it('first request is not confirmed — it arms the gate', () => {
+    const g = createDestructiveGate()
+    expect(g.request('clear', 0)).toBe(false)
+  })
+
+  it('second request within window with same key is confirmed', () => {
+    const g = createDestructiveGate()
+    g.request('clear', 0)
+    expect(g.request('clear', 2_500)).toBe(true)
+  })
+
+  it('second request outside the window re-arms and is not confirmed', () => {
+    const g = createDestructiveGate()
+    g.request('clear', 0)
+    expect(g.request('clear', CONFIRM_WINDOW_MS + 1)).toBe(false)
+  })
+
+  it('different key re-arms the gate, does not confirm', () => {
+    const g = createDestructiveGate()
+    g.request('clear', 0)
+    expect(g.request('undo', 500)).toBe(false)
+    expect(g.request('undo', 900)).toBe(true)
+  })
+
+  it('confirmation consumes the pending state so a third press re-arms', () => {
+    const g = createDestructiveGate()
+    g.request('clear', 0)
+    g.request('clear', 500)
+    expect(g.request('clear', 600)).toBe(false)
+  })
+
+  it('reset clears pending state', () => {
+    const g = createDestructiveGate()
+    g.request('clear', 0)
+    g.reset()
+    expect(g.request('clear', 500)).toBe(false)
+  })
+
+  it('respects a custom window', () => {
+    const g = createDestructiveGate(100)
+    g.request('clear', 0)
+    expect(g.request('clear', 50)).toBe(true)
+
+    g.request('clear', 0)
+    expect(g.request('clear', 150)).toBe(false)
+  })
+})
diff --git a/ui-tui/src/app/slash/commands/core.ts b/ui-tui/src/app/slash/commands/core.ts
index dd5a9f58c82..690d6972d58 100644
--- a/ui-tui/src/app/slash/commands/core.ts
+++ b/ui-tui/src/app/slash/commands/core.ts
@@ -1,5 +1,7 @@
+import { NO_CONFIRM_DESTRUCTIVE } from '../../../config/env.js'
 import { dailyFortune, randomFortune } from '../../../content/fortunes.js'
 import { HOTKEYS } from '../../../content/hotkeys.js'
+import { createDestructiveGate } from '../../../domain/destructive.js'
 import { nextDetailsMode, parseDetailsMode } from '../../../domain/details.js'
 import type {
   ConfigGetValueResponse,
@@ -13,6 +15,8 @@ import { patchOverlayState } from '../../overlayStore.js'
 import { patchUiState } from '../../uiStore.js'
 import type { SlashCommand } from '../types.js'
 
+const destructiveGate = createDestructiveGate()
+
 const flagFromArg = (arg: string, current: boolean): boolean | null => {
   if (!arg) {
     return !current
@@ -82,6 +86,12 @@ export const coreCommands: SlashCommand[] = [
         return
       }
 
+      const label = cmd.startsWith('/new') ? '/new' : '/clear'
+
+      if (!NO_CONFIRM_DESTRUCTIVE && !destructiveGate.request('clear')) {
+        return ctx.transcript.sys(`press ${label} again within 3s to confirm (starts a new session)`)
+      }
+
       patchUiState({ status: 'forging session…' })
       ctx.session.newSession(cmd.startsWith('/new') ? 'new session started' : undefined)
     }
diff --git a/ui-tui/src/config/env.ts b/ui-tui/src/config/env.ts
index 3a476d6bc5f..999607dacf8 100644
--- a/ui-tui/src/config/env.ts
+++ b/ui-tui/src/config/env.ts
@@ -1,2 +1,5 @@
 export const STARTUP_RESUME_ID = (process.env.HERMES_TUI_RESUME ?? '').trim()
 export const MOUSE_TRACKING = !/^(?:1|true|yes|on)$/i.test((process.env.HERMES_TUI_DISABLE_MOUSE ?? '').trim())
+export const NO_CONFIRM_DESTRUCTIVE = /^(?:1|true|yes|on)$/i.test(
+  (process.env.HERMES_TUI_NO_CONFIRM ?? '').trim()
+)
diff --git a/ui-tui/src/domain/destructive.ts b/ui-tui/src/domain/destructive.ts
new file mode 100644
index 00000000000..3570de74b10
--- /dev/null
+++ b/ui-tui/src/domain/destructive.ts
@@ -0,0 +1,23 @@
+export const CONFIRM_WINDOW_MS = 3_000
+
+export interface DestructiveGate {
+  request: (key: string, now?: number) => boolean
+  reset: () => void
+}
+
+export const createDestructiveGate = (windowMs = CONFIRM_WINDOW_MS): DestructiveGate => {
+  let pending: { at: number; key: string } | null = null
+
+  return {
+    request: (key, now = Date.now()) => {
+      const confirmed = pending?.key === key && now - pending.at < windowMs
+
+      pending = confirmed ? null : { at: now, key }
+
+      return confirmed
+    },
+    reset: () => {
+      pending = null
+    }
+  }
+}

From 20eab355e753a61c3e7e0f648a50be0cd3d22431 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 17:49:40 -0500
Subject: [PATCH 070/143] feat(tui): add LIGHT_THEME preset for white/light
 terminal backgrounds

Splits the existing palette into DARK_THEME (current yellow-heavy
default) and LIGHT_THEME (darker browns + proper contrast on white).
DEFAULT_THEME aliases DARK_THEME, and flips to LIGHT_THEME when
HERMES_TUI_LIGHT=1 is set at launch.

Skin system (fromSkin) still layers on top of whichever preset is
active, so users can keep customizing on top of either palette.

Refs #11300.
---
 ui-tui/src/__tests__/theme.test.ts | 22 +++++++++-
 ui-tui/src/theme.ts                | 69 +++++++++++++++++++++++++-----
 2 files changed, 80 insertions(+), 11 deletions(-)

diff --git a/ui-tui/src/__tests__/theme.test.ts b/ui-tui/src/__tests__/theme.test.ts
index 86a9768b0fd..4fe165c8d56 100644
--- a/ui-tui/src/__tests__/theme.test.ts
+++ b/ui-tui/src/__tests__/theme.test.ts
@@ -1,6 +1,6 @@
 import { describe, expect, it } from 'vitest'
 
-import { DEFAULT_THEME, fromSkin } from '../theme.js'
+import { DARK_THEME, DEFAULT_THEME, fromSkin, LIGHT_THEME } from '../theme.js'
 
 describe('DEFAULT_THEME', () => {
   it('has brand defaults', () => {
@@ -15,6 +15,26 @@ describe('DEFAULT_THEME', () => {
   })
 })
 
+describe('LIGHT_THEME', () => {
+  it('avoids bright-yellow accents unreadable on white backgrounds (#11300)', () => {
+    expect(LIGHT_THEME.color.gold).not.toBe('#FFD700')
+    expect(LIGHT_THEME.color.amber).not.toBe('#FFBF00')
+    expect(LIGHT_THEME.color.dim).not.toBe('#B8860B')
+    expect(LIGHT_THEME.color.statusWarn).not.toBe('#FFD700')
+  })
+
+  it('keeps the same shape as DARK_THEME', () => {
+    expect(Object.keys(LIGHT_THEME.color).sort()).toEqual(Object.keys(DARK_THEME.color).sort())
+    expect(LIGHT_THEME.brand).toEqual(DARK_THEME.brand)
+  })
+})
+
+describe('DEFAULT_THEME aliasing', () => {
+  it('defaults to DARK_THEME when HERMES_TUI_LIGHT is unset', () => {
+    expect(DEFAULT_THEME).toBe(DARK_THEME)
+  })
+})
+
 describe('fromSkin', () => {
   it('overrides banner colors', () => {
     expect(fromSkin({ banner_title: '#FF0000' }, {}).color.gold).toBe('#FF0000')
diff --git a/ui-tui/src/theme.ts b/ui-tui/src/theme.ts
index 88bc3c39081..386e436f523 100644
--- a/ui-tui/src/theme.ts
+++ b/ui-tui/src/theme.ts
@@ -78,7 +78,17 @@ function mix(a: string, b: string, t: number) {
 
 // ── Defaults ─────────────────────────────────────────────────────────
 
-export const DEFAULT_THEME: Theme = {
+const BRAND: ThemeBrand = {
+  name: 'Hermes Agent',
+  icon: '⚕',
+  prompt: '❯',
+  welcome: 'Type your message or /help for commands.',
+  goodbye: 'Goodbye! ⚕',
+  tool: '┊',
+  helpHeader: '(^_^)? Commands'
+}
+
+export const DARK_THEME: Theme = {
   color: {
     gold: '#FFD700',
     amber: '#FFBF00',
@@ -112,20 +122,59 @@ export const DEFAULT_THEME: Theme = {
     shellDollar: '#4dabf7'
   },
 
-  brand: {
-    name: 'Hermes Agent',
-    icon: '⚕',
-    prompt: '❯',
-    welcome: 'Type your message or /help for commands.',
-    goodbye: 'Goodbye! ⚕',
-    tool: '┊',
-    helpHeader: '(^_^)? Commands'
-  },
+  brand: BRAND,
 
   bannerLogo: '',
   bannerHero: ''
 }
 
+// Light-terminal palette: darker golds/ambers that stay legible on white
+// backgrounds. Same shape as DARK_THEME so `fromSkin` still layers on top
+// cleanly (#11300).
+export const LIGHT_THEME: Theme = {
+  color: {
+    gold: '#8B6914',
+    amber: '#A0651C',
+    bronze: '#7A4F1F',
+    cornsilk: '#3D2F13',
+    dim: '#7A5A0F',
+    completionBg: '#F5F5F5',
+    completionCurrentBg: mix('#F5F5F5', '#A0651C', 0.25),
+
+    label: '#7A5A0F',
+    ok: '#2E7D32',
+    error: '#C62828',
+    warn: '#E65100',
+
+    prompt: '#2B2014',
+    sessionLabel: '#7A5A0F',
+    sessionBorder: '#7A5A0F',
+
+    statusBg: '#F5F5F5',
+    statusFg: '#333333',
+    statusGood: '#2E7D32',
+    statusWarn: '#8B6914',
+    statusBad: '#D84315',
+    statusCritical: '#B71C1C',
+    selectionBg: '#D4E4F7',
+
+    diffAdded: 'rgb(200,240,200)',
+    diffRemoved: 'rgb(240,200,200)',
+    diffAddedWord: 'rgb(27,94,32)',
+    diffRemovedWord: 'rgb(183,28,28)',
+    shellDollar: '#1565C0'
+  },
+
+  brand: BRAND,
+
+  bannerLogo: '',
+  bannerHero: ''
+}
+
+const LIGHT_MODE = /^(?:1|true|yes|on)$/i.test((process.env.HERMES_TUI_LIGHT ?? '').trim())
+
+export const DEFAULT_THEME: Theme = LIGHT_MODE ? LIGHT_THEME : DARK_THEME
+
 // ── Skin → Theme ─────────────────────────────────────────────────────
 
 export function fromSkin(

From 75377feb0729c8996a25448ddc3ddc0ecfb22cb0 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 17:55:53 -0500
Subject: [PATCH 071/143] =?UTF-8?q?fix(tui):=20make=20/clear=20confirm=20w?=
 =?UTF-8?q?indow=20humane=20(3s=20=E2=86=92=2030s,=20reset=20on=20other=20?=
 =?UTF-8?q?slash)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The 3s gate was too tight — users reading the prompt and retyping
consistently blow past it and get stuck in a loop ("press /clear
again within 3s" forever). Fixes:

- bump CONFIRM_WINDOW_MS 3_000 → 30_000
- drop the time number from the confirmation message to remove the
  pressure vibe: "press /clear again to confirm — starts a new session"
- reset the gate from createSlashHandler whenever any non-destructive
  slash command runs, so stale arming from 20s ago can't silently
  turn the next /clear into an unintended confirm
- export the gate + isDestructiveCommand helper for that wiring
- add armed() introspection method

Follow-up to #4069 / 3366714b.
---
 ui-tui/src/__tests__/destructive.test.ts | 15 ++++++++++++++-
 ui-tui/src/app/createSlashHandler.ts     |  7 +++++++
 ui-tui/src/app/slash/commands/core.ts    |  8 ++++++--
 ui-tui/src/domain/destructive.ts         |  8 ++++++--
 4 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/ui-tui/src/__tests__/destructive.test.ts b/ui-tui/src/__tests__/destructive.test.ts
index 3e19066c6e6..4ed7dc1b35c 100644
--- a/ui-tui/src/__tests__/destructive.test.ts
+++ b/ui-tui/src/__tests__/destructive.test.ts
@@ -3,6 +3,10 @@ import { describe, expect, it } from 'vitest'
 import { CONFIRM_WINDOW_MS, createDestructiveGate } from '../domain/destructive.js'
 
 describe('createDestructiveGate', () => {
+  it('uses a generous default window so real humans can retype (#4069)', () => {
+    expect(CONFIRM_WINDOW_MS).toBeGreaterThanOrEqual(15_000)
+  })
+
   it('first request is not confirmed — it arms the gate', () => {
     const g = createDestructiveGate()
     expect(g.request('clear', 0)).toBe(false)
@@ -11,7 +15,7 @@ describe('createDestructiveGate', () => {
   it('second request within window with same key is confirmed', () => {
     const g = createDestructiveGate()
     g.request('clear', 0)
-    expect(g.request('clear', 2_500)).toBe(true)
+    expect(g.request('clear', CONFIRM_WINDOW_MS - 1)).toBe(true)
   })
 
   it('second request outside the window re-arms and is not confirmed', () => {
@@ -20,6 +24,15 @@ describe('createDestructiveGate', () => {
     expect(g.request('clear', CONFIRM_WINDOW_MS + 1)).toBe(false)
   })
 
+  it('armed() reports the pending key while fresh, null otherwise', () => {
+    const g = createDestructiveGate(100)
+    expect(g.armed()).toBe(null)
+    g.request('clear')
+    expect(g.armed()).toBe('clear')
+    g.reset()
+    expect(g.armed()).toBe(null)
+  })
+
   it('different key re-arms the gate, does not confirm', () => {
     const g = createDestructiveGate()
     g.request('clear', 0)
diff --git a/ui-tui/src/app/createSlashHandler.ts b/ui-tui/src/app/createSlashHandler.ts
index 425e778ef3d..0bd2398d408 100644
--- a/ui-tui/src/app/createSlashHandler.ts
+++ b/ui-tui/src/app/createSlashHandler.ts
@@ -3,6 +3,7 @@ import type { SlashExecResponse } from '../gatewayTypes.js'
 import { asCommandDispatch, rpcErrorMessage } from '../lib/rpc.js'
 
 import type { SlashHandlerContext } from './interfaces.js'
+import { destructiveGate, isDestructiveCommand } from './slash/commands/core.js'
 import { findSlashCommand } from './slash/registry.js'
 import type { SlashRunCtx } from './slash/types.js'
 import { getUiState } from './uiStore.js'
@@ -40,11 +41,17 @@ export function createSlashHandler(ctx: SlashHandlerContext): (cmd: string) => b
     const found = findSlashCommand(parsed.name)
 
     if (found) {
+      if (!isDestructiveCommand(found.name)) {
+        destructiveGate.reset()
+      }
+
       found.run(parsed.arg, runCtx, cmd)
 
       return true
     }
 
+    destructiveGate.reset()
+
     if (catalog?.canon) {
       const needle = `/${parsed.name}`.toLowerCase()
 
diff --git a/ui-tui/src/app/slash/commands/core.ts b/ui-tui/src/app/slash/commands/core.ts
index 690d6972d58..bbb5e2ec116 100644
--- a/ui-tui/src/app/slash/commands/core.ts
+++ b/ui-tui/src/app/slash/commands/core.ts
@@ -15,7 +15,11 @@ import { patchOverlayState } from '../../overlayStore.js'
 import { patchUiState } from '../../uiStore.js'
 import type { SlashCommand } from '../types.js'
 
-const destructiveGate = createDestructiveGate()
+export const destructiveGate = createDestructiveGate()
+
+const DESTRUCTIVE_COMMANDS = new Set(['clear', 'new'])
+
+export const isDestructiveCommand = (name: string) => DESTRUCTIVE_COMMANDS.has(name)
 
 const flagFromArg = (arg: string, current: boolean): boolean | null => {
   if (!arg) {
@@ -89,7 +93,7 @@ export const coreCommands: SlashCommand[] = [
       const label = cmd.startsWith('/new') ? '/new' : '/clear'
 
       if (!NO_CONFIRM_DESTRUCTIVE && !destructiveGate.request('clear')) {
-        return ctx.transcript.sys(`press ${label} again within 3s to confirm (starts a new session)`)
+        return ctx.transcript.sys(`press ${label} again to confirm — starts a new session`)
       }
 
       patchUiState({ status: 'forging session…' })
diff --git a/ui-tui/src/domain/destructive.ts b/ui-tui/src/domain/destructive.ts
index 3570de74b10..f808b2a30f5 100644
--- a/ui-tui/src/domain/destructive.ts
+++ b/ui-tui/src/domain/destructive.ts
@@ -1,6 +1,7 @@
-export const CONFIRM_WINDOW_MS = 3_000
+export const CONFIRM_WINDOW_MS = 30_000
 
 export interface DestructiveGate {
+  armed: () => null | string
   request: (key: string, now?: number) => boolean
   reset: () => void
 }
@@ -8,9 +9,12 @@ export interface DestructiveGate {
 export const createDestructiveGate = (windowMs = CONFIRM_WINDOW_MS): DestructiveGate => {
   let pending: { at: number; key: string } | null = null
 
+  const isFresh = (now: number) => pending != null && now - pending.at < windowMs
+
   return {
+    armed: () => (pending != null && isFresh(Date.now()) ? pending.key : null),
     request: (key, now = Date.now()) => {
-      const confirmed = pending?.key === key && now - pending.at < windowMs
+      const confirmed = pending?.key === key && isFresh(now)
 
       pending = confirmed ? null : { at: now, key }
 

From df5ca5065f9204e4fb8d67b8103980d849e5fcd9 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 18 Apr 2026 18:04:08 -0500
Subject: [PATCH 072/143] feat(tui): replace /clear double-press gate with a
 proper confirm overlay
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The time-window gate felt wrong — users would hit /clear, read the
prompt, retype, and consistently blow past the window. Swapping to a
real yes/no overlay that blocks input like the existing Approval and
Clarify prompts.

- add ConfirmReq type + OverlayState.confirm + $isBlocked coverage
- ConfirmPrompt component (prompts.tsx): cancel row on top as the
  default, danger-coloured confirm row on the bottom, Y/N hotkeys,
  Enter on default = cancel, Esc/Ctrl+C cancel
- wire into PromptZone (appOverlays.tsx)
- /clear + /new now push onto the overlay instead of arming a timer
- HERMES_TUI_NO_CONFIRM=1 still skips the prompt for scripting
- drop the destructiveGate + createSlashHandler reset wiring
  (destructive.ts and its tests removed)

Refs #4069.
---
 ui-tui/src/__tests__/destructive.test.ts | 65 -------------------
 ui-tui/src/app/createSlashHandler.ts     |  7 --
 ui-tui/src/app/interfaces.ts             |  2 +
 ui-tui/src/app/overlayStore.ts           |  5 +-
 ui-tui/src/app/slash/commands/core.ts    | 30 +++++----
 ui-tui/src/components/appOverlays.tsx    | 19 +++++-
 ui-tui/src/components/prompts.tsx        | 83 +++++++++++++++++++++++-
 ui-tui/src/domain/destructive.ts         | 27 --------
 ui-tui/src/types.ts                      |  9 +++
 9 files changed, 132 insertions(+), 115 deletions(-)
 delete mode 100644 ui-tui/src/__tests__/destructive.test.ts
 delete mode 100644 ui-tui/src/domain/destructive.ts

diff --git a/ui-tui/src/__tests__/destructive.test.ts b/ui-tui/src/__tests__/destructive.test.ts
deleted file mode 100644
index 4ed7dc1b35c..00000000000
--- a/ui-tui/src/__tests__/destructive.test.ts
+++ /dev/null
@@ -1,65 +0,0 @@
-import { describe, expect, it } from 'vitest'
-
-import { CONFIRM_WINDOW_MS, createDestructiveGate } from '../domain/destructive.js'
-
-describe('createDestructiveGate', () => {
-  it('uses a generous default window so real humans can retype (#4069)', () => {
-    expect(CONFIRM_WINDOW_MS).toBeGreaterThanOrEqual(15_000)
-  })
-
-  it('first request is not confirmed — it arms the gate', () => {
-    const g = createDestructiveGate()
-    expect(g.request('clear', 0)).toBe(false)
-  })
-
-  it('second request within window with same key is confirmed', () => {
-    const g = createDestructiveGate()
-    g.request('clear', 0)
-    expect(g.request('clear', CONFIRM_WINDOW_MS - 1)).toBe(true)
-  })
-
-  it('second request outside the window re-arms and is not confirmed', () => {
-    const g = createDestructiveGate()
-    g.request('clear', 0)
-    expect(g.request('clear', CONFIRM_WINDOW_MS + 1)).toBe(false)
-  })
-
-  it('armed() reports the pending key while fresh, null otherwise', () => {
-    const g = createDestructiveGate(100)
-    expect(g.armed()).toBe(null)
-    g.request('clear')
-    expect(g.armed()).toBe('clear')
-    g.reset()
-    expect(g.armed()).toBe(null)
-  })
-
-  it('different key re-arms the gate, does not confirm', () => {
-    const g = createDestructiveGate()
-    g.request('clear', 0)
-    expect(g.request('undo', 500)).toBe(false)
-    expect(g.request('undo', 900)).toBe(true)
-  })
-
-  it('confirmation consumes the pending state so a third press re-arms', () => {
-    const g = createDestructiveGate()
-    g.request('clear', 0)
-    g.request('clear', 500)
-    expect(g.request('clear', 600)).toBe(false)
-  })
-
-  it('reset clears pending state', () => {
-    const g = createDestructiveGate()
-    g.request('clear', 0)
-    g.reset()
-    expect(g.request('clear', 500)).toBe(false)
-  })
-
-  it('respects a custom window', () => {
-    const g = createDestructiveGate(100)
-    g.request('clear', 0)
-    expect(g.request('clear', 50)).toBe(true)
-
-    g.request('clear', 0)
-    expect(g.request('clear', 150)).toBe(false)
-  })
-})
diff --git a/ui-tui/src/app/createSlashHandler.ts b/ui-tui/src/app/createSlashHandler.ts
index 0bd2398d408..425e778ef3d 100644
--- a/ui-tui/src/app/createSlashHandler.ts
+++ b/ui-tui/src/app/createSlashHandler.ts
@@ -3,7 +3,6 @@ import type { SlashExecResponse } from '../gatewayTypes.js'
 import { asCommandDispatch, rpcErrorMessage } from '../lib/rpc.js'
 
 import type { SlashHandlerContext } from './interfaces.js'
-import { destructiveGate, isDestructiveCommand } from './slash/commands/core.js'
 import { findSlashCommand } from './slash/registry.js'
 import type { SlashRunCtx } from './slash/types.js'
 import { getUiState } from './uiStore.js'
@@ -41,17 +40,11 @@ export function createSlashHandler(ctx: SlashHandlerContext): (cmd: string) => b
     const found = findSlashCommand(parsed.name)
 
     if (found) {
-      if (!isDestructiveCommand(found.name)) {
-        destructiveGate.reset()
-      }
-
       found.run(parsed.arg, runCtx, cmd)
 
       return true
     }
 
-    destructiveGate.reset()
-
     if (catalog?.canon) {
       const needle = `/${parsed.name}`.toLowerCase()
 
diff --git a/ui-tui/src/app/interfaces.ts b/ui-tui/src/app/interfaces.ts
index a23b2068836..353c56535be 100644
--- a/ui-tui/src/app/interfaces.ts
+++ b/ui-tui/src/app/interfaces.ts
@@ -10,6 +10,7 @@ import type {
   ActivityItem,
   ApprovalReq,
   ClarifyReq,
+  ConfirmReq,
   DetailsMode,
   Msg,
   PanelSection,
@@ -53,6 +54,7 @@ export interface GatewayProviderProps {
 export interface OverlayState {
   approval: ApprovalReq | null
   clarify: ClarifyReq | null
+  confirm: ConfirmReq | null
   modelPicker: boolean
   pager: null | PagerState
   picker: boolean
diff --git a/ui-tui/src/app/overlayStore.ts b/ui-tui/src/app/overlayStore.ts
index a2ea4002331..06dbd27a789 100644
--- a/ui-tui/src/app/overlayStore.ts
+++ b/ui-tui/src/app/overlayStore.ts
@@ -5,6 +5,7 @@ import type { OverlayState } from './interfaces.js'
 const buildOverlayState = (): OverlayState => ({
   approval: null,
   clarify: null,
+  confirm: null,
   modelPicker: false,
   pager: null,
   picker: false,
@@ -17,8 +18,8 @@ export const $overlayState = atom<OverlayState>(buildOverlayState())
 
 export const $isBlocked = computed(
   $overlayState,
-  ({ approval, clarify, modelPicker, pager, picker, secret, skillsHub, sudo }) =>
-    Boolean(approval || clarify || modelPicker || pager || picker || secret || skillsHub || sudo)
+  ({ approval, clarify, confirm, modelPicker, pager, picker, secret, skillsHub, sudo }) =>
+    Boolean(approval || clarify || confirm || modelPicker || pager || picker || secret || skillsHub || sudo)
 )
 
 export const getOverlayState = () => $overlayState.get()
diff --git a/ui-tui/src/app/slash/commands/core.ts b/ui-tui/src/app/slash/commands/core.ts
index bbb5e2ec116..0f8916c5cb6 100644
--- a/ui-tui/src/app/slash/commands/core.ts
+++ b/ui-tui/src/app/slash/commands/core.ts
@@ -1,7 +1,6 @@
 import { NO_CONFIRM_DESTRUCTIVE } from '../../../config/env.js'
 import { dailyFortune, randomFortune } from '../../../content/fortunes.js'
 import { HOTKEYS } from '../../../content/hotkeys.js'
-import { createDestructiveGate } from '../../../domain/destructive.js'
 import { nextDetailsMode, parseDetailsMode } from '../../../domain/details.js'
 import type {
   ConfigGetValueResponse,
@@ -15,12 +14,6 @@ import { patchOverlayState } from '../../overlayStore.js'
 import { patchUiState } from '../../uiStore.js'
 import type { SlashCommand } from '../types.js'
 
-export const destructiveGate = createDestructiveGate()
-
-const DESTRUCTIVE_COMMANDS = new Set(['clear', 'new'])
-
-export const isDestructiveCommand = (name: string) => DESTRUCTIVE_COMMANDS.has(name)
-
 const flagFromArg = (arg: string, current: boolean): boolean | null => {
   if (!arg) {
     return !current
@@ -90,14 +83,27 @@ export const coreCommands: SlashCommand[] = [
         return
       }
 
-      const label = cmd.startsWith('/new') ? '/new' : '/clear'
+      const isNew = cmd.startsWith('/new')
 
-      if (!NO_CONFIRM_DESTRUCTIVE && !destructiveGate.request('clear')) {
-        return ctx.transcript.sys(`press ${label} again to confirm — starts a new session`)
+      const commit = () => {
+        patchUiState({ status: 'forging session…' })
+        ctx.session.newSession(isNew ? 'new session started' : undefined)
       }
 
-      patchUiState({ status: 'forging session…' })
-      ctx.session.newSession(cmd.startsWith('/new') ? 'new session started' : undefined)
+      if (NO_CONFIRM_DESTRUCTIVE) {
+        return commit()
+      }
+
+      patchOverlayState({
+        confirm: {
+          cancelLabel: 'No, keep going',
+          confirmLabel: isNew ? 'Yes, start a new session' : 'Yes, clear the session',
+          danger: true,
+          detail: 'This ends the current conversation and clears the transcript.',
+          onConfirm: commit,
+          title: isNew ? 'Start a new session?' : 'Clear the current session?'
+        }
+      })
     }
   },
 
diff --git a/ui-tui/src/components/appOverlays.tsx b/ui-tui/src/components/appOverlays.tsx
index 27db09024fc..844996af3f9 100644
--- a/ui-tui/src/components/appOverlays.tsx
+++ b/ui-tui/src/components/appOverlays.tsx
@@ -9,7 +9,7 @@ import { $uiState } from '../app/uiStore.js'
 import { FloatBox } from './appChrome.js'
 import { MaskedPrompt } from './maskedPrompt.js'
 import { ModelPicker } from './modelPicker.js'
-import { ApprovalPrompt, ClarifyPrompt } from './prompts.js'
+import { ApprovalPrompt, ClarifyPrompt, ConfirmPrompt } from './prompts.js'
 import { SessionPicker } from './sessionPicker.js'
 import { SkillsHub } from './skillsHub.js'
 
@@ -31,6 +31,23 @@ export function PromptZone({
     )
   }
 
+  if (overlay.confirm) {
+    const req = overlay.confirm
+
+    const onConfirm = () => {
+      patchOverlayState({ confirm: null })
+      req.onConfirm()
+    }
+
+    const onCancel = () => patchOverlayState({ confirm: null })
+
+    return (
+      <Box flexDirection="column" flexShrink={0} paddingX={1} paddingY={1}>
+        <ConfirmPrompt onCancel={onCancel} onConfirm={onConfirm} req={req} t={ui.theme} />
+      </Box>
+    )
+  }
+
   if (overlay.clarify) {
     return (
       <Box flexDirection="column" flexShrink={0} paddingX={1} paddingY={1}>
diff --git a/ui-tui/src/components/prompts.tsx b/ui-tui/src/components/prompts.tsx
index c7ced5b31d6..cd9c3a2d1d7 100644
--- a/ui-tui/src/components/prompts.tsx
+++ b/ui-tui/src/components/prompts.tsx
@@ -2,7 +2,7 @@ import { Box, Text, useInput } from '@hermes/ink'
 import { useState } from 'react'
 
 import type { Theme } from '../theme.js'
-import type { ApprovalReq, ClarifyReq } from '../types.js'
+import type { ApprovalReq, ClarifyReq, ConfirmReq } from '../types.js'
 
 import { TextInput } from './textInput.js'
 
@@ -151,6 +151,80 @@ export function ClarifyPrompt({ cols = 80, onAnswer, onCancel, req, t }: Clarify
   )
 }
 
+export function ConfirmPrompt({ onCancel, onConfirm, req, t }: ConfirmPromptProps) {
+  const [sel, setSel] = useState(0)
+
+  useInput((ch, key) => {
+    if (key.escape || (key.ctrl && ch.toLowerCase() === 'c')) {
+      onCancel()
+
+      return
+    }
+
+    const lower = ch.toLowerCase()
+
+    if (lower === 'y') {
+      onConfirm()
+
+      return
+    }
+
+    if (lower === 'n') {
+      onCancel()
+
+      return
+    }
+
+    if (key.upArrow && sel > 0) {
+      setSel(0)
+    }
+
+    if (key.downArrow && sel < 1) {
+      setSel(1)
+    }
+
+    if (key.return) {
+      sel === 0 ? onCancel() : onConfirm()
+    }
+  })
+
+  const accent = req.danger ? t.color.error : t.color.warn
+  const confirmLabel = req.confirmLabel ?? 'Yes'
+  const cancelLabel = req.cancelLabel ?? 'No'
+
+  const rows = [
+    { color: t.color.cornsilk, label: cancelLabel },
+    { color: req.danger ? t.color.error : t.color.cornsilk, label: confirmLabel }
+  ]
+
+  return (
+    <Box borderColor={accent} borderStyle="double" flexDirection="column" paddingX={1}>
+      <Text bold color={accent}>
+        {req.danger ? '⚠' : '?'} {req.title}
+      </Text>
+
+      {req.detail ? (
+        <Box paddingLeft={1}>
+          <Text color={t.color.cornsilk} wrap="truncate-end">
+            {req.detail}
+          </Text>
+        </Box>
+      ) : null}
+
+      <Text />
+
+      {rows.map((row, i) => (
+        <Text key={row.label}>
+          <Text color={sel === i ? accent : t.color.dim}>{sel === i ? '▸ ' : '  '}</Text>
+          <Text color={sel === i ? row.color : t.color.dim}>{row.label}</Text>
+        </Text>
+      ))}
+
+      <Text color={t.color.dim}>↑/↓ select · Enter confirm · Y/N quick · Esc cancel</Text>
+    </Box>
+  )
+}
+
 interface ApprovalPromptProps {
   onChoice: (s: string) => void
   req: ApprovalReq
@@ -164,3 +238,10 @@ interface ClarifyPromptProps {
   req: ClarifyReq
   t: Theme
 }
+
+interface ConfirmPromptProps {
+  onCancel: () => void
+  onConfirm: () => void
+  req: ConfirmReq
+  t: Theme
+}
diff --git a/ui-tui/src/domain/destructive.ts b/ui-tui/src/domain/destructive.ts
deleted file mode 100644
index f808b2a30f5..00000000000
--- a/ui-tui/src/domain/destructive.ts
+++ /dev/null
@@ -1,27 +0,0 @@
-export const CONFIRM_WINDOW_MS = 30_000
-
-export interface DestructiveGate {
-  armed: () => null | string
-  request: (key: string, now?: number) => boolean
-  reset: () => void
-}
-
-export const createDestructiveGate = (windowMs = CONFIRM_WINDOW_MS): DestructiveGate => {
-  let pending: { at: number; key: string } | null = null
-
-  const isFresh = (now: number) => pending != null && now - pending.at < windowMs
-
-  return {
-    armed: () => (pending != null && isFresh(Date.now()) ? pending.key : null),
-    request: (key, now = Date.now()) => {
-      const confirmed = pending?.key === key && isFresh(now)
-
-      pending = confirmed ? null : { at: now, key }
-
-      return confirmed
-    },
-    reset: () => {
-      pending = null
-    }
-  }
-}
diff --git a/ui-tui/src/types.ts b/ui-tui/src/types.ts
index 98cc31203c5..3045a74a856 100644
--- a/ui-tui/src/types.ts
+++ b/ui-tui/src/types.ts
@@ -29,6 +29,15 @@ export interface ApprovalReq {
   description: string
 }
 
+export interface ConfirmReq {
+  cancelLabel?: string
+  confirmLabel?: string
+  danger?: boolean
+  detail?: string
+  onConfirm: () => void
+  title: string
+}
+
 export interface ClarifyReq {
   choices: string[] | null
   question: string

From cb4addacab4679914878ceaab3be7bd1011ffb7a Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 18 Apr 2026 17:32:17 -0700
Subject: [PATCH 073/143] fix(gateway): auto-resume sessions after
 drain-timeout restart (#11852) (#12301)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The shutdown banner promised "send any message after restart to resume
where it left off" but the code did the opposite: a drain-timeout
restart skipped the .clean_shutdown marker, which made the next startup
call suspend_recently_active(), which marked the session suspended,
which made get_or_create_session() spawn a fresh session_id with a
'Session automatically reset. Use /resume...' notice — contradicting
the banner.

Introduce a resume_pending state on SessionEntry that is distinct from
suspended. Drain-timeout shutdown flags active sessions resume_pending
instead of letting startup-wide suspension destroy them. The next
message on the same session_key preserves the session_id, reloads the
transcript, and the agent receives a reason-aware restart-resume
system note that subsumes the existing tool-tail auto-continue note
(PR #9934).

Terminal escalation still flows through the existing
.restart_failure_counts stuck-loop counter (PR #7536, threshold 3) —
no parallel counter on SessionEntry. suspended still wins over
resume_pending in get_or_create_session() so genuinely stuck sessions
converge to a clean slate.

Spec: PR #11852 (BrennerSpear). Implementation follows the spec with
the approved correction (reuse .restart_failure_counts rather than
adding a resume_attempts field).

Changes:
- gateway/session.py: SessionEntry.resume_pending/resume_reason/
  last_resume_marked_at + to_dict/from_dict; SessionStore
  .mark_resume_pending()/clear_resume_pending(); get_or_create_session()
  returns existing entry when resume_pending (suspended still wins);
  suspend_recently_active() skips resume_pending entries.
- gateway/run.py: _stop_impl() drain-timeout branch marks active
  sessions resume_pending before _interrupt_running_agents();
  _run_agent() injects reason-aware restart-resume system note that
  subsumes the tool-tail case; successful-turn cleanup also clears
  resume_pending next to _clear_restart_failure_count();
  _notify_active_sessions_of_shutdown() softens the restart banner to
  'I'll try to resume where you left off' (honest about stuck-loop
  escalation).
- tests/gateway/test_restart_resume_pending.py: 29 new tests covering
  SessionEntry roundtrip, mark/clear helpers, get_or_create_session
  precedence (suspended > resume_pending), suspend_recently_active
  skip, drain-timeout mark reason (restart vs shutdown), system-note
  injection decision tree (including tool-tail subsumption), banner
  wording, and stuck-loop escalation override.
---
 gateway/run.py                               |  70 ++-
 gateway/session.py                           | 107 +++-
 tests/gateway/test_restart_resume_pending.py | 610 +++++++++++++++++++
 3 files changed, 782 insertions(+), 5 deletions(-)
 create mode 100644 tests/gateway/test_restart_resume_pending.py

diff --git a/gateway/run.py b/gateway/run.py
index 1525ad14776..8683c5a7526 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -1539,7 +1539,7 @@ class GatewayRunner:
         action = "restarting" if self._restart_requested else "shutting down"
         hint = (
             "Your current task will be interrupted. "
-            "Send any message after restart to resume where it left off."
+            "Send any message after restart and I'll try to resume where you left off."
             if self._restart_requested
             else "Your current task will be interrupted."
         )
@@ -2373,6 +2373,27 @@ class GatewayRunner:
                     timeout,
                     self._running_agent_count(),
                 )
+                # Mark forcibly-interrupted sessions as resume_pending BEFORE
+                # interrupting the agents.  This preserves each session's
+                # session_id + transcript so the next message on the same
+                # session_key auto-resumes from the existing conversation
+                # instead of getting routed through suspend_recently_active()
+                # and converted into a fresh session.  Terminal escalation
+                # for genuinely stuck sessions still flows through the
+                # existing ``.restart_failure_counts`` stuck-loop counter
+                # (incremented below, threshold 3), which sets
+                # ``suspended=True`` and overrides resume_pending.
+                _resume_reason = (
+                    "restart_timeout" if self._restart_requested else "shutdown_timeout"
+                )
+                for _sk in list(active_agents.keys()):
+                    try:
+                        self.session_store.mark_resume_pending(_sk, _resume_reason)
+                    except Exception as _e:
+                        logger.debug(
+                            "mark_resume_pending failed for %s: %s",
+                            _sk[:20], _e,
+                        )
                 self._interrupt_running_agents(
                     "Gateway restarting" if self._restart_requested else "Gateway shutting down"
                 )
@@ -4152,8 +4173,20 @@ class GatewayRunner:
             # Successful turn — clear any stuck-loop counter for this session.
             # This ensures the counter only accumulates across CONSECUTIVE
             # restarts where the session was active (never completed).
+            #
+            # Also clear the resume_pending flag (set by drain-timeout
+            # shutdown) — the turn ran to completion, so recovery
+            # succeeded and subsequent messages should no longer receive
+            # the restart-interruption system note.
             if session_key:
                 self._clear_restart_failure_count(session_key)
+                try:
+                    self.session_store.clear_resume_pending(session_key)
+                except Exception as _e:
+                    logger.debug(
+                        "clear_resume_pending failed for %s: %s",
+                        session_key[:20], _e,
+                    )
 
             # Surface error details when the agent failed silently (final_response=None)
             if not response and agent_result.get("failed"):
@@ -9427,7 +9460,40 @@ class GatewayRunner:
             # restart, crash, SIGTERM).  Prepend a system note so the model
             # finishes processing the pending tool results before addressing
             # the user's new message.  (#4493)
-            if agent_history and agent_history[-1].get("role") == "tool":
+            #
+            # Session-level resume_pending (set on drain-timeout shutdown)
+            # escalates the wording — the transcript's last role may be
+            # anything (tool, assistant with unfinished work, etc.), so we
+            # give a stronger, reason-aware instruction that subsumes the
+            # tool-tail case.
+            _resume_entry = None
+            if session_key:
+                try:
+                    _resume_entry = self.session_store._entries.get(session_key)
+                except Exception:
+                    _resume_entry = None
+            _is_resume_pending = bool(
+                _resume_entry is not None and getattr(_resume_entry, "resume_pending", False)
+            )
+
+            if _is_resume_pending:
+                _reason = getattr(_resume_entry, "resume_reason", None) or "restart_timeout"
+                _reason_phrase = (
+                    "a gateway restart"
+                    if _reason == "restart_timeout"
+                    else "a gateway shutdown"
+                    if _reason == "shutdown_timeout"
+                    else "a gateway interruption"
+                )
+                message = (
+                    f"[System note: Your previous turn in this session was interrupted "
+                    f"by {_reason_phrase}. The conversation history below is intact. "
+                    f"If it contains unfinished tool result(s), process them first and "
+                    f"summarize what was accomplished, then address the user's new "
+                    f"message below.]\n\n"
+                    + message
+                )
+            elif agent_history and agent_history[-1].get("role") == "tool":
                 message = (
                     "[System note: Your previous turn was interrupted before you could "
                     "process the last tool result(s). The conversation history contains "
diff --git a/gateway/session.py b/gateway/session.py
index 4cb623128c7..8b31c2b0aa2 100644
--- a/gateway/session.py
+++ b/gateway/session.py
@@ -377,7 +377,19 @@ class SessionEntry:
     # this session (create a new session_id) so the user starts fresh.
     # Set by /stop to break stuck-resume loops (#7536).
     suspended: bool = False
-    
+
+    # When True the session was interrupted by a gateway restart/shutdown
+    # drain timeout, but recovery is still expected.  Unlike ``suspended``,
+    # ``resume_pending`` preserves the existing session_id on next access —
+    # the user stays on the same transcript and the agent auto-continues
+    # from where it left off.  Cleared after the next successful turn.
+    # Escalation to ``suspended`` is handled by the existing
+    # ``.restart_failure_counts`` stuck-loop counter (#7536), not by a
+    # parallel counter on this entry.
+    resume_pending: bool = False
+    resume_reason: Optional[str] = None  # e.g. "restart_timeout"
+    last_resume_marked_at: Optional[datetime] = None
+
     def to_dict(self) -> Dict[str, Any]:
         result = {
             "session_key": self.session_key,
@@ -397,6 +409,13 @@ class SessionEntry:
             "cost_status": self.cost_status,
             "memory_flushed": self.memory_flushed,
             "suspended": self.suspended,
+            "resume_pending": self.resume_pending,
+            "resume_reason": self.resume_reason,
+            "last_resume_marked_at": (
+                self.last_resume_marked_at.isoformat()
+                if self.last_resume_marked_at
+                else None
+            ),
         }
         if self.origin:
             result["origin"] = self.origin.to_dict()
@@ -414,7 +433,15 @@ class SessionEntry:
                 platform = Platform(data["platform"])
             except ValueError as e:
                 logger.debug("Unknown platform value %r: %s", data["platform"], e)
-        
+
+        last_resume_marked_at = None
+        _lrma = data.get("last_resume_marked_at")
+        if _lrma:
+            try:
+                last_resume_marked_at = datetime.fromisoformat(_lrma)
+            except (TypeError, ValueError):
+                last_resume_marked_at = None
+
         return cls(
             session_key=data["session_key"],
             session_id=data["session_id"],
@@ -434,6 +461,9 @@ class SessionEntry:
             cost_status=data.get("cost_status", "unknown"),
             memory_flushed=data.get("memory_flushed", False),
             suspended=data.get("suspended", False),
+            resume_pending=data.get("resume_pending", False),
+            resume_reason=data.get("resume_reason"),
+            last_resume_marked_at=last_resume_marked_at,
         )
 
 
@@ -710,9 +740,23 @@ class SessionStore:
                 entry = self._entries[session_key]
 
                 # Auto-reset sessions marked as suspended (e.g. after /stop
-                # broke a stuck loop — #7536).
+                # broke a stuck loop — #7536).  ``suspended`` is the hard
+                # forced-wipe signal and always wins over ``resume_pending``,
+                # so repeated interrupted restarts that escalate via the
+                # existing ``.restart_failure_counts`` stuck-loop counter
+                # still converge to a clean slate.
                 if entry.suspended:
                     reset_reason = "suspended"
+                elif entry.resume_pending:
+                    # Restart-interrupted session: preserve the session_id
+                    # and return the existing entry so the transcript
+                    # reloads intact.  ``resume_pending`` is cleared after
+                    # the NEXT successful turn completes (not here), which
+                    # means a re-interrupted retry keeps trying — the
+                    # stuck-loop counter handles terminal escalation.
+                    entry.updated_at = now
+                    self._save()
+                    return entry
                 else:
                     reset_reason = self._should_reset(entry, source)
                 if not reset_reason:
@@ -802,6 +846,55 @@ class SessionStore:
                 return True
         return False
 
+    def mark_resume_pending(
+        self,
+        session_key: str,
+        reason: str = "restart_timeout",
+    ) -> bool:
+        """Mark a session as resumable after a restart interruption.
+
+        Unlike ``suspend_session()``, this preserves the existing
+        ``session_id`` and the transcript.  The next call to
+        ``get_or_create_session()`` for this key returns the same entry
+        so the user auto-resumes on the same conversation lane.
+
+        Returns True if the session existed and was marked.
+        """
+        with self._lock:
+            self._ensure_loaded_locked()
+            if session_key in self._entries:
+                entry = self._entries[session_key]
+                # Never override an explicit ``suspended`` — that is a hard
+                # forced-wipe signal (from /stop or stuck-loop escalation).
+                if entry.suspended:
+                    return False
+                entry.resume_pending = True
+                entry.resume_reason = reason
+                entry.last_resume_marked_at = _now()
+                self._save()
+                return True
+        return False
+
+    def clear_resume_pending(self, session_key: str) -> bool:
+        """Clear the resume-pending flag after a successful resumed turn.
+
+        Called from the gateway after ``run_conversation()`` returns a
+        final response for a session that had ``resume_pending=True``,
+        signalling that recovery succeeded.
+
+        Returns True if a flag was cleared.
+        """
+        with self._lock:
+            self._ensure_loaded_locked()
+            entry = self._entries.get(session_key)
+            if entry is None or not entry.resume_pending:
+                return False
+            entry.resume_pending = False
+            entry.resume_reason = None
+            entry.last_resume_marked_at = None
+            self._save()
+            return True
+
     def prune_old_entries(self, max_age_days: int) -> int:
         """Drop SessionEntry records older than max_age_days.
 
@@ -861,6 +954,12 @@ class SessionStore:
         (#7536).  Only suspends sessions updated within *max_age_seconds*
         to avoid resetting long-idle sessions that are harmless to resume.
         Returns the number of sessions that were suspended.
+
+        Entries flagged ``resume_pending=True`` are skipped — those were
+        marked intentionally by the drain-timeout path as recoverable.
+        Terminal escalation for genuinely stuck ``resume_pending`` sessions
+        is handled by the existing ``.restart_failure_counts`` stuck-loop
+        counter, which runs after this method on startup.
         """
         from datetime import timedelta
 
@@ -869,6 +968,8 @@ class SessionStore:
         with self._lock:
             self._ensure_loaded_locked()
             for entry in self._entries.values():
+                if entry.resume_pending:
+                    continue
                 if not entry.suspended and entry.updated_at >= cutoff:
                     entry.suspended = True
                     count += 1
diff --git a/tests/gateway/test_restart_resume_pending.py b/tests/gateway/test_restart_resume_pending.py
new file mode 100644
index 00000000000..a18d85cc469
--- /dev/null
+++ b/tests/gateway/test_restart_resume_pending.py
@@ -0,0 +1,610 @@
+"""Tests for the resume_pending session continuity path.
+
+Covers the behaviour introduced to fix the ``Gateway shutting down ...
+task will be interrupted`` follow-up bug (spec: PR #11852, builds on
+PRs #9850, #9934, #7536):
+
+1. When a gateway restart drain times out and agents are force-interrupted,
+   the affected sessions are flagged ``resume_pending=True`` — not
+   ``suspended`` — so the next user message on the same session_key
+   auto-resumes from the existing transcript instead of getting routed
+   through ``suspend_recently_active()`` and converted into a fresh
+   session.
+
+2. ``suspended=True`` (from ``/stop`` or stuck-loop escalation) still
+   wins over ``resume_pending`` — the forced-wipe path is preserved.
+
+3. The restart-resume system note injected into the next user message is
+   a superset of the existing tool-tail auto-continue note (from
+   PR #9934), using session-entry metadata rather than just transcript
+   shape so it fires even when the interrupted transcript does NOT end
+   with a ``tool`` role.
+
+4. The existing ``.restart_failure_counts`` stuck-loop counter from
+   PR #7536 remains the single source of escalation — no parallel
+   counter is added on ``SessionEntry``.
+"""
+
+import asyncio
+from datetime import datetime, timedelta
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.session import SessionEntry, SessionSource, SessionStore
+from tests.gateway.restart_test_helpers import (
+    make_restart_runner,
+    make_restart_source,
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_source(platform=Platform.TELEGRAM, chat_id="123", user_id="u1"):
+    return SessionSource(platform=platform, chat_id=chat_id, user_id=user_id)
+
+
+def _make_store(tmp_path):
+    return SessionStore(sessions_dir=tmp_path, config=GatewayConfig())
+
+
+def _simulate_note_injection(
+    agent_history: list,
+    user_message: str,
+    resume_entry: SessionEntry | None,
+) -> str:
+    """Mirror the note-injection logic in gateway/run.py _run_agent().
+
+    Matches the production code in the ``run_sync`` closure so we can
+    test the decision tree without a full gateway runner.
+    """
+    message = user_message
+    is_resume_pending = bool(
+        resume_entry is not None and getattr(resume_entry, "resume_pending", False)
+    )
+
+    if is_resume_pending:
+        reason = getattr(resume_entry, "resume_reason", None) or "restart_timeout"
+        reason_phrase = (
+            "a gateway restart"
+            if reason == "restart_timeout"
+            else "a gateway shutdown"
+            if reason == "shutdown_timeout"
+            else "a gateway interruption"
+        )
+        message = (
+            f"[System note: Your previous turn in this session was interrupted "
+            f"by {reason_phrase}. The conversation history below is intact. "
+            f"If it contains unfinished tool result(s), process them first and "
+            f"summarize what was accomplished, then address the user's new "
+            f"message below.]\n\n"
+            + message
+        )
+    elif agent_history and agent_history[-1].get("role") == "tool":
+        message = (
+            "[System note: Your previous turn was interrupted before you could "
+            "process the last tool result(s). The conversation history contains "
+            "tool outputs you haven't responded to yet. Please finish processing "
+            "those results and summarize what was accomplished, then address the "
+            "user's new message below.]\n\n"
+            + message
+        )
+    return message
+
+
+# ---------------------------------------------------------------------------
+# SessionEntry field + serialization
+# ---------------------------------------------------------------------------
+
+
+class TestSessionEntryResumeFields:
+    def test_defaults(self):
+        now = datetime.now()
+        entry = SessionEntry(
+            session_key="agent:main:telegram:dm:1",
+            session_id="sid",
+            created_at=now,
+            updated_at=now,
+        )
+        assert entry.resume_pending is False
+        assert entry.resume_reason is None
+        assert entry.last_resume_marked_at is None
+
+    def test_roundtrip_with_resume_fields(self):
+        now = datetime(2026, 4, 18, 12, 0, 0)
+        entry = SessionEntry(
+            session_key="agent:main:telegram:dm:1",
+            session_id="sid",
+            created_at=now,
+            updated_at=now,
+            resume_pending=True,
+            resume_reason="restart_timeout",
+            last_resume_marked_at=now,
+        )
+        restored = SessionEntry.from_dict(entry.to_dict())
+        assert restored.resume_pending is True
+        assert restored.resume_reason == "restart_timeout"
+        assert restored.last_resume_marked_at == now
+
+    def test_from_dict_legacy_without_resume_fields(self):
+        """Old sessions.json without the new fields deserialize cleanly."""
+        now = datetime.now()
+        legacy = {
+            "session_key": "agent:main:telegram:dm:1",
+            "session_id": "sid",
+            "created_at": now.isoformat(),
+            "updated_at": now.isoformat(),
+            "chat_type": "dm",
+        }
+        restored = SessionEntry.from_dict(legacy)
+        assert restored.resume_pending is False
+        assert restored.resume_reason is None
+        assert restored.last_resume_marked_at is None
+
+    def test_malformed_timestamp_is_tolerated(self):
+        now = datetime.now()
+        data = {
+            "session_key": "k",
+            "session_id": "sid",
+            "created_at": now.isoformat(),
+            "updated_at": now.isoformat(),
+            "resume_pending": True,
+            "resume_reason": "restart_timeout",
+            "last_resume_marked_at": "not-a-timestamp",
+        }
+        restored = SessionEntry.from_dict(data)
+        # resume_pending still honoured, only the broken timestamp drops
+        assert restored.resume_pending is True
+        assert restored.resume_reason == "restart_timeout"
+        assert restored.last_resume_marked_at is None
+
+
+# ---------------------------------------------------------------------------
+# SessionStore.mark_resume_pending / clear_resume_pending
+# ---------------------------------------------------------------------------
+
+
+class TestMarkResumePending:
+    def test_marks_existing_session(self, tmp_path):
+        store = _make_store(tmp_path)
+        source = _make_source()
+        entry = store.get_or_create_session(source)
+
+        assert store.mark_resume_pending(entry.session_key) is True
+        refreshed = store._entries[entry.session_key]
+        assert refreshed.resume_pending is True
+        assert refreshed.resume_reason == "restart_timeout"
+        assert refreshed.last_resume_marked_at is not None
+
+    def test_custom_reason_persists(self, tmp_path):
+        store = _make_store(tmp_path)
+        source = _make_source()
+        entry = store.get_or_create_session(source)
+
+        store.mark_resume_pending(entry.session_key, reason="shutdown_timeout")
+        assert store._entries[entry.session_key].resume_reason == "shutdown_timeout"
+
+    def test_returns_false_for_unknown_key(self, tmp_path):
+        store = _make_store(tmp_path)
+        assert store.mark_resume_pending("no-such-key") is False
+
+    def test_does_not_override_suspended(self, tmp_path):
+        """suspended wins — mark_resume_pending is a no-op on a suspended entry."""
+        store = _make_store(tmp_path)
+        source = _make_source()
+        entry = store.get_or_create_session(source)
+        store.suspend_session(entry.session_key)
+
+        assert store.mark_resume_pending(entry.session_key) is False
+        e = store._entries[entry.session_key]
+        assert e.suspended is True
+        assert e.resume_pending is False
+
+    def test_survives_roundtrip_through_json(self, tmp_path):
+        store = _make_store(tmp_path)
+        source = _make_source()
+        entry = store.get_or_create_session(source)
+        store.mark_resume_pending(entry.session_key, reason="restart_timeout")
+
+        # Reload from disk
+        store2 = _make_store(tmp_path)
+        store2._ensure_loaded()
+        reloaded = store2._entries[entry.session_key]
+        assert reloaded.resume_pending is True
+        assert reloaded.resume_reason == "restart_timeout"
+
+
+class TestClearResumePending:
+    def test_clears_flag(self, tmp_path):
+        store = _make_store(tmp_path)
+        source = _make_source()
+        entry = store.get_or_create_session(source)
+        store.mark_resume_pending(entry.session_key)
+
+        assert store.clear_resume_pending(entry.session_key) is True
+        e = store._entries[entry.session_key]
+        assert e.resume_pending is False
+        assert e.resume_reason is None
+        assert e.last_resume_marked_at is None
+
+    def test_returns_false_when_not_pending(self, tmp_path):
+        store = _make_store(tmp_path)
+        source = _make_source()
+        entry = store.get_or_create_session(source)
+        # Not marked
+        assert store.clear_resume_pending(entry.session_key) is False
+
+    def test_returns_false_for_unknown_key(self, tmp_path):
+        store = _make_store(tmp_path)
+        assert store.clear_resume_pending("no-such-key") is False
+
+
+# ---------------------------------------------------------------------------
+# SessionStore.get_or_create_session resume_pending behaviour
+# ---------------------------------------------------------------------------
+
+
+class TestGetOrCreateResumePending:
+    def test_resume_pending_preserves_session_id(self, tmp_path):
+        """This is THE core behavioural fix — resume_pending ≠ new session."""
+        store = _make_store(tmp_path)
+        source = _make_source()
+        first = store.get_or_create_session(source)
+        original_sid = first.session_id
+        store.mark_resume_pending(first.session_key)
+
+        second = store.get_or_create_session(source)
+        assert second.session_id == original_sid
+        assert second.was_auto_reset is False
+        assert second.auto_reset_reason is None
+        # Flag is NOT cleared on read — only on successful turn completion.
+        assert second.resume_pending is True
+
+    def test_suspended_still_creates_new_session(self, tmp_path):
+        """Regression guard — suspended must still force a clean slate."""
+        store = _make_store(tmp_path)
+        source = _make_source()
+        first = store.get_or_create_session(source)
+        original_sid = first.session_id
+        store.suspend_session(first.session_key)
+
+        second = store.get_or_create_session(source)
+        assert second.session_id != original_sid
+        assert second.was_auto_reset is True
+        assert second.auto_reset_reason == "suspended"
+
+    def test_suspended_overrides_resume_pending(self, tmp_path):
+        """Terminal escalation: a session that somehow has BOTH flags must
+        behave like ``suspended`` — forced wipe + auto_reset_reason."""
+        store = _make_store(tmp_path)
+        source = _make_source()
+        first = store.get_or_create_session(source)
+        original_sid = first.session_id
+
+        # Force the pathological state directly (normally mark_resume_pending
+        # refuses to run when suspended=True, but a stuck-loop escalation
+        # can set suspended=True AFTER resume_pending is set).
+        with store._lock:
+            e = store._entries[first.session_key]
+            e.resume_pending = True
+            e.resume_reason = "restart_timeout"
+            e.suspended = True
+            store._save()
+
+        second = store.get_or_create_session(source)
+        assert second.session_id != original_sid
+        assert second.was_auto_reset is True
+        assert second.auto_reset_reason == "suspended"
+
+
+# ---------------------------------------------------------------------------
+# SessionStore.suspend_recently_active skip behaviour
+# ---------------------------------------------------------------------------
+
+
+class TestSuspendRecentlyActiveSkipsResumePending:
+    def test_resume_pending_entries_not_suspended(self, tmp_path):
+        store = _make_store(tmp_path)
+        source = _make_source()
+        entry = store.get_or_create_session(source)
+        store.mark_resume_pending(entry.session_key)
+
+        count = store.suspend_recently_active()
+        assert count == 0
+        e = store._entries[entry.session_key]
+        assert e.suspended is False
+        assert e.resume_pending is True
+
+    def test_non_resume_pending_still_suspended(self, tmp_path):
+        """Non-resume sessions still get the old crash-recovery suspension."""
+        store = _make_store(tmp_path)
+        source_a = _make_source(chat_id="a")
+        source_b = _make_source(chat_id="b")
+        entry_a = store.get_or_create_session(source_a)
+        entry_b = store.get_or_create_session(source_b)
+        store.mark_resume_pending(entry_a.session_key)
+
+        count = store.suspend_recently_active()
+        assert count == 1
+        assert store._entries[entry_a.session_key].suspended is False
+        assert store._entries[entry_b.session_key].suspended is True
+
+
+# ---------------------------------------------------------------------------
+# Restart-resume system-note injection
+# ---------------------------------------------------------------------------
+
+
+class TestResumePendingSystemNote:
+    def _pending_entry(self, reason="restart_timeout") -> SessionEntry:
+        now = datetime.now()
+        return SessionEntry(
+            session_key="agent:main:telegram:dm:1",
+            session_id="sid",
+            created_at=now,
+            updated_at=now,
+            resume_pending=True,
+            resume_reason=reason,
+            last_resume_marked_at=now,
+        )
+
+    def test_resume_pending_restart_note_mentions_restart(self):
+        entry = self._pending_entry(reason="restart_timeout")
+        result = _simulate_note_injection(
+            agent_history=[{"role": "assistant", "content": "in progress"}],
+            user_message="what happened?",
+            resume_entry=entry,
+        )
+        assert "[System note:" in result
+        assert "gateway restart" in result
+        assert "what happened?" in result
+
+    def test_resume_pending_shutdown_note_mentions_shutdown(self):
+        entry = self._pending_entry(reason="shutdown_timeout")
+        result = _simulate_note_injection(
+            agent_history=[{"role": "assistant", "content": "in progress"}],
+            user_message="ping",
+            resume_entry=entry,
+        )
+        assert "gateway shutdown" in result
+
+    def test_resume_pending_fires_without_tool_tail(self):
+        """Key improvement over PR #9934: the restart-resume note fires
+        even when the transcript's last role is NOT ``tool``."""
+        entry = self._pending_entry()
+        history = [
+            {"role": "user", "content": "run a long thing"},
+            {"role": "assistant", "content": "ok, starting..."},
+        ]
+        result = _simulate_note_injection(history, "ping", resume_entry=entry)
+        assert "[System note:" in result
+        assert "gateway restart" in result
+
+    def test_resume_pending_subsumes_tool_tail_note(self):
+        """When BOTH conditions are true, the restart-resume note wins —
+        no duplicate notes."""
+        entry = self._pending_entry()
+        history = [
+            {"role": "assistant", "content": None, "tool_calls": [
+                {"id": "c1", "function": {"name": "x", "arguments": "{}"}},
+            ]},
+            {"role": "tool", "tool_call_id": "c1", "content": "result"},
+        ]
+        result = _simulate_note_injection(history, "ping", resume_entry=entry)
+        assert result.count("[System note:") == 1
+        assert "gateway restart" in result
+        # Old tool-tail wording absent
+        assert "haven't responded to yet" not in result
+
+    def test_no_resume_pending_preserves_tool_tail_note(self):
+        """Regression: the old PR #9934 tool-tail behaviour is unchanged."""
+        history = [
+            {"role": "assistant", "content": None, "tool_calls": [
+                {"id": "c1", "function": {"name": "x", "arguments": "{}"}},
+            ]},
+            {"role": "tool", "tool_call_id": "c1", "content": "result"},
+        ]
+        result = _simulate_note_injection(history, "ping", resume_entry=None)
+        assert "[System note:" in result
+        assert "tool result" in result
+
+    def test_no_note_when_nothing_to_resume(self):
+        history = [
+            {"role": "user", "content": "hello"},
+            {"role": "assistant", "content": "hi"},
+        ]
+        result = _simulate_note_injection(history, "ping", resume_entry=None)
+        assert result == "ping"
+
+
+# ---------------------------------------------------------------------------
+# Drain-timeout path marks sessions resume_pending
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_drain_timeout_marks_resume_pending():
+    """End-to-end: a drain timeout during gateway stop should flag every
+    active session as resume_pending BEFORE the interrupt fires, so the
+    next startup's suspend_recently_active() does not destroy them."""
+    runner, adapter = make_restart_runner()
+    adapter.disconnect = AsyncMock()
+    runner._restart_drain_timeout = 0.05
+
+    running_agent = MagicMock()
+    session_key_one = "agent:main:telegram:dm:A"
+    session_key_two = "agent:main:telegram:dm:B"
+    runner._running_agents = {
+        session_key_one: running_agent,
+        session_key_two: MagicMock(),
+    }
+
+    # Plug a mock session_store that records marks.
+    session_store = MagicMock()
+    session_store.mark_resume_pending = MagicMock(return_value=True)
+    runner.session_store = session_store
+
+    with patch("gateway.status.remove_pid_file"), patch(
+        "gateway.status.write_runtime_status"
+    ):
+        await runner.stop()
+
+    # Both active sessions were marked with the shutdown_timeout reason.
+    calls = session_store.mark_resume_pending.call_args_list
+    marked = {args[0][0] for args in calls}
+    assert marked == {session_key_one, session_key_two}
+    for args in calls:
+        assert args[0][1] == "shutdown_timeout"
+
+
+@pytest.mark.asyncio
+async def test_drain_timeout_uses_restart_reason_when_restarting():
+    runner, adapter = make_restart_runner()
+    adapter.disconnect = AsyncMock()
+    runner._restart_drain_timeout = 0.05
+    runner._restart_requested = True
+
+    running_agent = MagicMock()
+    runner._running_agents = {"agent:main:telegram:dm:A": running_agent}
+
+    session_store = MagicMock()
+    session_store.mark_resume_pending = MagicMock(return_value=True)
+    runner.session_store = session_store
+
+    with patch("gateway.status.remove_pid_file"), patch(
+        "gateway.status.write_runtime_status"
+    ):
+        await runner.stop(restart=True, detached_restart=False, service_restart=True)
+
+    calls = session_store.mark_resume_pending.call_args_list
+    assert calls, "expected at least one mark_resume_pending call"
+    for args in calls:
+        assert args[0][1] == "restart_timeout"
+
+
+@pytest.mark.asyncio
+async def test_clean_drain_does_not_mark_resume_pending():
+    """If the drain completes within timeout (no force-interrupt), no
+    sessions should be flagged — the normal shutdown path is unchanged."""
+    runner, adapter = make_restart_runner()
+    adapter.disconnect = AsyncMock()
+
+    running_agent = MagicMock()
+    runner._running_agents = {"agent:main:telegram:dm:A": running_agent}
+
+    # Finish the agent before the (generous) drain deadline
+    async def finish_agent():
+        await asyncio.sleep(0.05)
+        runner._running_agents.clear()
+
+    asyncio.create_task(finish_agent())
+
+    session_store = MagicMock()
+    session_store.mark_resume_pending = MagicMock(return_value=True)
+    runner.session_store = session_store
+
+    with patch("gateway.status.remove_pid_file"), patch(
+        "gateway.status.write_runtime_status"
+    ):
+        await runner.stop()
+
+    session_store.mark_resume_pending.assert_not_called()
+    running_agent.interrupt.assert_not_called()
+
+
+# ---------------------------------------------------------------------------
+# Shutdown banner wording
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_restart_banner_uses_try_to_resume_wording():
+    """The notification sent before drain should hedge the resume promise
+    — the session-continuity fix is best-effort (stuck-loop counter can
+    still escalate to suspended)."""
+    runner, adapter = make_restart_runner()
+    runner._restart_requested = True
+    runner._running_agents["agent:main:telegram:dm:999"] = MagicMock()
+
+    await runner._notify_active_sessions_of_shutdown()
+
+    assert len(adapter.sent) == 1
+    msg = adapter.sent[0]
+    assert "restarting" in msg
+    assert "try to resume" in msg
+
+
+# ---------------------------------------------------------------------------
+# Stuck-loop escalation integration
+# ---------------------------------------------------------------------------
+
+
+class TestStuckLoopEscalation:
+    """The existing .restart_failure_counts counter (PR #7536) remains the
+    single source of terminal escalation — no parallel counter on
+    SessionEntry was added.  After the configured threshold, the startup
+    path flips suspended=True which overrides resume_pending."""
+
+    def test_escalation_via_stuck_loop_counter_overrides_resume_pending(
+        self, tmp_path, monkeypatch
+    ):
+        """Simulate a session that keeps getting restart-interrupted and
+        hits the stuck-loop threshold: next startup should force it to
+        fresh-session despite resume_pending being set."""
+        import json
+
+        from gateway.run import GatewayRunner
+
+        store = _make_store(tmp_path)
+        source = _make_source()
+        entry = store.get_or_create_session(source)
+        store.mark_resume_pending(entry.session_key, reason="restart_timeout")
+
+        # Simulate counter already at threshold (3 consecutive interrupted
+        # restarts).  _suspend_stuck_loop_sessions will flip suspended=True.
+        counts_file = tmp_path / ".restart_failure_counts"
+        counts_file.write_text(json.dumps({entry.session_key: 3}))
+
+        monkeypatch.setattr("gateway.run._hermes_home", tmp_path)
+        runner = object.__new__(GatewayRunner)
+        runner.session_store = store
+
+        suspended_count = GatewayRunner._suspend_stuck_loop_sessions(runner)
+        assert suspended_count == 1
+        assert store._entries[entry.session_key].suspended is True
+        # resume_pending is still set on the entry, but suspended wins in
+        # get_or_create_session so the next message still gets a new sid.
+        second = store.get_or_create_session(source)
+        assert second.session_id != entry.session_id
+        assert second.auto_reset_reason == "suspended"
+
+    def test_successful_turn_flow_clears_both_counter_and_resume_pending(
+        self, tmp_path, monkeypatch
+    ):
+        """The gateway's post-turn cleanup should clear both signals so a
+        future restart-interrupt starts with a fresh counter."""
+        import json
+
+        from gateway.run import GatewayRunner
+
+        store = _make_store(tmp_path)
+        source = _make_source()
+        entry = store.get_or_create_session(source)
+        store.mark_resume_pending(entry.session_key, reason="restart_timeout")
+
+        counts_file = tmp_path / ".restart_failure_counts"
+        counts_file.write_text(json.dumps({entry.session_key: 2}))
+
+        monkeypatch.setattr("gateway.run._hermes_home", tmp_path)
+        runner = object.__new__(GatewayRunner)
+        runner.session_store = store
+
+        GatewayRunner._clear_restart_failure_count(runner, entry.session_key)
+        store.clear_resume_pending(entry.session_key)
+
+        assert store._entries[entry.session_key].resume_pending is False
+        assert not counts_file.exists()

From c49a58a6d0f81d7e77db20c259ea7115a36d49da Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 18 Apr 2026 17:40:34 -0700
Subject: [PATCH 074/143] fix(gateway): mark only still-running sessions
 resume_pending on drain timeout (#12332)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow-up to #12301.

The drain-timeout branch of _stop_impl() was iterating the drain-start
snapshot (active_agents) when marking sessions resume_pending. That
snapshot can include sessions that finished gracefully during the drain
window — marking them would give their next turn a stray
'your previous turn was interrupted by a gateway restart' system note
even though the prior turn actually completed cleanly.

Iterate self._running_agents at timeout time instead, mirroring
_interrupt_running_agents() exactly:
- only sessions still blocking the shutdown get marked
- pending sentinels (AIAgent construction not yet complete) are skipped

Changes:
- gateway/run.py: swap active_agents.keys() for filtered
  self._running_agents.items() iteration in the drain-timeout mark loop.
- tests/gateway/test_restart_resume_pending.py: two regression tests —
  finisher-during-drain not marked, pending sentinel not marked.
---
 gateway/run.py                               | 15 +++-
 tests/gateway/test_restart_resume_pending.py | 78 ++++++++++++++++++++
 2 files changed, 92 insertions(+), 1 deletion(-)

diff --git a/gateway/run.py b/gateway/run.py
index 8683c5a7526..af3946d4afc 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -2383,10 +2383,23 @@ class GatewayRunner:
                 # existing ``.restart_failure_counts`` stuck-loop counter
                 # (incremented below, threshold 3), which sets
                 # ``suspended=True`` and overrides resume_pending.
+                #
+                # Iterate self._running_agents (current) rather than the
+                # drain-start ``active_agents`` snapshot — the snapshot
+                # may include sessions that finished gracefully during
+                # the drain window, and marking those falsely would give
+                # them a stray restart-interruption system note on their
+                # next turn even though their previous turn completed
+                # cleanly.  Skip pending sentinels for the same reason
+                # _interrupt_running_agents() does: their agent hasn't
+                # started yet, there's nothing to interrupt, and the
+                # session shouldn't carry a misleading resume flag.
                 _resume_reason = (
                     "restart_timeout" if self._restart_requested else "shutdown_timeout"
                 )
-                for _sk in list(active_agents.keys()):
+                for _sk, _agent in list(self._running_agents.items()):
+                    if _agent is _AGENT_PENDING_SENTINEL:
+                        continue
                     try:
                         self.session_store.mark_resume_pending(_sk, _resume_reason)
                     except Exception as _e:
diff --git a/tests/gateway/test_restart_resume_pending.py b/tests/gateway/test_restart_resume_pending.py
index a18d85cc469..c11b2740db3 100644
--- a/tests/gateway/test_restart_resume_pending.py
+++ b/tests/gateway/test_restart_resume_pending.py
@@ -516,6 +516,84 @@ async def test_clean_drain_does_not_mark_resume_pending():
     running_agent.interrupt.assert_not_called()
 
 
+@pytest.mark.asyncio
+async def test_drain_timeout_only_marks_still_running_sessions():
+    """A session that finished gracefully during the drain window must
+    NOT be marked ``resume_pending`` — it completed cleanly and its
+    next turn should be a normal fresh turn, not one prefixed with the
+    restart-interruption system note.
+
+    Regression guard for using ``self._running_agents`` at timeout
+    rather than the ``active_agents`` drain-start snapshot.
+    """
+    runner, adapter = make_restart_runner()
+    adapter.disconnect = AsyncMock()
+    # Long enough for the finisher to exit, short enough to still time out
+    # with the stuck session still present.
+    runner._restart_drain_timeout = 0.3
+
+    session_key_finisher = "agent:main:telegram:dm:A"
+    session_key_stuck = "agent:main:telegram:dm:B"
+    runner._running_agents = {
+        session_key_finisher: MagicMock(),
+        session_key_stuck: MagicMock(),
+    }
+
+    async def finish_one():
+        await asyncio.sleep(0.05)
+        runner._running_agents.pop(session_key_finisher, None)
+
+    asyncio.create_task(finish_one())
+
+    session_store = MagicMock()
+    session_store.mark_resume_pending = MagicMock(return_value=True)
+    runner.session_store = session_store
+
+    with patch("gateway.status.remove_pid_file"), patch(
+        "gateway.status.write_runtime_status"
+    ):
+        await runner.stop()
+
+    calls = session_store.mark_resume_pending.call_args_list
+    marked = {args[0][0] for args in calls}
+    # Only the session still running at timeout is marked; the finisher is not.
+    assert marked == {session_key_stuck}
+
+
+@pytest.mark.asyncio
+async def test_drain_timeout_skips_pending_sentinel_sessions():
+    """Pending sentinels — sessions whose AIAgent construction hasn't
+    produced a real agent yet — are skipped by
+    ``_interrupt_running_agents()``.  The resume_pending marking must
+    mirror that: no agent started means no turn was interrupted.
+    """
+    from gateway.run import _AGENT_PENDING_SENTINEL
+
+    runner, adapter = make_restart_runner()
+    adapter.disconnect = AsyncMock()
+    runner._restart_drain_timeout = 0.05
+
+    session_key_real = "agent:main:telegram:dm:A"
+    session_key_sentinel = "agent:main:telegram:dm:B"
+    runner._running_agents = {
+        session_key_real: MagicMock(),
+        session_key_sentinel: _AGENT_PENDING_SENTINEL,
+    }
+
+    session_store = MagicMock()
+    session_store.mark_resume_pending = MagicMock(return_value=True)
+    runner.session_store = session_store
+
+    with patch("gateway.status.remove_pid_file"), patch(
+        "gateway.status.write_runtime_status"
+    ):
+        await runner.stop()
+
+    calls = session_store.mark_resume_pending.call_args_list
+    marked = {args[0][0] for args in calls}
+    assert marked == {session_key_real}
+
+
 # ---------------------------------------------------------------------------
 # Shutdown banner wording
 # ---------------------------------------------------------------------------

From 7a5371b20d2e8226a3ec61f0320b4cb57d68e88f Mon Sep 17 00:00:00 2001
From: kshitijk4poor <kshitijk4poor@users.noreply.github.com>
Date: Wed, 15 Apr 2026 10:33:15 +0530
Subject: [PATCH 075/143] feat: add TouchDesigner integration skill
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New skill: creative/touchdesigner — control a running TouchDesigner
instance via REST API. Build real-time visual networks programmatically.

Architecture:
  Hermes Agent -> HTTP REST (curl) -> TD WebServer DAT -> TD Python env

Key features:
- Custom API handler (scripts/custom_api_handler.py) that creates a
  self-contained WebServer DAT + callback in TD. More reliable than the
  official mcp_webserver_base.tox which frequently fails module imports.
- Discovery-first workflow: never hardcode TD parameter names. Always
  probe the running instance first since names change across versions.
- Persistent setup: save the TD project once with the API handler baked
  in. TD auto-opens the last project on launch, so port 9981 is live
  with zero manual steps after first-time setup.
- Works via curl in execute_code (no MCP dependency required).
- Optional MCP server config for touchdesigner-mcp-server npm package.

Skill structure (3277 lines total):
  SKILL.md (278 lines) — setup, workflow, key rules, operator reference
  references/pitfalls.md (336 lines) — 31 hard-won lessons
  references/operators.md (239 lines) — all 6 operator families
  references/network-patterns.md (914 lines) — audio-reactive, generative,
    video processing, GLSL, instancing, live performance recipes
  references/mcp-tools.md (501 lines) — 13 MCP tool schemas
  references/python-api.md (443 lines) — TD Python scripting patterns
  references/troubleshooting.md (274 lines) — connection diagnostics
  scripts/custom_api_handler.py (140 lines) — REST API handler for TD
  scripts/setup.sh (152 lines) — prerequisite checker

Tested on TouchDesigner 099 Non-Commercial (macOS/darwin).
---
 skills/creative/touchdesigner/SKILL.md        | 278 ++++++
 .../touchdesigner/references/mcp-tools.md     | 501 ++++++++++
 .../references/network-patterns.md            | 914 ++++++++++++++++++
 .../touchdesigner/references/operators.md     | 239 +++++
 .../touchdesigner/references/pitfalls.md      | 336 +++++++
 .../touchdesigner/references/python-api.md    | 443 +++++++++
 .../references/troubleshooting.md             | 274 ++++++
 .../scripts/custom_api_handler.py             | 140 +++
 .../creative/touchdesigner/scripts/setup.sh   | 152 +++
 9 files changed, 3277 insertions(+)
 create mode 100644 skills/creative/touchdesigner/SKILL.md
 create mode 100644 skills/creative/touchdesigner/references/mcp-tools.md
 create mode 100644 skills/creative/touchdesigner/references/network-patterns.md
 create mode 100644 skills/creative/touchdesigner/references/operators.md
 create mode 100644 skills/creative/touchdesigner/references/pitfalls.md
 create mode 100644 skills/creative/touchdesigner/references/python-api.md
 create mode 100644 skills/creative/touchdesigner/references/troubleshooting.md
 create mode 100644 skills/creative/touchdesigner/scripts/custom_api_handler.py
 create mode 100644 skills/creative/touchdesigner/scripts/setup.sh

diff --git a/skills/creative/touchdesigner/SKILL.md b/skills/creative/touchdesigner/SKILL.md
new file mode 100644
index 00000000000..0f464193fed
--- /dev/null
+++ b/skills/creative/touchdesigner/SKILL.md
@@ -0,0 +1,278 @@
+---
+name: touchdesigner
+description: "Control a running TouchDesigner instance programmatically — create operators, set parameters, wire connections, execute Python, build real-time visuals. Covers: GLSL shaders, audio-reactive, generative art, video processing, instancing, and live performance."
+version: 3.0.0
+author: Hermes Agent
+license: MIT
+metadata:
+  hermes:
+    tags: [TouchDesigner, MCP, creative-coding, real-time-visuals, generative-art, audio-reactive, VJ, installation, GLSL]
+    related_skills: [native-mcp, ascii-video, manim-video, hermes-video]
+    security:
+      allow_network: true
+      allow_install: true
+      allow_config_write: true
+---
+
+# TouchDesigner Integration
+
+## Architecture
+
+Hermes Agent -> HTTP REST (curl) -> TD WebServer DAT (port 9981) -> TD Python environment.
+
+The agent controls a **running TouchDesigner instance** via a REST API on port 9981. It does NOT generate .toe files from scratch.
+
+## First-Time Setup (one-time, persists across sessions)
+
+### 1. Verify TD is running and check for existing API
+
+```bash
+lsof -i :9981 -P -n | grep LISTEN   # TD listening?
+curl -s --max-time 5 http://127.0.0.1:9981/api/td/server/td  # API working?
+```
+
+If HTTP 200 + JSON → skip to **Discovery**. Setup is already done.
+
+### 2. If no API: deploy the custom handler
+
+The user must paste ONE line into TD Textport (Alt+T / Dialogs > Textport and DATs):
+
+```
+exec(open('PATH_TO_SKILL/scripts/custom_api_handler.py').read())
+```
+
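+Replace `PATH_TO_SKILL` with the absolute path to this skill's directory, then copy the line to the user's clipboard with `pbcopy` (macOS) so they can paste it. Running it creates a WebServer DAT + callback handler pair in `/project1` that implements the REST API. No external dependencies.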
+
+**Why not the official .tox?** The `mcp_webserver_base.tox` from 8beeeaaat/touchdesigner-mcp frequently fails to import its Python modules after drag-drop (relative path resolution issue). Our custom handler is self-contained and more reliable. See `references/pitfalls.md` #1-2.
+
+### 3. Save the project to persist the API
+
+After the handler is running, save the project so the API auto-starts on every future TD launch:
+
+```python
+td_exec("project.save(os.path.expanduser('~/Documents/HermesAgent.toe'))")
+```
+
+TD auto-opens the last saved project on launch. From now on, `open /Applications/TouchDesigner.app` → port 9981 is live → agent can connect immediately.
+
+To launch TD with this project explicitly:
+```bash
+open /Applications/TouchDesigner.app ~/Documents/HermesAgent.toe
+```
+
+### 4. Optional: Configure Hermes MCP
+
+Add under `mcp_servers:` in the user's Hermes config:
+```yaml
+touchdesigner:
+  command: npx
+  args: ["-y", "touchdesigner-mcp-server@latest"]
+  env:
+    TD_API_URL: "http://127.0.0.1:9981"
+  timeout: 120
+```
+
+This is optional — the agent works fully via `curl` to the REST API using `execute_code`. MCP tools are a convenience layer.
+
+## Talking to TD (the td_exec pattern)
+
+All communication uses this pattern in `execute_code`:
+
+```python
+import json, shlex
+from hermes_tools import terminal
+
+API = "http://127.0.0.1:9981"
+def td_exec(script):
+    payload = json.dumps({"script": script})
+    cmd = f"curl -s --max-time 15 -X POST -H 'Content-Type: application/json' -d {shlex.quote(payload)} '{API}/api/td/server/exec'"
+    r = terminal(cmd, timeout=20)
+    return json.loads(r['output'])
+
+# Returns: {"result": <value>, "stdout": "...", "stderr": "..."}
+```
+
+For large GLSL shaders: write to a temp file, then `td_exec("op('...').text = open('/tmp/shader.glsl').read()")`.
+
+## Workflow
+
+### Step 0: Discovery (MANDATORY — never skip)
+
+**Never hardcode parameter names.** They change between TD versions. Run this first:
+
+```python
+td_exec("""
+import sys
+info = {'version': str(app.version), 'platform': sys.platform}
+root = op('/project1')
+for name, optype in [('glslTOP', glslTOP), ('constantTOP', constantTOP),
+                      ('blurTOP', blurTOP), ('textTOP', textTOP),
+                      ('levelTOP', levelTOP), ('compositeTOP', compositeTOP),
+                      ('transformTOP', transformTOP), ('feedbackTOP', feedbackTOP),
+                      ('windowCOMP', windowCOMP)]:
+    n = root.create(optype, '_d_' + name)
+    kw = ['color','size','font','dat','alpha','opacity','resolution','text',
+          'extend','operand','top','pixel','format','win','type']
+    info[name] = [p.name for p in n.pars() if any(k in p.name.lower() for k in kw)]
+    n.destroy()
+result = info
+""")
+```
+
+Use the returned param names for ALL subsequent calls. Store them in your session context.
+
+### Step 1: Clean + Build
+
+Build the entire network in ONE `td_exec` call (batching avoids round-trip overhead and ensures TD advances frames between calls):
+
+```python
+td_exec("""
+root = op('/project1')
+keep = {'api_server', 'api_handler'}
+for child in list(root.children):  # snapshot before destroying
+    if child.name not in keep and child.valid:
+        child.destroy()
+
+# Create nodes, set params (using discovered names), wire, verify
+...
+result = {'nodes': len(list(root.children)), 'errors': [...]}
+""")
+```
+
+### Step 2: Wire connections
+
+```python
+gl.outputConnectors[0].connect(comp.inputConnectors[0])
+```
+
+### Step 3: Verify
+
+```python
+for c in list(root.children):
+    e = c.errors(); w = c.warnings()
+    if e: print(c.name, 'ERR:', e)
+```
+
+### Step 4: Display
+
+```python
+win = root.create(windowCOMP, 'display')
+win.par.winop = out.path    # discovered param name
+win.par.winw = 1280; win.par.winh = 720
+win.par.winopen.pulse()
+```
+
+## Key Implementation Rules
+
+**Always clean safely:** `list(root.children)` before iterating + `child.valid` check.
+
+**GLSL time:** No `uTDCurrentTime` in TD 099. Feed time via 1x1 Constant TOP.
+**CRITICAL: must use `rgba32float` format** — the default 8-bit format clamps values to 0-1, so `absTime.seconds % 1000.0` becomes 1.0 and the shader appears frozen:
+```python
+t = root.create(constantTOP, 'time_driver')
+t.par.format = 'rgba32float'  # ← REQUIRED or time is stuck at 1.0
+t.par.outputresolution = 'custom'
+t.par.resolutionw = 1
+t.par.resolutionh = 1
+t.par.colorr.expr = "absTime.seconds % 1000.0"
+t.par.colorg.expr = "int(absTime.seconds / 1000.0)"
+t.outputConnectors[0].connect(glsl.inputConnectors[0])
+# In GLSL: vec4 td = texture(sTD2DInputs[0], vec2(.5)); float t = td.r + td.g*1000.;
+```
+
+**Feedback TOP:** Use `top` parameter reference (not direct input wire). The "Not enough sources" error resolves after first cook. The "Cook dependency loop" warning is expected.
+
+**Resolution:** Non-Commercial caps at 1280×1280. Use `outputresolution = 'custom'`.
+
+**Large shaders:** Write GLSL to `/tmp/file.glsl`, then `td_exec("op('shader').text = open('/tmp/file.glsl').read()")`.
+
+**WebServer DAT quirk:** Response body goes in `response['data']` not `response['body']`. Request POST body comes as bytes in `request['data']`.
+
+## Recording / Exporting Video
+
+To capture TD output as video or image sequence for external use (e.g., ASCII video pipeline):
+
+### Movie Recording (recommended)
+
+```python
+# Put a Null TOP before the recorder (official best practice)
+rec = root.create(moviefileoutTOP, 'recorder')
+null_out.outputConnectors[0].connect(rec.inputConnectors[0])
+
+rec.par.type = 'movie'
+rec.par.file = '/tmp/output.mov'
+rec.par.videocodec = 'mjpa'  # Motion JPEG — works on Non-Commercial
+
+# Start/stop recording (par.record is a toggle, NOT .record() method)
+rec.par.record = True   # start
+# ... wait ...
+rec.par.record = False  # stop
+```
+
+**H.264/H.265 require a Commercial license** — use `mjpa` (Motion JPEG) or `prores` on Non-Commercial. Extract frames afterward with ffmpeg if needed:
+```bash
+ffmpeg -i /tmp/output.mov -vframes 120 /tmp/frames/frame_%06d.png
+```
+
+### Image Sequence Export
+
+```python
+rec.par.type = 'imagesequence'
+rec.par.imagefiletype = 'png'
+rec.par.file.expr = "'/tmp/frames/out' + me.fileSuffix"  # fileSuffix is REQUIRED
+rec.par.record = True
+```
+
+### Pitfalls
+
+- **Race condition:** When setting `par.file` and starting recording in the same script, use `run("...", delayFrames=2)` so the file path is applied before recording begins.
+- **TOP.save() is useless for animation:** Calling `op('null1').save(path)` in a loop or rapid API calls captures the same GPU texture every time — TD doesn't cook new frames between save calls. Always use MovieFileOut for animated output.
+- See `references/pitfalls.md` #25-27 for full details.
+
+## Audio-Reactive GLSL (Proven Recipe)
+
+Complete chain for music-driven visuals: AudioFileIn → AudioSpectrum → Math (boost) → Resample (256) → CHOP To TOP → GLSL TOP (spectrum sampled per-pixel). See `references/network-patterns.md` Pattern 3b for the full working recipe with shader code.
+
+## Audio-Reactive Visuals
+
+The most powerful TD workflow for the agent: play an audio file, analyze its spectrum, and drive a GLSL shader in real-time. The agent builds the entire signal chain programmatically.
+
+**Signal chain:**
+```
+AudioFileIn CHOP → AudioSpectrum CHOP → Math CHOP (gain=5)
+  → Resample CHOP (256) → CHOP To TOP (spectrum texture)
+                                  ↓ (GLSL input 1)
+  Constant TOP (rgba32float, time) → GLSL TOP → Null TOP → MovieFileOut
+        (input 0)
+```
+
+**Key technique:** The spectrum becomes a 256×1 texture. In GLSL, `texture(sTD2DInputs[1], vec2(x, 0.0)).r` samples frequency at position x (0=bass, 1=treble). This lets the shader react per-pixel to different frequency bands.
+
+**Smoothing is critical:** Raw FFT jitters. Use `Math CHOP` gain to boost weak signal, then the GLSL shader's own temporal integration (via feedback or time-smoothed params) handles visual smoothing.
+
+See `references/network-patterns.md` Pattern 9b for the complete build script + shader code.
+
+## Operator Quick Reference
+
+| Family | Color | Examples | Suffix |
+|--------|-------|----------|--------|
+| TOP | Purple | noiseTOP, glslTOP, compositeTOP, levelTOP, blurTOP, textTOP, nullTOP, feedbackTOP, renderTOP | TOP |
+| CHOP | Green | audiofileinCHOP, audiospectrumCHOP, mathCHOP, lfoCHOP, constantCHOP | CHOP |
+| SOP | Blue | gridSOP, sphereSOP, transformSOP, noiseSOP | SOP |
+| DAT | White | textDAT, tableDAT, scriptDAT, webserverDAT | DAT |
+| MAT | Yellow | phongMAT, pbrMAT, glslMAT, constantMAT | MAT |
+| COMP | Gray | geometryCOMP, containerCOMP, cameraCOMP, lightCOMP, windowCOMP | COMP |
+
+See `references/operators.md` for full catalog. See `references/network-patterns.md` for recipes.
+
+## References
+
+| File | What |
+|------|------|
+| `references/pitfalls.md` | **READ FIRST** — 31 hard-won lessons from real sessions |
+| `references/operators.md` | All operator families with params and use cases |
+| `references/network-patterns.md` | Recipes: audio-reactive, generative, video, GLSL, instancing |
+| `references/mcp-tools.md` | MCP tool schemas (optional — curl works without MCP) |
+| `references/python-api.md` | TD Python: op(), scripting, extensions |
+| `references/troubleshooting.md` | Connection diagnostics, param debugging, performance |
+| `scripts/custom_api_handler.py` | Self-contained REST API handler for TD WebServer DAT |
diff --git a/skills/creative/touchdesigner/references/mcp-tools.md b/skills/creative/touchdesigner/references/mcp-tools.md
new file mode 100644
index 00000000000..5e4ad98d553
--- /dev/null
+++ b/skills/creative/touchdesigner/references/mcp-tools.md
@@ -0,0 +1,501 @@
+# TouchDesigner MCP Tools Reference
+
+Complete parameter schemas and usage examples for all 13 MCP tools from the 8beeeaaat/touchdesigner-mcp server.
+
+## Hermes Configuration
+
+Add a `touchdesigner` entry under the `mcp_servers` section of your Hermes config. Example YAML block:
+
+```yaml
+# Under mcp_servers: in config.yaml
+mcp_servers:
+  touchdesigner:
+    command: npx
+    args: ["-y", "touchdesigner-mcp-server@latest"]
+    env:
+      TD_API_URL: "http://127.0.0.1:9981"
+    timeout: 120
+    connect_timeout: 60
+```
+
+For a locally built server, point `command` to `node` and `args` to the built server index.js path. Set `TD_API_URL` to the TouchDesigner WebServer DAT address (default port 9981).
+
+For the documentation/knowledge server (no running TD needed), add a `td_docs` entry using `touchdesigner-mcp-server` as the npx package.
+
+Tools are registered as `mcp_touchdesigner_<tool_name>` in Hermes.
+
+**If MCP tools are not available as direct function calls** (common when the MCP server connects but Hermes doesn't expose them as callable tools), use the custom API handler directly via `curl` in `execute_code` or `terminal`:
+
+```python
+import json, shlex
+from hermes_tools import terminal
+
+def td_exec(script):
+    """Execute Python in TouchDesigner via the REST API."""
+    escaped = json.dumps({"script": script})
+    cmd = f"curl -s --max-time 15 -X POST -H 'Content-Type: application/json' -d {shlex.quote(escaped)} 'http://127.0.0.1:9981/api/td/server/exec'"
+    r = terminal(cmd, timeout=20)
+    return json.loads(r['output'])
+
+# Example: list all nodes
+result = td_exec('result = [c.name for c in op("/project1").children]')
+print(result)  # {"result": ["node1", "node2", ...], "stdout": "", "stderr": ""}
+```
+
+This `td_exec` helper works with both the official .tox handler and the custom API handler from `scripts/custom_api_handler.py`.
+
+When the MCP tools are exposed, call them by their registered `mcp_touchdesigner_<tool_name>` names; the tool sections below use the short tool names.
+
+## Common Formatting Parameters
+
+Most tools accept these optional formatting parameters:
+
+| Parameter | Type | Values | Description |
+|-----------|------|--------|-------------|
+| `detailLevel` | string | `"minimal"`, `"summary"`, `"detailed"` | Response verbosity |
+| `responseFormat` | string | `"json"`, `"yaml"`, `"markdown"` | Output format |
+| `limit` | integer | 1-500 | Max items (on list-type tools only) |
+
+These are client-side formatting options — they control how the MCP server formats the response text, not what data TD returns.
+
+---
+
+## Tool 1: describe_td_tools
+
+**Purpose:** Meta-tool — lists all available TouchDesigner MCP tools with descriptions and parameters.
+
+**Parameters:**
+| Name | Type | Required | Description |
+|------|------|----------|-------------|
+| `filter` | string | No | Keyword to filter tools by name, description, or parameter |
+| `detailLevel` | string | No | Response verbosity |
+| `responseFormat` | string | No | Output format |
+
+**Example:** Find tools related to node creation
+```
+describe_td_tools(filter="create")
+```
+
+**Note:** This tool runs entirely in the MCP server — it does NOT contact TouchDesigner. Use it to discover what's available.
+
+---
+
+## Tool 2: get_td_info
+
+**Purpose:** Get TouchDesigner server information (version, OS, build).
+
+**Parameters:**
+| Name | Type | Required | Description |
+|------|------|----------|-------------|
+| `detailLevel` | string | No | Response verbosity |
+| `responseFormat` | string | No | Output format |
+
+**Example:** Check TD is running and get version
+```
+get_td_info()
+```
+
+**Returns:** TD version, build number, OS name/version, MCP API version.
+
+**Use this first** to verify the connection is working before building networks.
+
+---
+
+## Tool 3: execute_python_script
+
+**Purpose:** Execute arbitrary Python code inside TouchDesigner's Python environment.
+
+**Parameters:**
+| Name | Type | Required | Description |
+|------|------|----------|-------------|
+| `script` | string | **Yes** | Python code to execute |
+| `detailLevel` | string | No | Response verbosity |
+| `responseFormat` | string | No | Output format |
+
+**Available globals in the script:**
+- `op` — find operators by path
+- `ops` — find multiple operators by pattern
+- `me` — the WebServer DAT running the script
+- `parent` — me.parent()
+- `project` — root project component
+- `td` — the full td module
+- `result` — set this to explicitly return a value
+
+**Execution behavior:**
+- Single-line scripts: tries `eval()` first (returns value), falls back to `exec()`
+- Multi-line scripts: uses `exec()` always
+- stdout/stderr are captured and returned separately
+- If `result` is not set, tries to evaluate the last expression as the return value
+
+**Examples:**
+
+```python
+# Simple query
+execute_python_script(script="op('/project1/noise1').par.seed.val")
+# Returns: {"result": 42, "stdout": "", "stderr": ""}
+
+# Multi-line script
+execute_python_script(script="""
+nodes = op('/project1').findChildren(type=TOP)
+result = [{'name': n.name, 'type': n.OPType} for n in nodes]
+""")
+
+# Connect two operators
+execute_python_script(script="op('/project1/noise1').outputConnectors[0].connect(op('/project1/level1'))")
+
+# Create and configure in one script
+execute_python_script(script="""
+parent = op('/project1')
+n = parent.create(noiseTOP, 'my_noise')  # TD Python operator classes use an upper-case family suffix
+n.par.seed.val = 42
+n.par.monochrome.val = True
+n.par.resolutionw.val = 1920
+n.par.resolutionh.val = 1080
+result = {'path': n.path, 'type': n.OPType}
+""")
+
+# Batch wire a chain
+execute_python_script(script="""
+chain = ['noise1', 'level1', 'blur1', 'composite1', 'null_out']
+for i in range(len(chain) - 1):
+    src = op(f'/project1/{chain[i]}')
+    dst = op(f'/project1/{chain[i+1]}')
+    if src and dst:
+        src.outputConnectors[0].connect(dst)
+result = 'Wired chain: ' + ' -> '.join(chain)
+""")
+```
+
+**When to use:** Wiring connections, complex logic, batch operations, querying state that other tools don't cover. This is the most powerful and flexible tool.
+
+---
+
+## Tool 4: create_td_node
+
+**Purpose:** Create a new operator in TouchDesigner.
+
+**Parameters:**
+| Name | Type | Required | Description |
+|------|------|----------|-------------|
+| `parentPath` | string | **Yes** | Path to parent (e.g., `/project1`) |
+| `nodeType` | string | **Yes** | Operator type (e.g., `noiseTop`, `mathChop`) |
+| `nodeName` | string | No | Custom name (auto-generated if omitted) |
+| `detailLevel` | string | No | Response verbosity |
+| `responseFormat` | string | No | Output format |
+
+**Examples:**
+
+```
+create_td_node(parentPath="/project1", nodeType="noiseTop", nodeName="bg_noise")
+create_td_node(parentPath="/project1", nodeType="compositeTop")  # auto-named
+create_td_node(parentPath="/project1/audio_chain", nodeType="audiospectrumChop", nodeName="spectrum")
+```
+
+**Returns:** Node summary with id, name, path, opType, and all default parameter values.
+
+**Node type naming convention:** camelCase family suffix — `noiseTop`, `mathChop`, `gridSop`, `tableDat`, `phongMat`, `geometryComp`. See `references/operators.md` for the full list.
+
+---
+
+## Tool 5: delete_td_node
+
+**Purpose:** Delete an existing operator.
+
+**Parameters:**
+| Name | Type | Required | Description |
+|------|------|----------|-------------|
+| `nodePath` | string | **Yes** | Absolute path to node (e.g., `/project1/noise1`) |
+| `detailLevel` | string | No | Response verbosity |
+| `responseFormat` | string | No | Output format |
+
+**Example:**
+
+```
+delete_td_node(nodePath="/project1/noise1")
+```
+
+**Returns:** Confirmation with the deleted node's summary (captured before deletion).
+
+---
+
+## Tool 6: get_td_nodes
+
+**Purpose:** List operators under a path with optional filtering.
+
+**Parameters:**
+| Name | Type | Required | Description |
+|------|------|----------|-------------|
+| `parentPath` | string | **Yes** | Parent path (e.g., `/project1`) |
+| `pattern` | string | No | Glob pattern for name filtering (default: `*`) |
+| `includeProperties` | boolean | No | Include full parameter values (default: false) |
+| `detailLevel` | string | No | Response verbosity |
+| `responseFormat` | string | No | Output format |
+| `limit` | integer | No | Max items (1-500) |
+
+**Examples:**
+
+```
+# List all direct children of /project1
+get_td_nodes(parentPath="/project1")
+
+# Find all noise operators
+get_td_nodes(parentPath="/project1", pattern="noise*")
+
+# Get full parameter details
+get_td_nodes(parentPath="/project1", pattern="*", includeProperties=true, limit=20)
+```
+
+**Returns:** List of node summaries. With `includeProperties=false` (default): id, name, path, opType only. With `includeProperties=true`: full parameter values included.
+
+---
+
+## Tool 7: get_td_node_parameters
+
+**Purpose:** Get detailed parameters of a specific node.
+
+**Parameters:**
+| Name | Type | Required | Description |
+|------|------|----------|-------------|
+| `nodePath` | string | **Yes** | Node path (e.g., `/project1/noise1`) |
+| `detailLevel` | string | No | Response verbosity |
+| `responseFormat` | string | No | Output format |
+| `limit` | integer | No | Max parameters (1-500) |
+
+**Example:**
+
+```
+get_td_node_parameters(nodePath="/project1/noise1")
+```
+
+**Returns:** All parameter name-value pairs for the node. Use this to discover available parameters before calling update_td_node_parameters.
+
+---
+
+## Tool 8: get_td_node_errors
+
+**Purpose:** Check for errors on a node and all its descendants (recursive).
+
+**Parameters:**
+| Name | Type | Required | Description |
+|------|------|----------|-------------|
+| `nodePath` | string | **Yes** | Absolute path to inspect (e.g., `/project1`) |
+| `detailLevel` | string | No | Response verbosity |
+| `responseFormat` | string | No | Output format |
+| `limit` | integer | No | Max error items (1-500) |
+
+**Examples:**
+
+```
+# Check entire project for errors
+get_td_node_errors(nodePath="/project1")
+
+# Check a specific chain
+get_td_node_errors(nodePath="/project1/audio_chain")
+```
+
+**Returns:** Error count, hasErrors boolean, and list of errors each with nodePath, nodeName, opType, and error message.
+
+**Always call this after building a network** to catch wiring mistakes, missing references, and configuration errors.
+
+---
+
+## Tool 9: update_td_node_parameters
+
+**Purpose:** Update parameters on an existing node.
+
+**Parameters:**
+| Name | Type | Required | Description |
+|------|------|----------|-------------|
+| `nodePath` | string | **Yes** | Path to node (e.g., `/project1/noise1`) |
+| `properties` | object | **Yes** | Key-value pairs to update (e.g., `{"seed": 42, "monochrome": true}`) |
+| `detailLevel` | string | No | Response verbosity |
+| `responseFormat` | string | No | Output format |
+
+**Examples:**
+
+```
+# Set noise parameters
+update_td_node_parameters(
+    nodePath="/project1/noise1",
+    properties={"seed": 42, "monochrome": false, "period": 4.0, "harmonics": 3,
+                "resolutionw": 1920, "resolutionh": 1080}
+)
+
+# Set a file path
+update_td_node_parameters(
+    nodePath="/project1/moviefilein1",
+    properties={"file": "/Users/me/Videos/clip.mp4", "play": true}
+)
+
+# Set compositing mode
+update_td_node_parameters(
+    nodePath="/project1/composite1",
+    properties={"operand": 0}  # 0=Over, 1=Under, 3=Add, 18=Multiply, 27=Screen
+)
+```
+
+**Returns:** List of successfully updated properties and any that failed (with reasons). Raises an error if zero properties were updated.
+
+**Parameter value types:** Floats, ints, booleans, and strings are all accepted. For menu parameters, use either the string label or the integer index.
+
+---
+
+## Tool 10: exec_node_method
+
+**Purpose:** Call a Python method directly on a specific node.
+
+**Parameters:**
+| Name | Type | Required | Description |
+|------|------|----------|-------------|
+| `nodePath` | string | **Yes** | Path to node |
+| `method` | string | **Yes** | Method name to call |
+| `args` | array | No | Positional arguments (strings, numbers, booleans) |
+| `kwargs` | object | No | Keyword arguments |
+| `detailLevel` | string | No | Response verbosity |
+| `responseFormat` | string | No | Output format |
+
+**Examples:**
+
+```
+# Get all children of a component
+exec_node_method(nodePath="/project1", method="findChildren")
+
+# Find specific children
+exec_node_method(nodePath="/project1", method="findChildren",
+                 kwargs={"name": "noise*", "depth": 1})
+
+# Get node errors
+exec_node_method(nodePath="/project1/noise1", method="errors")
+
+# Get node warnings
+exec_node_method(nodePath="/project1/noise1", method="warnings")
+
+# Save a component as .tox
+exec_node_method(nodePath="/project1/myContainer", method="save",
+                 args=["/path/to/component.tox"])
+```
+
+**Returns:** Processed return value of the method call. TD operators are serialized to their path strings, iterables to lists, etc.
+
+---
+
+## Tool 11: get_td_classes
+
+**Purpose:** List available TouchDesigner Python classes and modules.
+
+**Parameters:**
+| Name | Type | Required | Description |
+|------|------|----------|-------------|
+| `detailLevel` | string | No | Response verbosity |
+| `responseFormat` | string | No | Output format |
+| `limit` | integer | No | Max items (default: 50) |
+
+**Example:**
+
+```
+get_td_classes(limit=100)
+```
+
+**Returns:** List of class/module names and their docstrings from the td module. Useful for discovering what's available in TD's Python environment.
+
+---
+
+## Tool 12: get_td_class_details
+
+**Purpose:** Get methods and properties of a specific TD Python class.
+
+**Parameters:**
+| Name | Type | Required | Description |
+|------|------|----------|-------------|
+| `className` | string | **Yes** | Class name (e.g., `noiseTop`, `OP`, `COMP`) |
+| `detailLevel` | string | No | Response verbosity |
+| `responseFormat` | string | No | Output format |
+| `limit` | integer | No | Max methods/properties (default: 30) |
+
+**Examples:**
+
+```
+# Inspect the noiseTop class
+get_td_class_details(className="noiseTop")
+
+# Inspect the base OP class (all operators inherit from this)
+get_td_class_details(className="OP", limit=50)
+
+# Inspect COMP (component) class
+get_td_class_details(className="COMP")
+```
+
+**Returns:** Class name, type, description, methods (name + description + type), and properties (name + description + type).
+
+---
+
+## Tool 13: get_td_module_help
+
+**Purpose:** Retrieve Python help() text for any TD module, class, or function.
+
+**Parameters:**
+| Name | Type | Required | Description |
+|------|------|----------|-------------|
+| `moduleName` | string | **Yes** | Module/class name (e.g., `noiseCHOP`, `tdu`, `td.OP`) |
+| `detailLevel` | string | No | Response verbosity |
+| `responseFormat` | string | No | Output format |
+
+**Examples:**
+
+```
+# Get help for the noise CHOP class
+get_td_module_help(moduleName="noiseCHOP")
+
+# Get help for the tdu utilities module
+get_td_module_help(moduleName="tdu")
+
+# Dotted name resolution works
+get_td_module_help(moduleName="td.OP")
+```
+
+**Returns:** Full Python help() text output, cleaned of backspace characters.
+
+---
+
+## Workflow: Building a Complete Network
+
+Typical sequence of tool calls to build a project:
+
+1. `get_td_info` — verify connection
+2. `get_td_nodes(parentPath="/project1")` — see what already exists
+3. `create_td_node` (multiple) — create all operators
+4. `update_td_node_parameters` (multiple) — configure each operator
+5. `execute_python_script` — wire all connections in one batch script
+6. `get_td_node_errors(nodePath="/project1")` — check for problems
+7. `get_td_node_parameters` — verify specific nodes if needed
+8. Iterate: adjust parameters, add operators, fix errors
+
+## TD Documentation MCP Server Tools
+
+The bottobot/touchdesigner-mcp-server provides 21 reference/knowledge tools (no running TD needed):
+
+| Tool | Purpose |
+|------|---------|
+| `get_operator` | Get full documentation for a specific operator |
+| `search_operators` | Search operators by keyword |
+| `list_operators` | List all operators (filterable by family) |
+| `compare_operators` | Compare two operators side by side |
+| `get_operator_examples` | Get usage examples for an operator |
+| `suggest_workflow` | Get workflow suggestions for a task |
+| `get_tutorial` | Get a full TD tutorial |
+| `list_tutorials` | List available tutorials |
+| `search_tutorials` | Search tutorial content |
+| `get_python_api` | Get Python API class documentation |
+| `search_python_api` | Search Python API |
+| `list_python_classes` | List all documented Python classes |
+| `get_version_info` | Get TD version release notes |
+| `list_versions` | List all documented TD versions |
+| `get_experimental_techniques` | Get advanced technique guides (GLSL, ML, generative, etc.) |
+| `search_experimental` | Search experimental techniques |
+| `get_glsl_pattern` | Get GLSL code patterns (SDF, color, math utilities) |
+| `get_operator_connections` | Get common operator wiring patterns |
+| `get_network_template` | Get complete network templates with Python generation scripts |
+| `get_experimental_build` | Get experimental build info |
+| `list_experimental_builds` | List experimental builds |
+
+This server contains 630 operator docs, 14 tutorials, 69 Python API classes, and 7 experimental technique categories with working code.
diff --git a/skills/creative/touchdesigner/references/network-patterns.md b/skills/creative/touchdesigner/references/network-patterns.md
new file mode 100644
index 00000000000..7afa2415022
--- /dev/null
+++ b/skills/creative/touchdesigner/references/network-patterns.md
@@ -0,0 +1,914 @@
+# TouchDesigner Network Patterns
+
+Complete network recipes for common creative coding tasks. Each pattern shows the operator chain, MCP tool calls to build it, and key parameter settings.
+
+## Audio-Reactive Visuals
+
+### Pattern 1: Audio Spectrum -> Noise Displacement
+
+Audio drives noise parameters for organic, music-responsive textures.
+
+```
+Audio File In CHOP -> Audio Spectrum CHOP -> Math CHOP (scale)
+                                                |
+                                                v (export to noise params)
+                          Noise TOP -> Level TOP -> Feedback TOP -> Composite TOP -> Null TOP (out)
+                                                        ^                |
+                                                        |________________|
+```
+
+**MCP Build Sequence:**
+
+```
+1. create_td_node(parentPath="/project1", nodeType="audiofileinChop", nodeName="audio_in")
+2. create_td_node(parentPath="/project1", nodeType="audiospectrumChop", nodeName="spectrum")
+3. create_td_node(parentPath="/project1", nodeType="mathChop", nodeName="spectrum_scale")
+4. create_td_node(parentPath="/project1", nodeType="noiseTop", nodeName="noise1")
+5. create_td_node(parentPath="/project1", nodeType="levelTop", nodeName="level1")
+6. create_td_node(parentPath="/project1", nodeType="feedbackTop", nodeName="feedback1")
+7. create_td_node(parentPath="/project1", nodeType="compositeTop", nodeName="comp1")
+8. create_td_node(parentPath="/project1", nodeType="nullTop", nodeName="out")
+
+9. update_td_node_parameters(nodePath="/project1/audio_in",
+     properties={"file": "/path/to/music.wav", "play": true})
+10. update_td_node_parameters(nodePath="/project1/spectrum",
+     properties={"size": 512})
+11. update_td_node_parameters(nodePath="/project1/spectrum_scale",
+     properties={"gain": 2.0, "postoff": 0.0})
+12. update_td_node_parameters(nodePath="/project1/noise1",
+     properties={"type": 1, "monochrome": false, "resolutionw": 1920, "resolutionh": 1080,
+                  "period": 4.0, "harmonics": 3, "amp": 1.0})
+13. update_td_node_parameters(nodePath="/project1/level1",
+     properties={"opacity": 0.95, "gamma1": 0.75})
+14. update_td_node_parameters(nodePath="/project1/feedback1",
+     properties={"top": "/project1/comp1"})
+15. update_td_node_parameters(nodePath="/project1/comp1",
+     properties={"operand": 0})
+
+16. execute_python_script: """
+op('/project1/audio_in').outputConnectors[0].connect(op('/project1/spectrum'))
+op('/project1/spectrum').outputConnectors[0].connect(op('/project1/spectrum_scale'))
+op('/project1/noise1').outputConnectors[0].connect(op('/project1/level1'))
+op('/project1/level1').outputConnectors[0].connect(op('/project1/comp1').inputConnectors[0])
+op('/project1/feedback1').outputConnectors[0].connect(op('/project1/comp1').inputConnectors[1])
+op('/project1/comp1').outputConnectors[0].connect(op('/project1/out'))
+"""
+
+17. execute_python_script: """
+# Export spectrum values to drive noise parameters
+# This makes the noise react to audio frequencies
+op('/project1/noise1').par.seed.expr = "op('/project1/spectrum_scale')['chan1']"
+op('/project1/noise1').par.period.expr = "tdu.remap(op('/project1/spectrum_scale')['chan1'].eval(), 0, 1, 1, 8)"
+"""
+```
+
+### Pattern 2: Beat Detection -> Visual Pulses
+
+Detect beats from audio and trigger visual events.
+
+```
+Audio Device In CHOP -> Audio Spectrum CHOP -> Math CHOP (isolate bass)
+                                                    |
+                                              Trigger CHOP (envelope)
+                                                    |
+                                              [export to visual params]
+```
+
+**Key parameter settings:**
+
+```
+# Isolate bass frequencies (20-200 Hz)
+Math CHOP: chanop=1 (Add channels), range1low=0, range1high=10
+           (with a 512-point FFT at 44100 Hz each bin is ~86 Hz wide, so the first 10 bins span roughly 0-860 Hz; keep only the first 2-3 bins for strictly 20-200 Hz)
+
+# ADSR envelope on each beat
+Trigger CHOP: attack=0.02, peak=1.0, decay=0.3, sustain=0.0, release=0.1
+
+# Export to visual: Scale, brightness, or color intensity
+execute_python_script: "op('/project1/level1').par.brightness1.expr = \"1.0 + op('/project1/trigger1')['chan1'] * 0.5\""
+```
+
+### Pattern 3: Multi-Band Audio -> Multi-Layer Visuals
+
+Split audio into frequency bands, drive different visual layers per band.
+
+```
+Audio In -> Spectrum -> Audio Band EQ (3 bands: bass, mid, treble)
+                              |
+                    +---------+---------+
+                    |         |         |
+                 Bass      Mids     Treble
+                  |          |         |
+           Noise TOP   Circle TOP  Text TOP
+           (slow,dark) (mid,warm)  (fast,bright)
+                  |          |         |
+                  +-----+----+----+----+
+                        |         |
+                   Composite  Composite
+                        |
+                       Out
+```
+
+### Pattern 3b: Audio-Reactive GLSL Fractal (Proven td_exec Recipe)
+
+Complete working recipe tested in TD 099. It plays an MP3, runs an FFT, and feeds the spectrum as a texture into a GLSL shader in which the inner fractal reacts to bass and the outer fractal to treble.
+
+**Network:**
+```
+AudioFileIn CHOP → AudioSpectrum CHOP → Math CHOP (boost) → Resample CHOP (256)
+                                                                  ↓
+                                                            CHOP To TOP (256x1 spectrum texture)
+                                                                  ↓
+Constant TOP (time, rgba32float) → GLSL TOP (input 0=time, input 1=spectrum) → Null → MovieFileOut
+                                                                                        ↓
+AudioFileIn CHOP → Audio Device Out CHOP                                          Record to .mov
+```
+
+**Build via td_exec (one call per step for reliability):**
+
+```python
+# Step 1: Audio chain
+td_exec("""
+root = op('/project1')
+audio = root.create(audiofileinCHOP, 'audio_in')
+audio.par.file = '/path/to/music.mp3'
+audio.par.playmode = 0  # Locked to timeline
+audio.par.volume = 0.5
+
+spec = root.create(audiospectrumCHOP, 'spectrum')
+audio.outputConnectors[0].connect(spec.inputConnectors[0])
+
+math_n = root.create(mathCHOP, 'math_norm')
+spec.outputConnectors[0].connect(math_n.inputConnectors[0])
+math_n.par.gain = 5  # boost signal
+
+resamp = root.create(resampleCHOP, 'resample_spec')
+math_n.outputConnectors[0].connect(resamp.inputConnectors[0])
+resamp.par.timeslice = True
+resamp.par.rate = 256
+
+chop2top = root.create(choptoTOP, 'spectrum_tex')
+resamp.outputConnectors[0].connect(chop2top.inputConnectors[0])
+
+# Audio output (hear the music)
+aout = root.create(audiodeviceoutCHOP, 'audio_out')
+audio.outputConnectors[0].connect(aout.inputConnectors[0])
+result = 'audio chain ok'
+""")
+
+# Step 2: Time driver (MUST be rgba32float — see pitfalls #12)
+td_exec("""
+root = op('/project1')
+td = root.create(constantTOP, 'time_driver')
+td.par.format = 'rgba32float'
+td.par.outputresolution = 'custom'
+td.par.resolutionw = 1
+td.par.resolutionh = 1
+td.par.colorr.expr = "absTime.seconds % 1000.0"
+td.par.colorg.expr = "int(absTime.seconds / 1000.0)"
+result = 'time ok'
+""")
+
+# Step 3: GLSL shader (write to /tmp, load from file)
+td_exec("""
+root = op('/project1')
+glsl = root.create(glslTOP, 'audio_shader')
+glsl.par.outputresolution = 'custom'
+glsl.par.resolutionw = 1280
+glsl.par.resolutionh = 720
+
+sd = root.create(textDAT, 'shader_code')
+sd.text = open('/tmp/my_shader.glsl').read()
+glsl.par.pixeldat = sd
+
+# Wire: input 0 = time, input 1 = spectrum texture
+op('/project1/time_driver').outputConnectors[0].connect(glsl.inputConnectors[0])
+op('/project1/spectrum_tex').outputConnectors[0].connect(glsl.inputConnectors[1])
+result = 'glsl ok'
+""")
+
+# Step 4: Output + recorder
+td_exec("""
+root = op('/project1')
+out = root.create(nullTOP, 'output')
+op('/project1/audio_shader').outputConnectors[0].connect(out.inputConnectors[0])
+
+rec = root.create(moviefileoutTOP, 'recorder')
+out.outputConnectors[0].connect(rec.inputConnectors[0])
+rec.par.type = 'movie'
+rec.par.file = '/tmp/output.mov'
+rec.par.videocodec = 'mjpa'
+result = 'output ok'
+""")
+```
+
+**GLSL shader pattern (audio-reactive fractal):**
+```glsl
+out vec4 fragColor;
+
+vec3 palette(float t) {
+    vec3 a = vec3(0.5); vec3 b = vec3(0.5);
+    vec3 c = vec3(1.0); vec3 d = vec3(0.263, 0.416, 0.557);
+    return a + b * cos(6.28318 * (c * t + d));
+}
+
+void main() {
+    // Input 0 = time (1x1 rgba32float constant)
+    // Input 1 = audio spectrum (256x1 CHOP To TOP)
+    vec4 td = texture(sTD2DInputs[0], vec2(0.5));
+    float t = td.r + td.g * 1000.0;
+
+    vec2 res = uTDOutputInfo.res.zw;
+    vec2 uv = (gl_FragCoord.xy * 2.0 - res) / min(res.x, res.y);
+    vec2 uv0 = uv;
+    vec3 finalColor = vec3(0.0);
+
+    float bass = texture(sTD2DInputs[1], vec2(0.05, 0.0)).r;
+    float mids = texture(sTD2DInputs[1], vec2(0.25, 0.0)).r;
+
+    for (float i = 0.0; i < 4.0; i++) {
+        uv = fract(uv * (1.4 + bass * 0.3)) - 0.5;
+        float d = length(uv) * exp(-length(uv0));
+
+        // Sample spectrum at distance: inner=bass, outer=treble
+        float freq = texture(sTD2DInputs[1], vec2(clamp(d * 0.5, 0.0, 1.0), 0.0)).r;
+
+        vec3 col = palette(length(uv0) + i * 0.4 + t * 0.35);
+        d = sin(d * (7.0 + bass * 4.0) + t * 1.5) / 8.0;
+        d = abs(d);
+        d = pow(0.012 / d, 1.2 + freq * 0.8 + bass * 0.5);
+        finalColor += col * d;
+    }
+
+    // Tone mapping
+    finalColor = finalColor / (finalColor + vec3(1.0));
+    fragColor = TDOutputSwizzle(vec4(finalColor, 1.0));
+}
+```
+
+**Key insights from testing:**
+- `spectrum_tex` (CHOP To TOP) produces a 256x1 texture — x position = frequency
+- Sampling at `vec2(0.05, 0.0)` gets bass, `vec2(0.65, 0.0)` gets treble
+- Sampling based on pixel distance (`d * 0.5`) makes inner fractal react to bass, outer to treble
+- `bass * 0.3` in the `fract()` zoom makes the fractal breathe with kicks
+- Math CHOP gain of 5 is needed because raw spectrum values are very small
+
+## Generative Art
+
+### Pattern 4: Feedback Loop with Transform
+
+Classic generative technique — texture evolves through recursive transformation.
+
+```
+Noise TOP -> Composite TOP -> Level TOP -> Null TOP (out)
+                  ^      |
+                  |      v
+            Transform TOP <- Feedback TOP
+```
+
+**MCP Build Sequence:**
+
+```
+1. create_td_node(parentPath="/project1", nodeType="noiseTop", nodeName="seed_noise")
+2. create_td_node(parentPath="/project1", nodeType="compositeTop", nodeName="mix")
+3. create_td_node(parentPath="/project1", nodeType="transformTop", nodeName="evolve")
+4. create_td_node(parentPath="/project1", nodeType="feedbackTop", nodeName="fb")
+5. create_td_node(parentPath="/project1", nodeType="levelTop", nodeName="color_correct")
+6. create_td_node(parentPath="/project1", nodeType="nullTop", nodeName="out")
+
+7. update_td_node_parameters(nodePath="/project1/seed_noise",
+     properties={"type": 1, "monochrome": false, "period": 2.0, "amp": 0.3,
+                  "resolutionw": 1920, "resolutionh": 1080})
+8. update_td_node_parameters(nodePath="/project1/mix",
+     properties={"operand": 27})  # 27 = Screen blend
+9. update_td_node_parameters(nodePath="/project1/evolve",
+     properties={"sx": 1.003, "sy": 1.003, "rz": 0.5, "extend": 2})  # slight zoom + rotate, repeat edges
+10. update_td_node_parameters(nodePath="/project1/fb",
+     properties={"top": "/project1/mix"})
+11. update_td_node_parameters(nodePath="/project1/color_correct",
+     properties={"opacity": 0.98, "gamma1": 0.85})
+
+12. execute_python_script: """
+op('/project1/seed_noise').outputConnectors[0].connect(op('/project1/mix').inputConnectors[0])
+op('/project1/fb').outputConnectors[0].connect(op('/project1/evolve'))
+op('/project1/evolve').outputConnectors[0].connect(op('/project1/mix').inputConnectors[1])
+op('/project1/mix').outputConnectors[0].connect(op('/project1/color_correct'))
+op('/project1/color_correct').outputConnectors[0].connect(op('/project1/out'))
+"""
+```
+
+**Variations:**
+- Change Transform: `rz` (rotation), `sx/sy` (zoom), `tx/ty` (drift)
+- Change Composite operand: Screen (glow), Add (bright), Multiply (dark)
+- Add HSV Adjust in the feedback loop for color evolution
+- Add Blur for dreamlike softness
+- Replace Noise with a GLSL TOP for custom seed patterns
+
+### Pattern 5: Instancing (Particle-Like Systems)
+
+Render thousands of copies of geometry, each with unique position/rotation/scale driven by CHOP data or DATs.
+
+```
+Table DAT (instance data) -> DAT to CHOP -> Geometry COMP (instancing on) -> Render TOP
+                                              + Sphere SOP (template geometry)
+                                              + Constant MAT (material)
+                                              + Camera COMP
+                                              + Light COMP
+```
+
+**MCP Build Sequence:**
+
+```
+1. create_td_node(parentPath="/project1", nodeType="tableDat", nodeName="instance_data")
+2. create_td_node(parentPath="/project1", nodeType="geometryComp", nodeName="geo1")
+3. create_td_node(parentPath="/project1/geo1", nodeType="sphereSop", nodeName="sphere")
+4. create_td_node(parentPath="/project1", nodeType="constMat", nodeName="mat1")
+5. create_td_node(parentPath="/project1", nodeType="cameraComp", nodeName="cam1")
+6. create_td_node(parentPath="/project1", nodeType="lightComp", nodeName="light1")
+7. create_td_node(parentPath="/project1", nodeType="renderTop", nodeName="render1")
+
+8. execute_python_script: """
+import random, math
+dat = op('/project1/instance_data')
+dat.clear()
+dat.appendRow(['tx', 'ty', 'tz', 'sx', 'sy', 'sz', 'cr', 'cg', 'cb'])
+for i in range(500):
+    angle = i * 0.1
+    r = 2 + i * 0.01
+    dat.appendRow([
+        str(math.cos(angle) * r),
+        str(math.sin(angle) * r),
+        str((i - 250) * 0.02),
+        '0.05', '0.05', '0.05',
+        str(random.random()),
+        str(random.random()),
+        str(random.random())
+    ])
+"""
+
+9. update_td_node_parameters(nodePath="/project1/geo1",
+     properties={"instancing": true, "instancechop": "",
+                  "instancedat": "/project1/instance_data",
+                  "material": "/project1/mat1"})
+10. update_td_node_parameters(nodePath="/project1/render1",
+     properties={"camera": "/project1/cam1", "geometry": "/project1/geo1",
+                  "lights": "/project1/light1",
+                  "resolutionw": 1920, "resolutionh": 1080})
+11. update_td_node_parameters(nodePath="/project1/cam1",
+     properties={"tz": 10})
+```
+
+### Pattern 6: Reaction-Diffusion (GLSL)
+
+Classic Gray-Scott reaction-diffusion system running on the GPU.
+
+```
+Text DAT (GLSL code) -> GLSL TOP (resolution, dat reference) -> Feedback TOP
+                              ^                                       |
+                              |_______________________________________|
+                         Level TOP (out)
+```
+
+**Key GLSL code (write to Text DAT via execute_python_script):**
+
+```glsl
+// Gray-Scott reaction-diffusion
+uniform float feed;    // 0.037
+uniform float kill;    // 0.06
+uniform float dA;      // 1.0
+uniform float dB;      // 0.5
+
+layout(location = 0) out vec4 fragColor;
+
+void main() {
+    vec2 uv = vUV.st;
+    vec2 texel = 1.0 / uTDOutputInfo.res.zw;
+
+    vec4 c = texture(sTD2DInputs[0], uv);
+    float a = c.r;
+    float b = c.g;
+
+    // Laplacian (9-point stencil)
+    float lA = 0.0, lB = 0.0;
+    for(int dx = -1; dx <= 1; dx++) {
+        for(int dy = -1; dy <= 1; dy++) {
+            float w = (dx == 0 && dy == 0) ? -1.0 : (abs(dx) + abs(dy) == 1 ? 0.2 : 0.05);
+            vec4 s = texture(sTD2DInputs[0], uv + vec2(dx, dy) * texel);
+            lA += s.r * w;
+            lB += s.g * w;
+        }
+    }
+
+    float reaction = a * b * b;
+    float newA = a + (dA * lA - reaction + feed * (1.0 - a));
+    float newB = b + (dB * lB + reaction - (kill + feed) * b);
+
+    fragColor = vec4(clamp(newA, 0.0, 1.0), clamp(newB, 0.0, 1.0), 0.0, 1.0);
+}
+```
+
+## Video Processing
+
+### Pattern 7: Video Effects Chain
+
+Apply a chain of effects to a video file.
+
+```
+Movie File In TOP -> HSV Adjust TOP -> Level TOP -> Blur TOP -> Composite TOP -> Null TOP (out)
+                                                                      ^
+                                                          Text TOP ---+
+```
+
+**MCP Build Sequence:**
+
+```
+1. create_td_node(parentPath="/project1", nodeType="moviefileinTop", nodeName="video_in")
+2. create_td_node(parentPath="/project1", nodeType="hsvadjustTop", nodeName="color")
+3. create_td_node(parentPath="/project1", nodeType="levelTop", nodeName="levels")
+4. create_td_node(parentPath="/project1", nodeType="blurTop", nodeName="blur")
+5. create_td_node(parentPath="/project1", nodeType="compositeTop", nodeName="overlay")
+6. create_td_node(parentPath="/project1", nodeType="textTop", nodeName="title")
+7. create_td_node(parentPath="/project1", nodeType="nullTop", nodeName="out")
+
+8. update_td_node_parameters(nodePath="/project1/video_in",
+     properties={"file": "/path/to/video.mp4", "play": true})
+9. update_td_node_parameters(nodePath="/project1/color",
+     properties={"hueoffset": 0.1, "saturationmult": 1.3})
+10. update_td_node_parameters(nodePath="/project1/levels",
+     properties={"brightness1": 1.1, "contrast": 1.2, "gamma1": 0.9})
+11. update_td_node_parameters(nodePath="/project1/blur",
+     properties={"sizex": 2, "sizey": 2})
+12. update_td_node_parameters(nodePath="/project1/title",
+     properties={"text": "My Video", "fontsizex": 48, "alignx": 1, "aligny": 1})
+
+13. execute_python_script: """
+chain = ['video_in', 'color', 'levels', 'blur']
+for i in range(len(chain) - 1):
+    op(f'/project1/{chain[i]}').outputConnectors[0].connect(op(f'/project1/{chain[i+1]}'))
+op('/project1/blur').outputConnectors[0].connect(op('/project1/overlay').inputConnectors[0])
+op('/project1/title').outputConnectors[0].connect(op('/project1/overlay').inputConnectors[1])
+op('/project1/overlay').outputConnectors[0].connect(op('/project1/out'))
+"""
+```
+
+### Pattern 8: Video Recording
+
+Record the output to a file. **H.264/H.265 require a Commercial license** — use Motion JPEG (`mjpa`) on Non-Commercial.
+
+```
+[any TOP chain] -> Null TOP -> Movie File Out TOP
+```
+
+```python
+# Build via td_exec():
+root = op('/project1')
+
+# Always put a Null TOP before the recorder
+null_out = root.op('out')  # or create one
+rec = root.create(moviefileoutTOP, 'recorder')
+null_out.outputConnectors[0].connect(rec.inputConnectors[0])
+
+rec.par.type = 'movie'
+rec.par.file = '/tmp/output.mov'
+rec.par.videocodec = 'mjpa'  # Motion JPEG — works on Non-Commercial
+
+# Start recording (par.record is a toggle — .record() method may not exist)
+rec.par.record = True
+# ... let TD run for desired duration ...
+rec.par.record = False
+
+# For image sequences:
+# rec.par.type = 'imagesequence'
+# rec.par.imagefiletype = 'png'
+# rec.par.file.expr = "'/tmp/frames/out' + me.fileSuffix"  # fileSuffix REQUIRED
+```
+
+**Pitfalls:**
+- Setting `par.file` + `par.record = True` in the same script may race — use `run("...", delayFrames=2)`
+- `TOP.save()` called rapidly always captures the same frame — use MovieFileOut for animation
+- See `pitfalls.md` #25-27 for full details
+
+### Pattern 8b: TD → External Pipeline (e.g., ASCII Video)
+
+Export TD visuals for use in another tool (ffmpeg, Python, ASCII art, etc.):
+
+```python
+# 1. Record with MovieFileOut (MJPEG)
+rec.par.videocodec = 'mjpa'
+rec.par.record = True
+# ... wait N seconds ...
+rec.par.record = False
+
+# 2. Extract frames with ffmpeg (outside TD)
+# ffmpeg -i /tmp/output.mov -vframes 120 /tmp/frames/frame_%06d.png
+
+# 3. Load frames in Python for processing
+# from PIL import Image
+# img = Image.open('/tmp/frames/frame_000001.png')
+```
+
+## Data Visualization
+
+### Pattern 9: Table Data -> Bar Chart via Instancing
+
+Visualize tabular data as a 3D bar chart.
+
+```
+Table DAT (data) -> Script DAT (transform to instance format) -> DAT to CHOP
+                                                                      |
+Box SOP -> Geometry COMP (instancing from CHOP) -> Render TOP -> Null TOP (out)
+           + PBR MAT
+           + Camera COMP
+           + Light COMP
+```
+
+```python
+# Script DAT code to transform data to instance positions
+execute_python_script: """
+source = op('/project1/data_table')
+instance = op('/project1/instance_transform')
+instance.clear()
+instance.appendRow(['tx', 'ty', 'tz', 'sx', 'sy', 'sz', 'cr', 'cg', 'cb'])
+
+for i in range(1, source.numRows):
+    value = float(source[i, 'value'])
+    name = source[i, 'name']
+    instance.appendRow([
+        str(i * 1.5),          # x position (spread bars)
+        str(value / 2),        # y position (center bar vertically)
+        '0',                   # z position
+        '1', str(value), '1',  # scale (height = data value)
+        '0.2', '0.6', '1.0'   # color (blue)
+    ])
+"""
+```
+
+### Pattern 9b: Audio-Reactive GLSL Fractal (Proven Recipe)
+
+Audio spectrum drives a GLSL fractal shader directly via a spectrum texture input. Bass thickens inner fractal lines, mids twist rotation, highs light outer edges. Tested and working on TD 099 Non-Commercial.
+
+```
+Audio File In CHOP → Audio Spectrum CHOP → Math CHOP (boost gain=5)
+    → Resample CHOP (256 samples) → CHOP To TOP (spectrum texture, 256x1)
+                                          ↓ (input 1)
+Constant TOP (rgba32float, time) → GLSL TOP (audio-reactive shader) → Null TOP
+        (input 0)                    ↑
+                              Text DAT (shader code)
+```
+
+**Build via td_exec (complete working script):**
+
+```python
+td_exec("""
+import os
+root = op('/project1')
+
+# Audio input
+audio = root.create(audiofileinCHOP, 'audio_in')
+audio.par.file = '/path/to/music.mp3'
+audio.par.playmode = 0  # Locked to timeline
+
+# FFT analysis
+spectrum = root.create(audiospectrumCHOP, 'spectrum')
+audio.outputConnectors[0].connect(spectrum.inputConnectors[0])
+
+# Normalize + boost
+math = root.create(mathCHOP, 'math_norm')
+spectrum.outputConnectors[0].connect(math.inputConnectors[0])
+math.par.gain = 5
+
+# Resample to 256 bins for texture
+resample = root.create(resampleCHOP, 'resample_spec')
+math.outputConnectors[0].connect(resample.inputConnectors[0])
+resample.par.timeslice = True
+resample.par.rate = 256
+
+# Spectrum → texture (256x1 image)
+# NOTE: choptoTOP has NO input connectors — use par.chop reference!
+spec_tex = root.create(choptoTOP, 'spectrum_tex')
+spec_tex.par.chop = resample
+
+# Time driver (rgba32float to avoid 0-1 clamping!)
+time_drv = root.create(constantTOP, 'time_driver')
+time_drv.par.format = 'rgba32float'
+time_drv.par.outputresolution = 'custom'
+time_drv.par.resolutionw = 1
+time_drv.par.resolutionh = 1
+time_drv.par.colorr.expr = "absTime.seconds % 1000.0"
+time_drv.par.colorg.expr = "int(absTime.seconds / 1000.0)"
+
+# GLSL shader
+glsl = root.create(glslTOP, 'audio_shader')
+glsl.par.outputresolution = 'custom'
+glsl.par.resolutionw = 1280; glsl.par.resolutionh = 720
+
+shader_dat = root.create(textDAT, 'shader_code')
+shader_dat.text = open('/tmp/shader.glsl').read()
+glsl.par.pixeldat = shader_dat
+
+# Wire: input 0=time, input 1=spectrum
+time_drv.outputConnectors[0].connect(glsl.inputConnectors[0])
+spec_tex.outputConnectors[0].connect(glsl.inputConnectors[1])
+
+# Output + audio playback
+out = root.create(nullTOP, 'output')
+glsl.outputConnectors[0].connect(out.inputConnectors[0])
+audio_out = root.create(audiodeviceoutCHOP, 'audio_out')
+audio.outputConnectors[0].connect(audio_out.inputConnectors[0])
+
+result = 'network built'
+""")
+```
+
+**GLSL shader (reads spectrum from input 1 texture):**
+
+```glsl
+out vec4 fragColor;
+
+vec3 palette(float t) {
+    vec3 a = vec3(0.5); vec3 b = vec3(0.5);
+    vec3 c = vec3(1.0); vec3 d = vec3(0.263, 0.416, 0.557);
+    return a + b * cos(6.28318 * (c * t + d));
+}
+
+void main() {
+    vec4 td = texture(sTD2DInputs[0], vec2(0.5));
+    float t = td.r + td.g * 1000.0;
+
+    vec2 res = uTDOutputInfo.res.zw;
+    vec2 uv = (gl_FragCoord.xy * 2.0 - res) / min(res.x, res.y);
+    vec2 uv0 = uv;
+    vec3 finalColor = vec3(0.0);
+
+    float bass = texture(sTD2DInputs[1], vec2(0.05, 0.0)).r;
+    float mids = texture(sTD2DInputs[1], vec2(0.25, 0.0)).r;
+    float highs = texture(sTD2DInputs[1], vec2(0.65, 0.0)).r;
+
+    float ca = cos(t * (0.15 + mids * 0.3));
+    float sa = sin(t * (0.15 + mids * 0.3));
+    uv = mat2(ca, -sa, sa, ca) * uv;
+
+    for (float i = 0.0; i < 4.0; i++) {
+        uv = fract(uv * (1.4 + bass * 0.3)) - 0.5;
+        float d = length(uv) * exp(-length(uv0));
+        float freq = texture(sTD2DInputs[1], vec2(clamp(d*0.5, 0.0, 1.0), 0.0)).r;
+        vec3 col = palette(length(uv0) + i * 0.4 + t * 0.35);
+        d = sin(d * (7.0 + bass * 4.0) + t * 1.5) / 8.0;
+        d = abs(d);
+        d = pow(0.012 / d, 1.2 + freq * 0.8 + bass * 0.5);
+        finalColor += col * d;
+    }
+
+    float glow = (0.03 + bass * 0.05) / (length(uv0) + 0.03);
+    finalColor += vec3(0.4, 0.1, 0.7) * glow * (0.6 + 0.4 * sin(t * 2.5));
+
+    float ring = abs(length(uv0) - 0.4 - mids * 0.3);
+    finalColor += vec3(0.1, 0.6, 0.8) * (0.005 / ring) * (0.2 + highs * 0.5);
+
+    finalColor *= smoothstep(0.0, 1.0, 1.0 - dot(uv0*0.55, uv0*0.55));
+    finalColor = finalColor / (finalColor + vec3(1.0));
+
+    fragColor = TDOutputSwizzle(vec4(finalColor, 1.0));
+}
+```
+
+**How spectrum sampling drives the visual:**
+- `texture(sTD2DInputs[1], vec2(x, 0.0)).r` — x position = frequency (0=bass, 1=treble)
+- Inner fractal iterations sample lower x → react to bass
+- Outer iterations sample higher x → react to treble
+- `bass * 0.3` on `fract()` scale → fractal zoom pulses with bass
+- `bass * 4.0` on sin frequency → line density pulses with bass
+- `mids * 0.3` on rotation speed → spiral twists faster during vocal/mid sections
+- `highs * 0.5` on ring opacity → high-frequency sparkle on outer ring
+
+**Recording the output:** Use MovieFileOut TOP with `mjpa` codec (H.264 requires Commercial license). See pitfalls #25-27.
+
+## GLSL Shaders
+
+### Pattern 10: Custom Fragment Shader
+
+Write a custom visual effect as a GLSL fragment shader.
+
+```
+Text DAT (shader code) -> GLSL TOP -> Level TOP -> Null TOP (out)
+                           + optional input TOPs for texture sampling
+```
+
+**Common GLSL uniforms available in TouchDesigner:**
+
+```glsl
+// Automatically provided by TD
+uniform TDTexInfo uTDOutputInfo;  // struct; .res = (1/w, 1/h, w, h), so .res.zw = resolution
+
+// NOTE: uTDCurrentTime does NOT exist in TD 099!
+// Feed time via a 1x1 Constant TOP (format=rgba32float):
+//   t.par.colorr.expr = "absTime.seconds % 1000.0"
+//   t.par.colorg.expr = "int(absTime.seconds / 1000.0)"
+// Then read in GLSL:
+//   vec4 td = texture(sTD2DInputs[0], vec2(0.5));
+//   float t = td.r + td.g * 1000.0;
+
+// Input textures (from connected TOP inputs)
+uniform sampler2D sTD2DInputs[1];  // array of input samplers
+
+// From vertex shader
+in vec3 vUV;  // UV coordinates (0-1 range)
+```
+
+**Example: Plasma shader (using time from input texture)**
+
+```glsl
+layout(location = 0) out vec4 fragColor;
+
+void main() {
+    vec2 uv = vUV.st;
+    // Read time from Constant TOP input 0 (rgba32float format)
+    vec4 td = texture(sTD2DInputs[0], vec2(0.5));
+    float t = td.r + td.g * 1000.0;
+
+    float v1 = sin(uv.x * 10.0 + t);
+    float v2 = sin(uv.y * 10.0 + t * 0.7);
+    float v3 = sin((uv.x + uv.y) * 10.0 + t * 1.3);
+    float v4 = sin(length(uv - 0.5) * 20.0 - t * 2.0);
+
+    float v = (v1 + v2 + v3 + v4) * 0.25;
+
+    vec3 color = vec3(
+        sin(v * 3.14159 + 0.0) * 0.5 + 0.5,
+        sin(v * 3.14159 + 2.094) * 0.5 + 0.5,
+        sin(v * 3.14159 + 4.189) * 0.5 + 0.5
+    );
+
+    fragColor = vec4(color, 1.0);
+}
+```
+
+### Pattern 11: Multi-Pass GLSL (Ping-Pong)
+
+For effects needing state across frames (particles, fluid, cellular automata), use GLSL Multi TOP with multiple passes or a Feedback TOP loop.
+
+```
+GLSL Multi TOP (pass 0: simulation, pass 1: rendering)
+   + Text DAT (simulation shader)
+   + Text DAT (render shader)
+   -> Level TOP -> Null TOP (out)
+      ^
+      |__ Feedback TOP (feeds simulation state back)
+```
+
+## Interactive Installations
+
+### Pattern 12: Mouse/Touch -> Visual Response
+
+```
+Mouse In CHOP -> Math CHOP (normalize to 0-1) -> [export to visual params]
+
+# Or for touch/multi-touch:
+Multi Touch In DAT -> Script CHOP (parse touches) -> [export to visual params]
+```
+
+```python
+# Drive the noise offset from the normalized (0-1) mouse channels
+execute_python_script: """
+op('/project1/noise1').par.offsetx.expr = "op('/project1/mouse_norm')['tx']"
+op('/project1/noise1').par.offsety.expr = "op('/project1/mouse_norm')['ty']"
+"""
+```
+
+### Pattern 13: OSC Control (from external software)
+
+```
+OSC In CHOP (port 7000) -> Select CHOP (pick channels) -> [export to visual params]
+```
+
+```
+1. create_td_node(parentPath="/project1", nodeType="oscinChop", nodeName="osc_in")
+2. update_td_node_parameters(nodePath="/project1/osc_in", properties={"port": 7000})
+
+# OSC messages like /frequency 440 will appear as channel "frequency" with value 440
+# Export to any parameter:
+3. execute_python_script: "op('/project1/noise1').par.period.expr = \"op('/project1/osc_in')['frequency']\""
+```
+
+### Pattern 14: MIDI Control (DJ/VJ)
+
+```
+MIDI In CHOP (device) -> Select CHOP -> [export channels to visual params]
+```
+
+Common MIDI mappings:
+- CC channels (knobs/faders): continuous 0-127, map to float params
+- Note On/Off: binary triggers, map to Trigger CHOP for envelopes
+- Velocity: intensity/brightness
+
+## Live Performance
+
+### Pattern 15: Multi-Source VJ Setup
+
+```
+Source A (generative) ----+
+Source B (video) ---------+-- Switch/Cross TOP -- Level TOP -- Window COMP (output)
+Source C (camera) --------+
+                           ^
+                    MIDI/OSC control selects active source and crossfade
+```
+
+```python
+# MIDI CC1 controls which source is active (0-127 -> 0-2; dividing by 43 keeps 126-127 at index 2)
+execute_python_script: """
+op('/project1/switch1').par.index.expr = "int(op('/project1/midi_in')['cc1'] / 43)"
+"""
+
+# MIDI CC2 controls crossfade between current and next
+execute_python_script: """
+op('/project1/cross1').par.cross.expr = "op('/project1/midi_in')['cc2'] / 127.0"
+"""
+```
+
+### Pattern 16: Projection Mapping
+
+```
+Content TOPs ----+
+                 |
+Stoner TOP (UV mapping) -> Composite TOP -> Window COMP (projector output)
+   or
+Kantan Mapper COMP (external .tox)
+```
+
+For projection mapping, the key is:
+1. Create your visual content as standard TOPs
+2. Use Stoner TOP or a third-party mapping tool to UV-map content to physical surfaces
+3. Output via Window COMP to the projector
+
+### Pattern 17: Cue System
+
+```
+Table DAT (cue list: cue_number, scene_name, duration, transition_type)
+    |
+Script CHOP (cue state: current_cue, progress, next_cue_trigger)
+    |
+[export to Switch/Cross TOPs to transition between scenes]
+```
+
+```python
+execute_python_script: """
+# Simple cue system
+cue_table = op('/project1/cue_list')
+cue_state = op('/project1/cue_state')
+
+def advance_cue():
+    current = int(cue_state.par.value0.val)
+    next_cue = min(current + 1, cue_table.numRows - 1)
+    cue_state.par.value0.val = next_cue
+    
+    scene = cue_table[next_cue, 'scene_name']
+    duration = float(cue_table[next_cue, 'duration'])
+    
+    # Set crossfade target and duration
+    op('/project1/cross1').par.cross.val = 0
+    # Animate cross to 1.0 over duration seconds
+    # (use a Timer CHOP or LFO CHOP for smooth animation)
+"""
+```
+
+## Networking
+
+### Pattern 18: OSC Server/Client
+
+```
+# Sending OSC
+OSC Out CHOP -> (network) -> external application
+
+# Receiving OSC  
+(network) -> OSC In CHOP -> Select CHOP -> [use values]
+```
+
+### Pattern 19: NDI Video Streaming
+
+```
+# Send video over network
+[any TOP chain] -> NDI Out TOP (source name)
+
+# Receive video from network
+NDI In TOP (select source) -> [process as normal TOP]
+```
+
+### Pattern 20: WebSocket Communication
+
+```
+WebSocket DAT -> Script DAT (parse JSON messages) -> [update visuals]
+```
+
+```python
+execute_python_script: """
+ws = op('/project1/websocket1')
+ws.par.address = 'ws://localhost:8080'
+ws.par.active = True
+
+# In a DAT Execute DAT callback (DAT Execute DAT watching the WebSocket DAT):
+# def onTableChange(dat):
+#     import json
+#     msg = json.loads(dat.text)
+#     op('/project1/noise1').par.seed.val = msg.get('seed', 0)
+"""
+```
diff --git a/skills/creative/touchdesigner/references/operators.md b/skills/creative/touchdesigner/references/operators.md
new file mode 100644
index 00000000000..6aa716cb9a2
--- /dev/null
+++ b/skills/creative/touchdesigner/references/operators.md
@@ -0,0 +1,239 @@
+# TouchDesigner Operator Reference
+
+## Operator Families Overview
+
+TouchDesigner has 6 operator families. Each family processes a specific data type and is color-coded in the UI. Operators can only connect to others of the SAME family (with cross-family converters as the bridge).
+
+## TOPs — Texture Operators (Purple)
+
+2D image/texture processing on the GPU. The workhorse of visual output.
+
+### Generators (create images from nothing)
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Noise TOP | `noiseTop` | `type` (0-6), `monochrome`, `seed`, `period`, `harmonics`, `exponent`, `amp`, `offset`, `resolutionw/h` | Procedural noise textures — Perlin, Simplex, Sparse, etc. Foundation of generative art. |
+| Constant TOP | `constantTop` | `colorr/g/b/a`, `resolutionw/h` | Solid color. Use as background or blend input. |
+| Text TOP | `textTop` | `text`, `fontsizex`, `fontfile`, `alignx/y`, `colorr/g/b` | Render text to texture. Supports multi-line, word wrap. |
+| Ramp TOP | `rampTop` | `type` (0=horizontal, 1=vertical, 2=radial, 3=circular), `phase`, `period` | Gradient textures for masking, color mapping. |
+| Circle TOP | `circleTop` | `radiusx/y`, `centerx/y`, `width` | Circles, rings, ellipses. |
+| Rectangle TOP | `rectangleTop` | `sizex/y`, `centerx/y`, `softness` | Rectangles with optional softness. |
+| GLSL TOP | `glslTop` | `pixeldat` (points to shader DAT), `resolutionw/h`, `outputformat`, custom uniforms | Custom fragment shaders. Most powerful TOP for custom visuals. |
+| GLSL Multi TOP | `glslmultiTop` | `dat`, `numinputs`, `numoutputs`, `numcomputepasses` | Multi-pass GLSL with compute shaders. Advanced. |
+| Render TOP | `renderTop` | `camera`, `geometry`, `lights`, `resolutionw/h` | Renders 3D scenes (SOPs + MATs + Camera/Light COMPs). |
+
+### Filters (modify a single input)
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Level TOP | `levelTop` | `opacity`, `brightness1/2`, `gamma1/2`, `contrast`, `invert`, `blacklevel/whitelevel` | Brightness, contrast, gamma, levels. Essential color correction. |
+| Blur TOP | `blurTop` | `sizex/y`, `type` (0=Gaussian, 1=Box, 2=Bartlett) | Gaussian/box blur. |
+| Transform TOP | `transformTop` | `tx/ty`, `sx/sy`, `rz`, `pivotx/y`, `extend` (0=Hold, 1=Zero, 2=Repeat, 3=Mirror) | Translate, scale, rotate textures. |
+| HSV Adjust TOP | `hsvadjustTop` | `hueoffset`, `saturationmult`, `valuemult` | HSV color adjustments. |
+| Lookup TOP | `lookupTop` | (input: texture + lookup table) | Color remapping via lookup table texture. |
+| Edge TOP | `edgeTop` | `type` (0=Sobel, 1=Frei-Chen) | Edge detection. |
+| Displace TOP | `displaceTop` | `scalex/y` | Pixel displacement using a second input as displacement map. |
+| Flip TOP | `flipTop` | `flipx`, `flipy`, `flop` (diagonal) | Mirror/flip textures. |
+| Crop TOP | `cropTop` | `cropleft/right/top/bottom` | Crop region of texture. |
+| Resolution TOP | `resolutionTop` | `resolutionw/h`, `outputresolution` | Resize textures. |
+| Null TOP | `nullTop` | (none significant) | Pass-through. Use for organization, referencing, feedback delay. |
+| Cache TOP | `cacheTop` | `length`, `step` | Store N frames of history. Useful for trails, time effects. |
+
+### Compositors (combine multiple inputs)
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Composite TOP | `compositeTop` | `operand` (0-31: Over, Add, Multiply, Screen, etc.) | Blend two textures with standard compositing modes. |
+| Over TOP | `overTop` | (simple alpha compositing) | Layer with alpha. Simpler than Composite. |
+| Add TOP | `addTop` | (additive blend) | Additive blending. Great for glow, light effects. |
+| Multiply TOP | `multiplyTop` | (multiplicative blend) | Multiply blend. Good for masking, darkening. |
+| Switch TOP | `switchTop` | `index` (0-based) | Switch between multiple inputs by index. |
+| Cross TOP | `crossTop` | `cross` (0.0-1.0) | Crossfade between two inputs. |
+
+### I/O (input/output)
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Movie File In TOP | `moviefileinTop` | `file`, `speed`, `trim`, `index` | Load video files, image sequences. |
+| Movie File Out TOP | `moviefileoutTop` | `file`, `type` (`movie`/`imagesequence`), `videocodec`, `record` (toggle) | Record/export video files. |
+| NDI In TOP | `ndiinTop` | `sourcename` | Receive NDI video streams. |
+| NDI Out TOP | `ndioutTop` | `sourcename` | Send NDI video streams. |
+| Syphon Spout In/Out TOP | `syphonspoutinTop` / `syphonspoutoutTop` | `servername` | Inter-app texture sharing. |
+| Video Device In TOP | `videodeviceinTop` | `device` | Webcam/capture card input. |
+| Feedback TOP | `feedbackTop` | `top` (path to the TOP to feed back) | One-frame delay feedback. Essential for recursive effects. |
+
+### Converters
+
+| Operator | Type Name | Direction | Use |
+|----------|-----------|-----------|-----|
+| CHOP to TOP | `choptoTop` | CHOP -> TOP | Visualize channel data as texture (waveform, spectrum display). |
+| TOP to CHOP | `toptoChop` | TOP -> CHOP | Sample texture pixels as channel data. |
+
+## CHOPs — Channel Operators (Green)
+
+Time-varying numeric data: audio, animation curves, sensor data, control signals.
+
+### Generators
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Constant CHOP | `constantChop` | `name0/value0`, `name1/value1`... | Static named channels. Control panel for parameters. |
+| LFO CHOP | `lfoChop` | `frequency`, `type` (0=Sin, 1=Tri, 2=Square, 3=Ramp, 4=Pulse), `amp`, `offset`, `phase` | Low frequency oscillator. Animation driver. |
+| Noise CHOP | `noiseChop` | `type`, `roughness`, `period`, `amp`, `seed`, `channels` | Smooth random motion. Organic animation. |
+| Pattern CHOP | `patternChop` | `type` (0=Sine, 1=Triangle, ...), `length`, `cycles` | Generate waveform patterns. |
+| Timer CHOP | `timerChop` | `length`, `play`, `cue`, `cycles` | Countdown/count-up timer with cue points. |
+| Count CHOP | `countChop` | `threshold`, `limittype`, `limitmin/max` | Event counter with wrapping/clamping. |
+
+### Audio
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Audio File In CHOP | `audiofileinChop` | `file`, `volume`, `play`, `speed`, `trim` | Play audio files. |
+| Audio Device In CHOP | `audiodeviceinChop` | `device`, `channels` | Live microphone/line input. |
+| Audio Spectrum CHOP | `audiospectrumChop` | `size` (FFT size), `outputformat` (0=Power, 1=Magnitude) | FFT frequency analysis. |
+| Audio Band EQ CHOP | `audiobandeqChop` | `bands`, `gaindb` per band | Frequency band isolation. |
+| Audio Device Out CHOP | `audiodeviceoutChop` | `device` | Audio playback output. |
+
+### Math/Logic
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Math CHOP | `mathChop` | `preoff`, `gain`, `postoff`, `chanop` (0=Off, 1=Add, 2=Subtract, 3=Multiply...) | Math operations on channels. The Swiss army knife. |
+| Logic CHOP | `logicChop` | `preop` (0=Off, 1=AND, 2=OR, 3=XOR, 4=NAND), `convert` | Boolean logic on channels. |
+| Filter CHOP | `filterChop` | `type` (0=Low Pass, 1=Band Pass, 2=High Pass, 3=Notch), `cutofffreq`, `filterwidth` | Smooth, dampen, filter signals. |
+| Lag CHOP | `lagChop` | `lag1/2`, `overshoot1/2` | Smooth transitions with overshoot. |
+| Limit CHOP | `limitChop` | `type` (0=Clamp, 1=Loop, 2=ZigZag), `min/max` | Clamp or wrap channel values. |
+| Speed CHOP | `speedChop` | (none significant) | Integrate values (velocity to position, acceleration to velocity). |
+| Trigger CHOP | `triggerChop` | `attack`, `peak`, `decay`, `sustain`, `release` | ADSR envelope from trigger events. |
+| Select CHOP | `selectChop` | `chop` (path), `channames` | Reference channels from another CHOP. |
+| Merge CHOP | `mergeChop` | `align` (0=Extend, 1=Trim to First, 2=Trim to Shortest) | Combine channels from multiple CHOPs. |
+| Null CHOP | `nullChop` | (none significant) | Pass-through for organization and referencing. |
+
+### Input Devices
+
+| Operator | Type Name | Use |
+|----------|-----------|-----|
+| Mouse In CHOP | `mouseinChop` | Mouse position, buttons, wheel. |
+| Keyboard In CHOP | `keyboardinChop` | Keyboard key states. |
+| MIDI In CHOP | `midiinChop` | MIDI note/CC input. |
+| OSC In CHOP | `oscinChop` | OSC message input (network). |
+
+## SOPs — Surface Operators (Blue)
+
+3D geometry: points, polygons, NURBS, meshes.
+
+### Generators
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Grid SOP | `gridSop` | `rows`, `cols`, `sizex/y`, `type` (0=Polygon, 1=Mesh, 2=NURBS) | Flat grid mesh. Foundation for displacement, instancing. |
+| Sphere SOP | `sphereSop` | `type`, `rows`, `cols`, `radius` | Sphere geometry. |
+| Box SOP | `boxSop` | `sizex/y/z` | Box geometry. |
+| Torus SOP | `torusSop` | `radiusx/y`, `rows`, `cols` | Donut shape. |
+| Circle SOP | `circleSop` | `type`, `radius`, `divs` | Circle/ring geometry. |
+| Line SOP | `lineSop` | `dist`, `points` | Line segments. |
+| Text SOP | `textSop` | `text`, `fontsizex`, `fontfile`, `extrude` | 3D text geometry. |
+
+### Modifiers
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Transform SOP | `transformSop` | `tx/ty/tz`, `rx/ry/rz`, `sx/sy/sz` | Transform geometry (translate, rotate, scale). |
+| Noise SOP | `noiseSop` | `type`, `amp`, `period`, `roughness` | Deform geometry with noise. |
+| Sort SOP | `sortSop` | `ptsort`, `primsort` | Reorder points/primitives. |
+| Facet SOP | `facetSop` | `unique`, `consolidate`, `computenormals` | Normals, consolidation, unique points. |
+| Merge SOP | `mergeSop` | (none significant) | Combine multiple geometry inputs. |
+| Null SOP | `nullSop` | (none significant) | Pass-through. |
+
+## DATs — Data Operators (White)
+
+Text, tables, scripts, network data.
+
+### Core
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Table DAT | `tableDat` | (edit content directly) | Spreadsheet-like data tables. |
+| Text DAT | `textDat` | (edit content directly) | Arbitrary text content. Shader code, configs, scripts. |
+| Script DAT | `scriptDat` | `language` (0=Python, 1=C++) | Custom callbacks and DAT processing. |
+| CHOP Execute DAT | `chopexecDat` | `chop` (path to watch), callbacks | Trigger Python on CHOP value changes. |
+| DAT Execute DAT | `datexecDat` | `dat` (path to watch) | Trigger Python on DAT content changes. |
+| Panel Execute DAT | `panelexecDat` | `panel` | Trigger Python on UI panel events. |
+
+### I/O
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Web DAT | `webDat` | `url`, `fetchmethod` (0=GET, 1=POST) | HTTP requests. API integration. |
+| TCP/IP DAT | `tcpipDat` | `address`, `port`, `mode` | TCP networking. |
+| OSC In DAT | `oscinDat` | `port` | Receive OSC as text messages. |
+| Serial DAT | `serialDat` | `port`, `baudrate` | Serial port communication (Arduino, etc.). |
+| File In DAT | `fileinDat` | `file` | Read text files. |
+| File Out DAT | `fileoutDat` | `file`, `write` | Write text files. |
+
+### Conversions
+
+| Operator | Type Name | Direction | Use |
+|----------|-----------|-----------|-----|
+| DAT to CHOP | `dattoChop` | DAT -> CHOP | Convert table data to channels. |
+| CHOP to DAT | `choptoDat` | CHOP -> DAT | Convert channel data to table rows. |
+| SOP to DAT | `soptoDat` | SOP -> DAT | Extract geometry data as table. |
+
+## MATs — Material Operators (Yellow)
+
+Materials for 3D rendering in Render TOP / Geometry COMP.
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Phong MAT | `phongMat` | `diff_colorr/g/b`, `spec_colorr/g/b`, `shininess`, `colormap`, `normalmap` | Classic Phong shading. Simple, fast. |
+| PBR MAT | `pbrMat` | `basecolorr/g/b`, `metallic`, `roughness`, `normalmap`, `emitcolorr/g/b` | Physically-based rendering. Realistic materials. |
+| GLSL MAT | `glslMat` | `dat` (shader DAT), custom uniforms | Custom vertex + fragment shaders for 3D. |
+| Constant MAT | `constMat` | `colorr/g/b`, `colormap` | Flat unlit color/texture. No shading. |
+| Point Sprite MAT | `pointspriteMat` | `colormap`, `scale` | Render points as camera-facing sprites. Great for particles. |
+| Wireframe MAT | `wireframeMat` | `colorr/g/b`, `width` | Wireframe rendering. |
+| Depth MAT | `depthMat` | `near`, `far` | Render depth buffer as grayscale. |
+
+## COMPs — Component Operators (Gray)
+
+Containers, 3D scene elements, UI components.
+
+### 3D Scene
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Geometry COMP | `geometryComp` | `material` (path), `instancechop` (path), `instancing` (toggle) | Renders geometry with material. Instancing host. |
+| Camera COMP | `cameraComp` | `tx/ty/tz`, `rx/ry/rz`, `fov`, `near/far` | Camera for Render TOP. |
+| Light COMP | `lightComp` | `lighttype` (0=Point, 1=Directional, 2=Spot, 3=Cone), `dimmer`, `colorr/g/b` | Lighting for 3D scenes. |
+| Ambient Light COMP | `ambientlightComp` | `dimmer`, `colorr/g/b` | Ambient lighting. |
+| Environment Light COMP | `envlightComp` | `envmap` | Image-based lighting (IBL). |
+
+### Containers
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Container COMP | `containerComp` | `w`, `h`, `bgcolor1/2/3` | UI container. Holds other COMPs for panel layouts. |
+| Base COMP | `baseComp` | (none significant) | Generic container. Networks-inside-networks. |
+| Replicator COMP | `replicatorComp` | `template`, `operatorsdat` | Clone a template operator N times from a table. |
+
+### Utilities
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Window COMP | `windowComp` | `winw/h`, `winoffsetx/y`, `monitor`, `borders` | Output window for display/projection. |
+| Select COMP | `selectComp` | `rowcol`, `panel` | Select and display content from elsewhere. |
+| Engine COMP | `engineComp` | `tox`, `externaltox` | Load external .tox components. Sub-process isolation. |
+
+## Cross-Family Converter Summary
+
+| From | To | Operator | Type Name |
+|------|-----|----------|-----------|
+| CHOP | TOP | CHOP to TOP | `choptoTop` |
+| TOP | CHOP | TOP to CHOP | `toptoChop` |
+| DAT | CHOP | DAT to CHOP | `dattoChop` |
+| CHOP | DAT | CHOP to DAT | `choptoDat` |
+| SOP | CHOP | SOP to CHOP | `soptoChop` |
+| CHOP | SOP | CHOP to SOP | `choptoSop` |
+| SOP | DAT | SOP to DAT | `soptoDat` |
+| DAT | SOP | DAT to SOP | `dattoSop` |
+| SOP | TOP | (use Render TOP + Geometry COMP) | — |
+| TOP | SOP | TOP to SOP | `toptosopSop` |
diff --git a/skills/creative/touchdesigner/references/pitfalls.md b/skills/creative/touchdesigner/references/pitfalls.md
new file mode 100644
index 00000000000..862bdc56316
--- /dev/null
+++ b/skills/creative/touchdesigner/references/pitfalls.md
@@ -0,0 +1,336 @@
+# TouchDesigner MCP — Pitfalls & Lessons Learned
+
+Hard-won knowledge from real TD sessions. Read this before building anything.
+
+## Setup & Connection
+
+### 1. The .tox from the git repo is BROKEN
+
+The `td/mcp_webserver_base.tox` in the `8beeeaaat/touchdesigner-mcp` git clone is **incomplete**. It's missing the `td_server` Python module (generated by `npm run gen:webserver` which requires Docker). Port 9981 opens, but every route returns 404.
+
+**Always download the release zip:**
+```bash
+curl -L -o td.zip \
+  "https://github.com/8beeeaaat/touchdesigner-mcp/releases/latest/download/touchdesigner-mcp-td.zip"
+unzip -o td.zip -d touchdesigner-mcp-td
+```
+
+### 2. The release .tox also breaks (frequently)
+
+Even the correct release .tox fails after drag-and-drop import because `import_modules.py` resolves `modules/` via `parent().par.externaltox.eval()` — a relative path that often goes wrong. Symptoms: port 9981 listens, all routes 404, TD Textport shows `[ERROR] Failed to setup modules`.
+
+**The custom API handler (`scripts/custom_api_handler.py`) is more reliable.** It has zero external module dependencies — just a WebServer DAT + Text DAT callback. The skill's setup workflow should try the .tox first, test with `curl`, and auto-deploy the handler if 404.
+
+### 3. You CANNOT automate the .tox import from outside TD
+
+TD has no CLI flag to import a .tox. macOS blocks keystroke injection via System Events for security. The only ways to get code into TD from outside are:
+- Have a WebServer DAT already running (chicken-and-egg)
+- AppleScript to open Textport + clipboard paste (fragile, not always reliable)
+- User manually drags the .tox or pastes a script
+
+**Plan for one manual step** from the user (either .tox drag-drop or Textport paste). Make it as frictionless as possible: `open -R /path/to/file` to reveal in Finder.
+
+### 4. The npm package name is `touchdesigner-mcp-server` (not `@anthropic/...`)
+
+The Hermes config should use:
+```yaml
+command: npx
+args: ["-y", "touchdesigner-mcp-server@latest"]
+```
+
+### 5. MCP tools may register but not be callable
+
+Hermes may report "17 MCP tool(s) now available" but the tools aren't exposed as function calls. Use the REST API directly via `curl` in `execute_code` as a reliable fallback:
+```python
+def td_exec(script):
+    escaped = json.dumps({"script": script})
+    cmd = f"curl -s -X POST -H 'Content-Type: application/json' -d {shlex.quote(escaped)} 'http://127.0.0.1:9981/api/td/server/exec'"
+    return json.loads(terminal(cmd)['output'])
+```
+
+## TD WebServer DAT Quirks
+
+### 6. Response body goes in `response['data']`, NOT `response['body']`
+
+When writing custom WebServer DAT handlers, the response payload must be set on the `data` key:
+```python
+response['data'] = json.dumps({"result": 42})  # ✓ works
+response['body'] = json.dumps({"result": 42})   # ✗ ignored
+```
+
+### 7. Request POST body comes as BYTES in `request['data']`
+
+Not `request['body']`, and it's `bytes` not `str`:
+```python
+raw = request.get('data', b'')
+if isinstance(raw, bytes):
+    raw = raw.decode('utf-8')
+body = json.loads(raw) if raw else {}
+```
+
+### 8. Non-Commercial license caps resolution at 1280×1280
+
+Setting `resolutionw=1920` silently clamps to 1280. Always check effective resolution after creation:
+```python
+n.cook(force=True)
+actual = str(n.width) + 'x' + str(n.height)
+```
+
+## Parameter Names
+
+### 9. NEVER hardcode parameter names — always discover
+
+Parameter names change between TD versions. What works in 099 may not work in 098 or 2023.x. Always run discovery first:
+```python
+n = root.create(glslTOP, '_test')
+pars = [(p.name, type(p.val).__name__) for p in n.pars()]
+n.destroy()
+```
+
+Known differences from docs/online references:
+| What docs say | TD 099 actual | Notes |
+|---------------|---------------|-------|
+| `dat` | `pixeldat` | GLSL TOP pixel shader DAT |
+| `colora` | `alpha` | Constant TOP alpha |
+| `sizex` / `sizey` | `size` | Blur TOP (single value) |
+| `fontr/g/b` | `fontcolorr/g/b` | Text TOP font color (r/g/b only — alpha is the next row) |
+| `fontcolora` | `fontalpha` | Text TOP font alpha (NOT `fontcolora`) |
+| `bgcolora` | `bgalpha` | Text TOP bg alpha |
+| `value1name` | `vec0name` | GLSL TOP uniform name |
+
+### 10. Use `safe_par()` pattern for cross-version compatibility
+
+```python
+def safe_par(node, name, value):
+    p = getattr(node.par, name, None)
+    if p is not None:
+        p.val = value
+        return True
+    return False
+```
+
+### 11. `td.tdAttributeError` crashes the whole script
+
+If you do `node.par.nonexistent = value`, TD raises `tdAttributeError` and **stops the entire script**. There's no way to catch it with try/except in some TD versions. Always check with `getattr` first or use `safe_par()`.
+
+## GLSL Shaders
+
+### 12. `uTDCurrentTime` does NOT exist in TD 099
+
+The GLSL builtin for time was removed or never existed in some builds. Feed time via a 1×1 Constant TOP input. **CRITICAL: set format to `rgba32float`** — the default 8-bit format clamps values to 0-1, so `absTime.seconds % 1000.0` gets clamped and the GLSL shader sees a frozen time value of 1.0:
+```python
+t = root.create(constantTOP, 'time_driver')
+t.par.format = 'rgba32float'  # ← REQUIRED! Without this, time is stuck at 1.0
+t.par.outputresolution = 'custom'
+t.par.resolutionw = 1
+t.par.resolutionh = 1
+t.par.colorr.expr = "absTime.seconds % 1000.0"
+t.par.colorg.expr = "int(absTime.seconds / 1000.0)"
+t.outputConnectors[0].connect(glsl.inputConnectors[0])
+```
+In GLSL:
+```glsl
+vec4 td = texture(sTD2DInputs[0], vec2(0.5));
+float t = td.r + td.g * 1000.0;
+```
+
+### 13. GLSL compile errors are silent in the API
+
+The GLSL TOP shows a yellow warning triangle in the UI, but `node.errors()` may return an empty string. Check `node.warnings()` too, and create an Info DAT pointed at the GLSL TOP to read the actual compiler output.
+
+### 14. TD GLSL uses `vUV.st` not `gl_FragCoord`
+
+Standard GLSL patterns don't work. TD provides:
+- `vUV.st` — UV coordinates (0-1)
+- `uTDOutputInfo.res.zw` — resolution
+- `sTD2DInputs[0]` — input textures
+- `layout(location = 0) out vec4 fragColor` — output
+
+## Node Management
+
+### 15. Destroying nodes while iterating `root.children` causes `tdError`
+
+The iterator is invalidated when a child is destroyed. Always snapshot first:
+```python
+kids = list(root.children)  # snapshot
+for child in kids:
+    if child.valid:  # check — earlier destroys may cascade
+        child.destroy()
+```
+
+### 16. Feedback TOP: use `top` parameter, NOT direct input wire
+
+In TD 099, the feedbackTOP's `top` parameter references which TOP to delay. **Do not also wire that TOP directly into the feedback's input** — this creates a real cook dependency loop (warning flood, potential crash). The "Not enough sources" error on feedbackTOP is benign and resolves after a few frames of playback.
+
+Correct setup:
+```python
+fb = root.create(feedbackTOP, 'fb_delay')
+fb.par.top = comp.path          # reference only — no wire to fb input
+fb.outputConnectors[0].connect(xf)  # fb output -> transform -> fade -> comp
+```
+
+The resulting "Cook dependency loop detected" **warning** on the transform/fade chain is expected and correct — that's what feedback loops do. It's informational, not an error.
+
+### 16. GLSL TOP auto-creates companion nodes
+
+Creating a `glslTOP` also creates `name_pixel` (Text DAT), `name_info` (Info DAT), and `name_compute` (Text DAT). These are visible in the network and count toward node totals. Don't be alarmed by "extra" nodes.
+
+### 17. The default project root is `/project1`
+
+New TD files start with `/project1` as the main container. System nodes live at `/`, `/ui`, `/sys`, `/local`, `/perform`. Don't create user nodes outside `/project1`.
+
+### 18. `open -R` reveals the file but can't automate the drag
+
+Use `open -R /path/to/file.tox` to open Finder highlighting the file. The user must then drag it into TD manually. No AppleScript workaround exists on modern macOS due to accessibility restrictions.
+
+## Workflow
+
+### 19. Always verify after building — errors are silent
+
+Node errors and broken connections produce no output. Always check:
+```python
+for c in list(root.children):
+    e = c.errors()
+    w = c.warnings()
+    if e: print(c.name, 'ERR:', e)
+    if w: print(c.name, 'WARN:', w)
+```
+
+### 20. Build in one big `execute_python_script` call, not many small ones
+
+Each API round-trip adds latency. Bundle node creation + parameter setting + wiring into a single script that creates everything at once, then verify in one final call.
+
+### 21. Window COMP param for display target is `winop` (not `top` or `window`)
+
+To display output in a separate window:
+```python
+win = root.create(windowCOMP, 'display')
+win.par.winop = '/project1/logo_out'  # ← this is the correct param
+win.par.winw = 1280; win.par.winh = 720
+win.par.winopen.pulse()  # open the window
+```
+
+### 22. Save the project to make API persistent across TD restarts
+
+After deploying the custom API handler, save the project:
+```python
+td_exec("project.save(os.path.expanduser('~/Documents/HermesAgent.toe'))")
+```
+TD auto-opens the last saved project on launch. The API handler is now baked into the .toe file — next time TD opens, port 9981 is live with zero manual steps. To explicitly launch with this project: `open -a /Applications/TouchDesigner.app ~/Documents/HermesAgent.toe`
+
+### 23. `sample()` returns frozen pixels when called from WebServer DAT callback
+
+`out.sample(x, y)` called from inside the API handler's `exec()` returns pixels from a single cook snapshot. It does NOT update between multiple API calls in quick succession. To verify animation is working, either:
+- Compare samples with a 2+ second delay between separate `td_exec()` calls
+- Use `screencapture` on the display window
+- Check `absTime.seconds` is advancing and shader uses time correctly
+
+> **Note:** `outputresolution` is a string menu, not an integer — see the full entry with that title later in this section.
+
+### 25. MovieFileOut TOP: H.264/H.265 requires Commercial license
+
+In Non-Commercial TD 099, encoding with H.264 or H.265 produces an error: "GPU Accelerated H.264/H.265 Encoding requires a Commercial license". Use Motion JPEG instead:
+```python
+rec = root.create(moviefileoutTOP, 'recorder')
+rec.par.type = 'movie'
+rec.par.file = '/tmp/output.mov'
+rec.par.videocodec = 'mjpa'  # Motion JPEG — works on Non-Commercial
+```
+
+For image sequences, use `type = 'imagesequence'` and the file param **must** use `me.fileSuffix`:
+```python
+rec.par.type = 'imagesequence'
+rec.par.imagefiletype = 'png'
+rec.par.file.expr = "'/tmp/frames/out' + me.fileSuffix"
+```
+
+### 26. MovieFileOut `.record()` method may not exist
+
+In TD 099, there is no `.record()` method on moviefileoutTOP. Use the toggle parameter instead:
+```python
+rec.par.record = True   # start recording
+rec.par.record = False  # stop recording
+```
+
+When setting the file path and starting recording in the same script, use `run()` with `delayFrames` to avoid a race condition where the old filename is used:
+```python
+rec.par.file = '/tmp/new_output.mov'
+run("op('/project1/recorder').par.record = True", delayFrames=2)
+```
+
+### 27. TOP.save() captures same frame when called rapidly
+
+`op('null1').save(path)` captures the current GPU texture at call time. When called multiple times in a single script (or rapid API calls), TD doesn't cook new frames between saves — all exported PNGs will be identical. To get unique frames, use the MovieFileOut TOP which records in real-time from TD's cook cycle.
+
+### 28. AudioFileIn CHOP: cue before recording for sync
+
+When recording audio-reactive visuals, always cue the audio to the start before beginning the recording. Otherwise the visuals are synced to wherever the audio happens to be in its playback:
+```python
+op('/project1/audio_in').par.cue.pulse()  # reset to start
+run("op('/project1/recorder').par.record = True", delayFrames=3)
+```
+The audio plays via `playmode=0` (Locked to Timeline), so it stays in sync with TD's frame clock. Use `audiodeviceoutCHOP` to hear the audio during recording.
+
+### 29. Audio Spectrum CHOP output is weak — boost with Math CHOP
+
+The raw AudioSpectrum CHOP output has very small values (often 0.001-0.05 range). When fed directly to CHOP To TOP → GLSL, the shader barely reacts. Always insert a Math CHOP with `gain=5` (or higher) between the spectrum and the CHOP To TOP to get usable 0-1 range values in the shader.
+
+### 30. CHOP To TOP texture size — Resample to 256 first
+
+`choptoTOP` creates a texture where width = number of samples. An AudioSpectrum CHOP at 44100Hz has ~24000 samples — creating a 24000×1 texture is wasteful. Use a Resample CHOP set to 256 or 512 samples before the CHOP To TOP for an efficient spectrum texture.
+
+### 31. CHOP To TOP has NO input connectors — use par.chop reference
+
+`choptoTOP` does NOT have input connectors. `resample.outputConnectors[0].connect(chop_to_top.inputConnectors[0])` silently does nothing. Use the `chop` parameter instead:
+```python
+spec_tex = root.create(choptoTOP, 'spectrum_tex')
+spec_tex.par.chop = resample  # ← correct: parameter reference
+# NOT: resample.outputConnectors[0].connect(spec_tex.inputConnectors[0])  # ← WRONG: no connectors
+```
+
+### 32. `outputresolution` is a string menu, not an integer
+
+The `outputresolution` param is a menu with string values:
+```
+menuNames: ['useinput','eighth','quarter','half','2x','4x','8x','fit','limit','custom','parpanel']
+```
+Always use the string form. Setting `outputresolution = 9` may silently fail.
+```python
+node.par.outputresolution = 'custom'  # ✓ correct
+node.par.resolutionw = 1280; node.par.resolutionh = 720
+```
+Discover valid values: `list(node.par.outputresolution.menuNames)`
+
+### 33. Large GLSL shaders break curl JSON escaping
+
+GLSL code full of single/double quotes, backslashes, and special chars will corrupt the JSON payload when sent via `curl -d`. **Write the shader to a temp file and load it in TD:**
+```python
+# Agent side: write shader to /tmp/shader.glsl via write_file
+# TD side (via td_exec):
+sd = root.create(textDAT, 'shader_code')
+with open('/tmp/shader.glsl', 'r') as f:
+    sd.text = f.read()
+```
+This avoids all escaping issues. The TD Python environment has full filesystem access.
+
+### 34. TD crashes lose everything — the WebServer DAT must be re-deployed
+
+If TD crashes (common with heavy GLSL or rapid-fire API calls), all nodes including the WebServer DAT are lost. On relaunch, port 9981 is dead. Recovery:
+1. Detect: `curl` returns exit code 7 (connection refused) or `lsof -i :9981` shows nothing
+2. Check: `pgrep TouchDesigner` to confirm TD is running
+3. Re-deploy: user must paste `exec(open('...custom_api_handler.py').read())` into Textport again
+4. Verify: poll port 9981 until API responds
+
+The `td_exec()` helper should handle this gracefully:
+```python
+def td_exec(script):
+    escaped = json.dumps({"script": script})
+    cmd = f"curl -s --max-time 15 -X POST -H 'Content-Type: application/json' -d {shlex.quote(escaped)} 'http://127.0.0.1:9981/api/td/server/exec'"
+    r = terminal(cmd, timeout=20)
+    if r.get('exit_code') == 7:
+        return {'error': 'TD not responding — WebServer DAT may need re-deploy'}
+    try:
+        return json.loads(r['output'])
+    except:
+        return {'error': 'Bad response', 'raw': r['output'][:200]}
+```
diff --git a/skills/creative/touchdesigner/references/python-api.md b/skills/creative/touchdesigner/references/python-api.md
new file mode 100644
index 00000000000..2b8d8847f63
--- /dev/null
+++ b/skills/creative/touchdesigner/references/python-api.md
@@ -0,0 +1,443 @@
+# TouchDesigner Python API Reference
+
+## The td Module
+
+TouchDesigner's Python environment auto-imports the `td` module. All TD-specific classes, functions, and constants live here. Scripts inside TD (Script DATs, CHOP/DAT Execute callbacks, Extensions) have full access.
+
+When using the MCP `execute_python_script` tool, these globals are pre-loaded:
+- `op` — shortcut for `td.op()`, finds operators by path
+- `ops` — shortcut for `td.ops()`, finds multiple operators by pattern
+- `me` — the operator running the script (not meaningful via MCP — will be the WebServer DAT)
+- `parent` — shortcut for `me.parent()`
+- `project` — the root project component
+- `td` — the full td module
+
+## Finding Operators: op() and ops()
+
+### op(path) — Find a single operator
+
+```python
+# Absolute path (always works from MCP)
+node = op('/project1/noise1')
+
+# Relative path (relative to current operator — only in Script DATs)
+node = op('noise1')      # sibling
+node = op('../noise1')   # parent's sibling
+
+# Returns None if not found (does NOT raise)
+node = op('/project1/nonexistent')  # None
+```
+
+### ops(pattern) — Find multiple operators
+
+```python
+# Glob patterns
+nodes = ops('/project1/noise*')       # all nodes starting with "noise"
+nodes = ops('/project1/*')            # all direct children
+nodes = ops('/project1/container1/*') # all children of container1
+
+# Returns a tuple of operators (may be empty)
+for n in ops('/project1/*'):
+    print(n.name, n.OPType)
+```
+
+### Navigation from a node
+
+```python
+node = op('/project1/noise1')
+
+node.name        # 'noise1'
+node.path        # '/project1/noise1'
+node.OPType      # 'noiseTop'
+node.type         # <class 'noiseTop'>
+node.family       # 'TOP'
+
+# Parent / children
+node.parent()              # the parent COMP
+node.parent().children     # all siblings + self
+node.parent().findChildren(name='noise*')  # filtered
+
+# Type checking
+node.isTOP   # True
+node.isCHOP  # False
+node.isSOP   # False
+node.isDAT   # False
+node.isMAT   # False
+node.isCOMP  # False
+```
+
+## Parameters
+
+Every operator has parameters accessed via the `.par` attribute.
+
+### Reading parameters
+
+```python
+node = op('/project1/noise1')
+
+# Direct access
+node.par.seed.val        # current evaluated value (may be an expression result)
+node.par.seed.eval()     # same as .val
+node.par.seed.default    # default value
+node.par.monochrome.val  # boolean parameters: True/False
+
+# List all parameters
+for p in node.pars():
+    print(f"{p.name}: {p.val} (default: {p.default})")
+
+# Filter by page (parameter group)
+for p in node.pars('Noise'):  # page name
+    print(f"{p.name}: {p.val}")
+```
+
+### Setting parameters
+
+```python
+# Direct value setting
+node.par.seed.val = 42
+node.par.monochrome.val = True
+node.par.resolutionw.val = 1920
+node.par.resolutionh.val = 1080
+
+# String parameters
+op('/project1/text1').par.text.val = 'Hello World'
+
+# File paths
+op('/project1/moviefilein1').par.file.val = '/path/to/video.mp4'
+
+# Reference another operator (for "dat", "chop", "top" type parameters)
+op('/project1/glsl1').par.dat.val = '/project1/shader_code'
+```
+
+### Parameter expressions
+
+```python
+# Python expressions that evaluate dynamically
+node.par.seed.expr = "me.time.frame"
+node.par.tx.expr = "math.sin(me.time.seconds * 2)"
+
+# Reference another parameter
+node.par.brightness1.expr = "op('/project1/constant1').par.value0.val"
+
+# Export (one-way binding from CHOP to parameter)
+# This makes the parameter follow a CHOP channel value
+op('/project1/noise1').par.seed.val  # can also be driven by exports
+```
+
+### Parameter types
+
+| Type | Python Type | Example |
+|------|------------|---------|
+| Float | `float` | `node.par.brightness1.val = 0.5` |
+| Int | `int` | `node.par.seed.val = 42` |
+| Toggle | `bool` | `node.par.monochrome.val = True` |
+| String | `str` | `node.par.text.val = 'hello'` |
+| Menu | `int` (index) or `str` (label) | `node.par.type.val = 'sine'` |
+| File | `str` (path) | `node.par.file.val = '/path/to/file'` |
+| OP reference | `str` (path) | `node.par.dat.val = '/project1/text1'` |
+| Color | separate r/g/b/a floats | `node.par.colorr.val = 1.0` |
+| XY/XYZ | separate x/y/z floats | `node.par.tx.val = 0.5` |
+
+## Creating and Deleting Operators
+
+```python
+# Create via parent component
+parent = op('/project1')
+new_node = parent.create(noiseTop)         # using class reference
+new_node = parent.create(noiseTop, 'my_noise')  # with custom name
+
+# The MCP create_td_node tool handles this automatically:
+# create_td_node(parentPath="/project1", nodeType="noiseTop", nodeName="my_noise")
+
+# Delete
+node = op('/project1/my_noise')
+node.destroy()
+
+# Copy
+original = op('/project1/noise1')
+copy = parent.copy(original, name='noise1_copy')
+```
+
+## Connections (Wiring Operators)
+
+### Output to Input connections
+
+```python
+# Connect noise1's output to level1's input
+op('/project1/noise1').outputConnectors[0].connect(op('/project1/level1'))
+
+# Connect to specific input index (for multi-input operators like Composite)
+op('/project1/noise1').outputConnectors[0].connect(op('/project1/composite1').inputConnectors[0])
+op('/project1/text1').outputConnectors[0].connect(op('/project1/composite1').inputConnectors[1])
+
+# Disconnect all outputs
+op('/project1/noise1').outputConnectors[0].disconnect()
+
+# Query connections
+node = op('/project1/level1')
+inputs = node.inputs          # list of connected input operators
+outputs = node.outputs        # list of connected output operators
+```
+
+### Connection patterns for common setups
+
+```python
+# Linear chain: A -> B -> C -> D
+ops_list = [op(f'/project1/{name}') for name in ['noise1', 'level1', 'blur1', 'null1']]
+for i in range(len(ops_list) - 1):
+    ops_list[i].outputConnectors[0].connect(ops_list[i+1])
+
+# Fan-out: A -> B, A -> C, A -> D
+source = op('/project1/noise1')
+for target_name in ['level1', 'composite1', 'transform1']:
+    source.outputConnectors[0].connect(op(f'/project1/{target_name}'))
+
+# Merge: A + B + C -> Composite
+comp = op('/project1/composite1')
+for i, source_name in enumerate(['noise1', 'text1', 'ramp1']):
+    op(f'/project1/{source_name}').outputConnectors[0].connect(comp.inputConnectors[i])
+```
+
+## DAT Content Manipulation
+
+### Text DATs
+
+```python
+dat = op('/project1/text1')
+
+# Read
+content = dat.text          # full text as string
+
+# Write
+dat.text = "new content"
+dat.text = '''multi
+line
+content'''
+
+# Append
+dat.text += "\nnew line"
+```
+
+### Table DATs
+
+```python
+dat = op('/project1/table1')
+
+# Read cell
+val = dat[0, 0]         # row 0, col 0
+val = dat[0, 'name']    # row 0, column named 'name'
+val = dat['key', 1]     # row named 'key', col 1
+
+# Write cell
+dat[0, 0] = 'value'
+
+# Read row/col
+row = dat.row(0)         # list of Cell objects
+col = dat.col('name')    # list of Cell objects
+
+# Dimensions
+rows = dat.numRows
+cols = dat.numCols
+
+# Append row
+dat.appendRow(['col1_val', 'col2_val', 'col3_val'])
+
+# Clear
+dat.clear()
+
+# Set entire table
+dat.clear()
+dat.appendRow(['name', 'value', 'type'])
+dat.appendRow(['frequency', '440', 'float'])
+dat.appendRow(['amplitude', '0.8', 'float'])
+```
+
+## Time and Animation
+
+```python
+# Global time
+td.absTime.frame       # absolute frame number (never resets)
+td.absTime.seconds     # absolute seconds
+
+# Timeline time (affected by play/pause/loop)
+me.time.frame          # current frame on timeline
+me.time.seconds        # current seconds on timeline
+me.time.rate           # FPS setting
+
+# Timeline control (via execute_python_script)
+project.play = True
+project.play = False
+project.frameRange = (1, 300)   # set timeline range
+
+# Cook frame (when operator was last computed)
+node.cookFrame
+node.cookTime
+```
+
+## Extensions (Custom Python Classes on Components)
+
+Extensions add custom Python methods and attributes to COMPs.
+
+```python
+# Create extension on a Base COMP
+base = op('/project1/myBase')
+
+# The extension class is defined in a Text DAT inside the COMP
+# Typically named 'ExtClass' with the extension code:
+
+extension_code = '''
+class MyExtension:
+    def __init__(self, ownerComp):
+        self.ownerComp = ownerComp
+        self.counter = 0
+
+    def Reset(self):
+        self.counter = 0
+
+    def Increment(self):
+        self.counter += 1
+        return self.counter
+
+    @property
+    def Count(self):
+        return self.counter
+'''
+
+# Write extension code to DAT inside the COMP
+op('/project1/myBase/extClass').text = extension_code
+
+# Configure the extension on the COMP
+base.par.extension1 = 'extClass'  # name of the DAT
+base.par.promoteextension1 = True  # promote methods to parent
+
+# Call extension methods
+base.Increment()       # calls MyExtension.Increment()
+count = base.Count     # accesses MyExtension.Count property
+base.Reset()
+```
+
+## Useful Built-in Modules
+
+### tdu — TouchDesigner Utilities
+
+```python
+import tdu
+
+# Dependency tracking (reactive values)
+dep = tdu.Dependency(initial_value)
+dep.val = new_value   # triggers dependents to recook
+
+# File path utilities
+tdu.expandPath('$HOME/Desktop/output.mov')
+
+# Math
+tdu.clamp(value, min, max)
+tdu.remap(value, from_min, from_max, to_min, to_max)
+```
+
+### TDFunctions
+
+```python
+from TDFunctions import *
+
+# Commonly used utilities
+clamp(value, low, high)
+remap(value, inLow, inHigh, outLow, outHigh)
+interp(value1, value2, t)  # linear interpolation
+```
+
+### TDStoreTools — Persistent Storage
+
+```python
+from TDStoreTools import StorageManager
+
+# Store data that survives project reload
+me.store('myKey', 'myValue')
+val = me.fetch('myKey', default='fallback')
+
+# Storage dict
+me.storage['key'] = value
+```
+
+## Common Patterns via execute_python_script
+
+### Build a complete chain
+
+```python
+# Create a complete audio-reactive noise chain
+parent = op('/project1')
+
+# Create operators
+audio_in = parent.create(audiofileinChop, 'audio_in')
+spectrum = parent.create(audiospectrumChop, 'spectrum')
+chop_to_top = parent.create(choptopTop, 'chop_to_top')
+noise = parent.create(noiseTop, 'noise1')
+level = parent.create(levelTop, 'level1')
+null_out = parent.create(nullTop, 'out')
+
+# Wire the chain
+audio_in.outputConnectors[0].connect(spectrum)
+chop_to_top.par.chop = spectrum  # CHOP to TOP has no input connectors — reference the CHOP via its `chop` parameter
+noise.outputConnectors[0].connect(level)
+level.outputConnectors[0].connect(null_out)
+
+# Set parameters
+audio_in.par.file = '/path/to/music.wav'
+audio_in.par.play = True
+spectrum.par.size = 512
+noise.par.type = 1  # Sparse
+noise.par.monochrome = False
+noise.par.resolutionw = 1920
+noise.par.resolutionh = 1080
+level.par.opacity = 0.8
+level.par.gamma1 = 0.7
+```
+
+### Query network state
+
+```python
+# Get all TOPs in the project
+tops = [c for c in op('/project1').findChildren(type=TOP)]
+for t in tops:
+    print(f"{t.path}: {t.OPType} {'ERROR' if t.errors() else 'OK'}")
+
+# Find all operators with errors
+def find_errors(parent_path='/project1'):
+    parent = op(parent_path)
+    errors = []
+    for child in parent.findChildren(depth=-1):
+        if child.errors():
+            errors.append((child.path, child.errors()))
+    return errors
+
+result = find_errors()
+```
+
+### Batch parameter changes
+
+```python
+# Set parameters on multiple nodes at once
+settings = {
+    '/project1/noise1': {'seed': 42, 'monochrome': False, 'resolutionw': 1920},
+    '/project1/level1': {'brightness1': 1.2, 'gamma1': 0.8},
+    '/project1/blur1': {'sizex': 5, 'sizey': 5},
+}
+
+for path, params in settings.items():
+    node = op(path)
+    if node:
+        for key, val in params.items():
+            setattr(node.par, key, val)
+```
+
+## Python Version and Packages
+
+TouchDesigner bundles Python 3.11+ (as of TD 2024) with these pre-installed:
+- **numpy** — array operations, fast math
+- **scipy** — signal processing, FFT
+- **OpenCV** (cv2) — computer vision
+- **PIL/Pillow** — image processing
+- **requests** — HTTP client
+- **json**, **re**, **os**, **sys** — standard library
+
+Custom packages can be installed to TD's Python site-packages directory. See TD documentation for the exact path per platform.
diff --git a/skills/creative/touchdesigner/references/troubleshooting.md b/skills/creative/touchdesigner/references/troubleshooting.md
new file mode 100644
index 00000000000..30ad580f4ca
--- /dev/null
+++ b/skills/creative/touchdesigner/references/troubleshooting.md
@@ -0,0 +1,274 @@
+# TouchDesigner Troubleshooting
+
+> See `references/pitfalls.md` for the comprehensive lessons-learned list.
+
+## Quick Connection Diagnostic
+
+```bash
+lsof -i :9981 -P -n | grep LISTEN    # Step 1: Is TD listening?
+curl -s http://127.0.0.1:9981/api/td/server/td   # Step 2: API working?
+```
+
+| Symptom | Cause | Fix |
+|---------|-------|-----|
+| Connection refused | No WebServer DAT | Deploy `scripts/custom_api_handler.py` in TD Textport |
+| HTTP 404 on all routes | .tox module import failed | Deploy custom handler (pitfalls #1-2) |
+| HTTP 200, empty body | Response in wrong key | Handler uses `response['data']` not `response['body']` (pitfalls #6) |
+| HTTP 200, JSON body | Working | Proceed to discovery |
+| MCP tools not callable | Normal — use curl instead | `td_exec()` pattern in SKILL.md works without MCP |
+
+## Node Creation Issues
+
+### "Node type not found" error
+
+**Cause:** Wrong `nodeType` string in `create_td_node`.
+
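+**Fix:** Use the lowercase operator name followed by the uppercase family suffix — the same spelling as the TD Python class (e.g. `noiseTOP`). Common mistakes:
+- Wrong: `NoiseTop`, `noiseTop`, `noise_top`, `NOISE TOP`, `Noise`
+- Right: `noiseTOP`
+- Wrong: `AudioSpectrum`, `audio_spectrum_chop`, `audiospectrumChop`
+- Right: `audiospectrumCHOP`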
+
+**Discovery method:** Use `get_td_classes` to see available types, or `execute_python_script` with `dir(td)` filtered for operator classes.
+
+### Node created but not visible in TD
+
+**Cause:** Node was created in a different container than expected, or TD viewport is looking at a different network.
+
+**Fix:** Check `parentPath` — use absolute paths like `/project1`. Verify with `get_td_nodes(parentPath="/project1")`.
+
+### Cannot create node inside a non-COMP
+
+**Cause:** Only COMP operators (Container, Base, Geometry, etc.) can contain child operators. You cannot create nodes inside a TOP, CHOP, SOP, DAT, or MAT.
+
+**Fix:** Create a Container COMP or Base COMP first, then create nodes inside it.
+
+## Parameter Issues
+
+### Parameter not updating
+
+**Causes:**
+1. **Wrong parameter name.** TD parameter names change across versions. Run the discovery script (SKILL.md Step 0) or use `get_td_node_parameters` to discover exact names for your TD version. Never trust online docs or this skill's tables — always verify.
+2. **Parameter is read-only.** Some parameters are computed/locked.
+3. **Wrong value type.** Menu parameters need an integer index or the exact menu name string (one of `par.menuNames`), not the label shown in the UI.
+4. **Parameter has an expression.** If `node.par.X.expr` is set, `.val` is ignored. Clear the expression first.
+
+**Discovery-based approach (preferred):**
+```python
+execute_python_script(script="""
+n = op('/project1/mynode')
+pars = [(p.name, type(p.val).__name__, p.val) for p in n.pars()
+        if any(k in p.name.lower() for k in ['color', 'size', 'dat', 'font', 'alpha'])]
+result = pars
+""")
+```
+
+**Safe parameter setter pattern:**
+```python
+def safe_par(node, name, value):
+    p = getattr(node.par, name, None)
+    if p is not None:
+        p.val = value
+        return True
+    return False  # param doesn't exist in this TD version
+```
+
+### Common parameter name gotchas
+
+| What you expect | Actual name | Notes |
+|----------------|-------------|-------|
+| `width` | `resolutionw` | TOP resolution width |
+| `height` | `resolutionh` | TOP resolution height |
+| `filepath` | `file` | File path parameter |
+| `color` | `colorr`, `colorg`, `colorb`, `colora` | Separate RGBA components |
+| `position_x` | `tx` | Translate X |
+| `rotation` | `rz` | Rotate Z (2D rotation) |
+| `scale` | `sx`, `sy` | Separate X/Y scale |
+| `blend_mode` | `operand` | Composite TOP blend mode (integer) |
+| `opacity` | `opacity` | On Level TOP (this one is correct!) |
+
+### Composite TOP operand values
+
+| Mode | Index |
+|------|-------|
+| Over | 0 |
+| Under | 1 |
+| Inside | 2 |
+| Add | 3 |
+| Subtract | 4 |
+| Difference | 5 |
+| Multiply | 18 |
+| Screen | 27 |
+| Maximum | 13 |
+| Minimum | 14 |
+| Average | 28 |
+
+## Connection/Wiring Issues
+
+### Connections not working
+
+**Causes:**
+1. **Cross-family wiring.** TOPs can only connect to TOPs, CHOPs to CHOPs, etc. Use converter operators to bridge families.
+2. **Wrong connector index.** Most operators have one output connector (index 0). Multi-output operators may need index 1, 2, etc.
+3. **Node path wrong.** Verify paths are absolute and correctly spelled.
+
+**Verify connections:**
+```python
+execute_python_script(script="""
+node = op('/project1/level1')
+result = {
+    'inputs': [i.path if i else None for i in node.inputs],
+    'outputs': [o.path if o else None for o in node.outputs]
+}
+""")
+```
+
+### Feedback loops causing errors
+
+**Symptom:** "Circular dependency" or infinite cook loop.
+
+**Fix:** Always use a Feedback TOP (or a Null TOP with a one-frame delay) to break the loop:
+```
+A -> B -> Feedback(references B) -> A
+```
+Never create A -> B -> A directly.
+
+## Performance Issues
+
+### Low FPS / choppy output
+
+**Common causes and fixes:**
+
+1. **Resolution too high.** Start at 1920x1080, only go higher if GPU handles it.
+2. **Too many operators.** Each operator has GPU/CPU overhead. Consolidate where possible.
+3. **Expensive shader.** GLSL TOPs with complex math per-pixel drain GPU. Profile with TD's Performance Monitor (F2).
+4. **No GPU instancing.** Rendering 1000 separate geometry objects is much slower than 1 instanced geometry.
+5. **Unnecessary cooks.** Operators that don't change frame-to-frame still recook if inputs change. Use Null TOPs to cache stable results.
+6. **Large texture transfers.** TOP to CHOP and CHOP to TOP involve GPU-CPU memory transfers. Minimize these.
+
+**Performance Monitor:**
+```python
+execute_python_script(script="td.performanceMonitor = True")
+# After testing:
+execute_python_script(script="td.performanceMonitor = False")
+```
+
+### Memory growing over time
+
+**Causes:**
+- Cache TOPs with high `length` value
+- Feedback loops without brightness decay (values accumulate)
+- Table DATs growing without clearing
+- Movie File In loading many unique frames
+
+**Fix:** Always add slight decay in feedback loops (Level TOP with `opacity=0.98` or multiply blend). Clear tables periodically.
+
+## Export / Recording Issues
+
+### Movie File Out not recording
+
+**Checklist:**
+1. Is the `record` parameter toggled on? `update_td_node_parameters(properties={"record": true})`
+2. Is an input connected? The Movie File Out needs a TOP input.
+3. Is the output path valid and writable? Check `file` parameter.
+4. Is the codec available? H.264 (type 4) is most reliable.
+
+### Exported video is black
+
+**Causes:**
+1. The TOP chain output is all black (brightness too low).
+2. The input TOP has errors (check with `get_td_node_errors`).
+3. Resolution mismatch — the output may be wrong resolution.
+
+**Debug:** Check the input TOP's actual pixel values:
+```python
+execute_python_script(script="""
+import numpy as np
+top = op('/project1/out')
+arr = top.numpyArray()  # synchronous read; delayed=True can return None on a one-shot call
+result = {'mean': float(arr.mean()), 'max': float(arr.max()), 'shape': list(arr.shape)}
+""")
+```
+
+### .tox export losing connections
+
+**Note:** When saving a component as .tox, only the component and its internal children are saved. External connections (wires to operators outside the component) are lost. Design self-contained components.
+
+## Python Scripting Issues
+
+### execute_python_script returns empty result
+
+**Causes:**
+1. The script used `exec()` semantics (multi-line) but didn't set `result`.
+2. The last expression has no return value (e.g., `print()` returns None).
+
+**Fix:** Explicitly set `result`:
+```python
+execute_python_script(script="""
+nodes = op('/project1').findChildren(type=TOP)
+result = len(nodes)  # explicitly set return value
+""")
+```
+
+### Script errors not clear
+
+**Check stderr in the response.** The MCP server captures both stdout and stderr from script execution. Error tracebacks appear in stderr.
+
+### Module not found in TD Python
+
+**Cause:** TD's Python environment may not have the module. TD bundles numpy, scipy, opencv, Pillow, and requests. Other packages need manual installation.
+
+**Check available packages:**
+```python
+execute_python_script(script="""
+import sys
+result = [p for p in sys.path]
+""")
+```
+
+## Common Workflow Pitfalls
+
+### Building before verifying connection
+
+Always call `get_td_info` first. If TD isn't running or the WebServer DAT isn't loaded, all subsequent tool calls will fail.
+
+### Not checking errors after building
+
+Always call `get_td_node_errors(nodePath="/project1")` after creating and wiring a network. Broken connections and missing references are silent until you check.
+
+### Creating too many operators in one go
+
+When building complex networks, create in logical groups:
+1. Create all operators in a section
+2. Wire that section
+3. Verify with `get_td_node_errors`
+4. Move to the next section
+
+Don't create 50 operators, wire them all, then discover something was wrong 30 operators ago.
+
+### Parameter expressions vs static values
+
+If you set `node.par.X.val = 5` but there's an expression on that parameter (`node.par.X.expr`), the expression wins. To use a static value, clear the expression first:
+```python
+execute_python_script(script="""
+op('/project1/noise1').par.seed.expr = ''  # clear expression
+op('/project1/noise1').par.seed.val = 42   # now static value works
+""")
+```
+
+### Forgetting to start audio playback
+
+Audio File In CHOP won't produce data unless `play` is True and a valid `file` is set:
+```
+update_td_node_parameters(nodePath="/project1/audio_in",
+    properties={"file": "/path/to/music.wav", "play": true})
+```
+
+### GLSL shader compilation errors
+
+If a GLSL TOP shows errors after setting shader code:
+1. Check the shader code in the Text DAT for syntax errors
+2. Ensure the GLSL version is compatible (TD uses GLSL 3.30+)
+3. Input sampler name must be `sTD2DInputs[0]` (not custom names)
+4. Output must use `layout(location = 0) out vec4 fragColor`
+5. UV coordinates come from `vUV.st` (not `gl_FragCoord`)
diff --git a/skills/creative/touchdesigner/scripts/custom_api_handler.py b/skills/creative/touchdesigner/scripts/custom_api_handler.py
new file mode 100644
index 00000000000..fd3772a87df
--- /dev/null
+++ b/skills/creative/touchdesigner/scripts/custom_api_handler.py
@@ -0,0 +1,140 @@
+"""
+Custom API Handler for TouchDesigner WebServer DAT
+===================================================
+Use this when mcp_webserver_base.tox fails to load its modules
+(common — the .tox relies on relative paths to a modules/ folder
+that often break during import).
+
+Paste into TD Textport or run via exec(open('...').read()):
+  Creates a WebServer DAT + Text DAT callback handler on port 9981.
+  Implements the core endpoints the MCP server expects.
+
+After running, test with:
+  curl http://127.0.0.1:9981/api/td/server/td
+"""
+
+root = op('/project1')
+
+# Remove broken webserver if present
+old = op('/project1/mcp_webserver_base')
+if old and old.valid:
+    old.destroy()
+
+# Create WebServer DAT
+ws = root.create(webserverDAT, 'api_server')
+ws.par.port = 9981
+ws.par.active = True
+ws.nodeX = -800; ws.nodeY = 500
+
+# Create callback handler
+cb = root.create(textDAT, 'api_handler')
+cb.nodeX = -800; cb.nodeY = 400
+cb.text = r'''
+import json, traceback, io, sys
+
+def onHTTPRequest(webServerDAT, request, response):
+    uri = request.get('uri', '')
+    method = request.get('method', 'GET')
+    response['statusCode'] = 200
+    response['statusReason'] = 'OK'
+    response['headers'] = {'Content-Type': 'application/json', 'Access-Control-Allow-Origin': '*'}
+
+    try:
+        # TD sends POST body as bytes in request['data']
+        raw = request.get('data', request.get('body', ''))
+        if isinstance(raw, bytes):
+            raw = raw.decode('utf-8')
+        body = {}
+        if raw and isinstance(raw, str) and raw.strip():
+            body = json.loads(raw)
+        pars = request.get('pars', {})
+
+        if uri == '/api/td/server/td':
+            response['data'] = json.dumps({
+                'version': str(app.version),
+                'osName': sys.platform,
+                'apiVersion': '1.4.3',
+                'product': 'TouchDesigner'
+            })
+
+        elif uri == '/api/td/server/exec':
+            script = body.get('script', '')
+            old_stdout = sys.stdout
+            sys.stdout = buf = io.StringIO()
+            result_val = None
+            err_text = ''
+            try:
+                globs = {'op': op, 'ops': ops, 'me': webServerDAT, 'parent': parent,
+                         'project': project, 'td': td, 'result': None,
+                         'app': app, 'absTime': absTime}
+                lines = script.strip().split('\n')
+                if len(lines) == 1:
+                    try:
+                        result_val = eval(script, globs)
+                    except SyntaxError:
+                        exec(script, globs)
+                        result_val = globs.get('result')
+                else:
+                    exec(script, globs)
+                    result_val = globs.get('result')
+            except Exception as e:
+                err_text = traceback.format_exc()
+            finally:
+                captured = buf.getvalue()
+                sys.stdout = old_stdout
+            response['data'] = json.dumps({
+                'result': _serialize(result_val),
+                'stdout': captured,
+                'stderr': err_text
+            })
+
+        elif uri == '/api/nodes':
+            pp = pars.get('parentPath', ['/project1'])[0]
+            p = op(pp)
+            nodes = []
+            if p:
+                for c in p.children:
+                    nodes.append({'name': c.name, 'path': c.path,
+                                  'opType': c.OPType, 'family': c.family})
+            response['data'] = json.dumps({'data': nodes})
+
+        elif uri == '/api/nodes/errors':
+            np = pars.get('nodePath', ['/project1'])[0]
+            n = op(np)
+            errors = []
+            if n:
+                def _collect(node, depth=0):
+                    if depth > 10: return
+                    e = node.errors()
+                    if e:
+                        errors.append({'nodePath': node.path, 'nodeName': node.name,
+                                       'opType': node.OPType, 'errors': str(e)})
+                    if hasattr(node, 'children'):
+                        for c in node.children: _collect(c, depth+1)
+                _collect(n)
+            response['data'] = json.dumps({'data': errors, 'hasErrors': len(errors)>0,
+                                            'errorCount': len(errors)})
+
+        else:
+            response['statusCode'] = 404
+            response['data'] = json.dumps({'error': 'Unknown: ' + uri})
+
+    except Exception as e:
+        response['statusCode'] = 500
+        response['data'] = json.dumps({'error': str(e), 'trace': traceback.format_exc()})
+
+    return response
+
+def _serialize(v):
+    if v is None: return None
+    if isinstance(v, (int, float, bool, str)): return v
+    if isinstance(v, (list, tuple)): return [_serialize(i) for i in v]
+    if isinstance(v, dict): return {str(k): _serialize(vv) for k, vv in v.items()}
+    return str(v)
+'''
+
+# Point webserver to callback
+ws.par.callbacks = cb.path
+
+print("Custom API server created on port 9981")
+print("Test: curl http://127.0.0.1:9981/api/td/server/td")
diff --git a/skills/creative/touchdesigner/scripts/setup.sh b/skills/creative/touchdesigner/scripts/setup.sh
new file mode 100644
index 00000000000..ce8b56870c3
--- /dev/null
+++ b/skills/creative/touchdesigner/scripts/setup.sh
@@ -0,0 +1,152 @@
+#!/usr/bin/env bash
+# TouchDesigner MCP Setup Verification Script
+# Checks all prerequisites and guides configuration
+
+set -euo pipefail
+
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m'
+
+pass() { echo -e "  ${GREEN}✓${NC} $1"; }
+fail() { echo -e "  ${RED}✗${NC} $1"; }
+warn() { echo -e "  ${YELLOW}!${NC} $1"; }
+info() { echo -e "  ${BLUE}→${NC} $1"; }
+
+echo ""
+echo "TouchDesigner MCP Setup Check"
+echo "=============================="
+echo ""
+
+ERRORS=0
+
+# 1. Check Node.js
+echo "1. Node.js"
+if command -v node &>/dev/null; then
+    NODE_VER=$(node --version 2>/dev/null || echo "unknown")
+    MAJOR=$(echo "$NODE_VER" | sed 's/^v//' | cut -d. -f1)
+    if [ "$MAJOR" -ge 18 ] 2>/dev/null; then
+        pass "Node.js $NODE_VER (>= 18 required)"
+    else
+        fail "Node.js $NODE_VER (>= 18 required, please upgrade)"
+        ERRORS=$((ERRORS + 1))
+    fi
+else
+    fail "Node.js not found"
+    info "Install: https://nodejs.org/ or 'brew install node'"
+    ERRORS=$((ERRORS + 1))
+fi
+
+# 2. Check npm/npx
+echo "2. npm/npx"
+if command -v npx &>/dev/null; then
+    NPX_VER=$(npx --version 2>/dev/null || echo "unknown")
+    pass "npx $NPX_VER"
+else
+    fail "npx not found (usually comes with Node.js)"
+    ERRORS=$((ERRORS + 1))
+fi
+
+# 3. Check MCP Python package
+echo "3. MCP Python package"
+HERMES_HOME="${HERMES_HOME:-$HOME/.hermes}"
+VENV_PYTHON=""
+
+# Try to find the Hermes venv Python
+if [ -f "$HERMES_HOME/hermes-agent/.venv/bin/python" ]; then
+    VENV_PYTHON="$HERMES_HOME/hermes-agent/.venv/bin/python"
+elif [ -f "$HERMES_HOME/hermes-agent/venv/bin/python" ]; then
+    VENV_PYTHON="$HERMES_HOME/hermes-agent/venv/bin/python"
+fi
+
+if [ -n "$VENV_PYTHON" ]; then  # only probe when a Hermes venv interpreter was located above
+    if "$VENV_PYTHON" -c "import mcp" 2>/dev/null; then  # quoted: HERMES_HOME may contain spaces
+        MCP_VER=$("$VENV_PYTHON" -c "import importlib.metadata; print(importlib.metadata.version('mcp'))" 2>/dev/null || echo "installed")
+        pass "mcp package ($MCP_VER) in Hermes venv"
+    else
+        fail "mcp package not installed in Hermes venv"
+        info "Install: $VENV_PYTHON -m pip install mcp"
+        ERRORS=$((ERRORS + 1))  # missing mcp is a critical failure counted in the summary
+    fi
+else
+    warn "Could not find Hermes venv — check mcp package manually"
+fi
+
+# 4. Check TouchDesigner
+echo "4. TouchDesigner"
+TD_FOUND=false
+
+# macOS
+if [ -d "/Applications/TouchDesigner.app" ]; then
+    TD_FOUND=true
+    pass "TouchDesigner found at /Applications/TouchDesigner.app"
+fi
+
+# Linux (common install locations)
+if command -v TouchDesigner &>/dev/null; then
+    TD_FOUND=true
+    pass "TouchDesigner found in PATH"
+fi
+
+if [ -d "$HOME/TouchDesigner" ]; then
+    TD_FOUND=true
+    pass "TouchDesigner found at ~/TouchDesigner"
+fi
+
+if [ "$TD_FOUND" = false ]; then
+    warn "TouchDesigner not detected (may be installed elsewhere)"
+    info "Download from: https://derivative.ca/download"
+    info "Free Non-Commercial license available"
+fi
+
+# 5. Check TD WebServer DAT reachability
+echo "5. TouchDesigner WebServer DAT"
+TD_URL="${TD_API_URL:-http://127.0.0.1:9981}"
+if command -v curl &>/dev/null; then
+    HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 3 "$TD_URL/api/td/server/td" 2>/dev/null || echo "000")
+    if [ "$HTTP_CODE" = "200" ]; then
+        TD_INFO=$(curl -s --connect-timeout 3 "$TD_URL/api/td/server/td" 2>/dev/null || echo "{}")
+        pass "TD WebServer DAT responding at $TD_URL"
+        info "Response: $TD_INFO"
+    elif [ "$HTTP_CODE" = "000" ]; then
+        warn "Cannot reach TD WebServer DAT at $TD_URL"
+        info "Make sure TouchDesigner is running with mcp_webserver_base.tox imported"
+    else
+        warn "TD WebServer DAT returned HTTP $HTTP_CODE at $TD_URL"
+    fi
+else
+    warn "curl not found — cannot test TD connection"
+fi
+
+# 6. Check Hermes config
+echo "6. Hermes MCP config"
+CONFIG_FILE="$HERMES_HOME/config.yaml"
+if [ -f "$CONFIG_FILE" ]; then
+    if grep -q "touchdesigner" "$CONFIG_FILE" 2>/dev/null; then
+        pass "TouchDesigner MCP server configured in config.yaml"
+    else
+        warn "No 'touchdesigner' entry found in mcp_servers config"
+        info "Add a touchdesigner entry under mcp_servers: in $CONFIG_FILE"
+        info "See references/mcp-tools.md for the configuration block"
+    fi
+else
+    warn "No Hermes config.yaml found at $CONFIG_FILE"
+fi
+
+# Summary
+echo ""
+echo "=============================="
+if [ $ERRORS -eq 0 ]; then
+    echo -e "${GREEN}All critical checks passed!${NC}"
+    echo ""
+    echo "Next steps:"
+    echo "  1. Open TouchDesigner and import mcp_webserver_base.tox"
+    echo "  2. Add the MCP server config to Hermes (see references/mcp-tools.md)"
+    echo "  3. Restart Hermes and test: 'Get TouchDesigner server info'"
+else
+    echo -e "${RED}$ERRORS critical issue(s) found.${NC}"
+    echo "Fix the issues above, then re-run this script."
+fi
+echo ""

From 6f27390fae352cc4e2aa5f41f1cbd35139656fb5 Mon Sep 17 00:00:00 2001
From: kshitijk4poor <kshitijk4poor@users.noreply.github.com>
Date: Fri, 17 Apr 2026 21:50:35 +0530
Subject: [PATCH 076/143] feat: rewrite TouchDesigner skill for twozero MCP
 (v2.0.0)

Major rewrite of the TouchDesigner skill:
- Replace custom API handler with twozero MCP (36 native tools)
- Add audio-reactive GLSL proven recipe (spectrum chain, pitfalls)
- Add recording checklist (FPS>0, non-black, audio cueing)
- Expand pitfalls: 38 entries from real sessions (was 20)
- Update network-patterns with MCP-native build scripts
- Rewrite mcp-tools reference for twozero v2.774+
- Update troubleshooting for MCP-based workflow
- Remove obsolete custom_api_handler.py
- Generalize Environment section for all users
- Remove session-specific Paired Skills section
- Bump version to 2.0.0
---
 skills/creative/touchdesigner/SKILL.md        | 427 +++++++-----
 .../touchdesigner/references/mcp-tools.md     | 645 +++++++-----------
 .../references/network-patterns.md            | 286 ++++----
 .../touchdesigner/references/pitfalls.md      | 622 +++++++++++------
 .../touchdesigner/references/python-api.md    |  24 +-
 .../references/troubleshooting.md             | 438 ++++++------
 .../scripts/custom_api_handler.py             | 140 ----
 .../creative/touchdesigner/scripts/setup.sh   | 236 +++----
 8 files changed, 1398 insertions(+), 1420 deletions(-)
 delete mode 100644 skills/creative/touchdesigner/scripts/custom_api_handler.py

diff --git a/skills/creative/touchdesigner/SKILL.md b/skills/creative/touchdesigner/SKILL.md
index 0f464193fed..88fc79b2e74 100644
--- a/skills/creative/touchdesigner/SKILL.md
+++ b/skills/creative/touchdesigner/SKILL.md
@@ -1,278 +1,339 @@
 ---
 name: touchdesigner
-description: "Control a running TouchDesigner instance programmatically — create operators, set parameters, wire connections, execute Python, build real-time visuals. Covers: GLSL shaders, audio-reactive, generative art, video processing, instancing, and live performance."
-version: 3.0.0
+description: "Control a running TouchDesigner instance via twozero MCP — create operators, set parameters, wire connections, execute Python, build real-time visuals. 36 native tools."
+version: 2.0.0
 author: Hermes Agent
 license: MIT
 metadata:
   hermes:
-    tags: [TouchDesigner, MCP, creative-coding, real-time-visuals, generative-art, audio-reactive, VJ, installation, GLSL]
+    tags: [TouchDesigner, MCP, twozero, creative-coding, real-time-visuals, generative-art, audio-reactive, VJ, installation, GLSL]
     related_skills: [native-mcp, ascii-video, manim-video, hermes-video]
-    security:
-      allow_network: true
-      allow_install: true
-      allow_config_write: true
+
 ---
 
-# TouchDesigner Integration
+# TouchDesigner Integration (twozero MCP)
+
+## CRITICAL RULES
+
+1. **NEVER guess parameter names.** Call `td_get_par_info` for the op type FIRST. Your training data is wrong for TD 2025.32.
+2. **If `tdAttributeError` fires, STOP.** Call `td_get_operator_info` on the failing node before continuing.
+3. **NEVER hardcode absolute paths** in script callbacks. Use `me.parent()` / `scriptOp.parent()`.
+4. **Prefer native MCP tools over td_execute_python.** Use `td_create_operator`, `td_set_operator_pars`, `td_get_errors` etc. Only fall back to `td_execute_python` for complex multi-step logic.
+5. **Call `td_get_hints` before building.** It returns patterns specific to the op type you're working with.
 
 ## Architecture
 
-Hermes Agent -> HTTP REST (curl) -> TD WebServer DAT (port 9981) -> TD Python environment.
+```
+Hermes Agent -> MCP (Streamable HTTP) -> twozero.tox (port 40404) -> TD Python
+```
 
-The agent controls a **running TouchDesigner instance** via a REST API on port 9981. It does NOT generate .toe files from scratch.
+36 native tools. Free plugin (no payment/license — confirmed April 2026).
+Context-aware (knows selected OP, current network).
+Hub health check: `GET http://localhost:40404/mcp` returns JSON with instance PID, project name, TD version.
 
-## First-Time Setup (one-time, persists across sessions)
+## Setup (Automated)
 
-### 1. Verify TD is running and check for existing API
+Run the setup script to handle everything:
 
 ```bash
-lsof -i :9981 -P -n | grep LISTEN   # TD listening?
-curl -s --max-time 5 http://127.0.0.1:9981/api/td/server/td  # API working?
+bash ~/.hermes/skills/creative/touchdesigner/scripts/setup.sh
 ```
 
-If HTTP 200 + JSON → skip to **Discovery**. Setup is already done.
+The script will:
+1. Check if TD is running
+2. Download twozero.tox if not already cached
+3. Add `twozero_td` MCP server to Hermes config (if missing)
+4. Test the MCP connection on port 40404
+5. Report what manual steps remain (drag .tox into TD, enable MCP toggle)
 
-### 2. If no API: deploy the custom handler
+### Manual steps (one-time, cannot be automated)
 
-The user must paste ONE line into TD Textport (Alt+T / Dialogs > Textport and DATs):
+1. **Drag `~/Downloads/twozero.tox` into the TD network editor** → click Install
+2. **Enable MCP:** click twozero icon → Settings → mcp → "auto start MCP" → Yes
+3. **Restart Hermes session** to pick up the new MCP server
 
-```
-exec(open('PATH_TO_SKILL/scripts/custom_api_handler.py').read())
-```
-
-Copy this to their clipboard with `pbcopy`. This creates a WebServer DAT + callback handler pair in `/project1` that implements the REST API. No external dependencies.
-
-**Why not the official .tox?** The `mcp_webserver_base.tox` from 8beeeaaat/touchdesigner-mcp frequently fails to import its Python modules after drag-drop (relative path resolution issue). Our custom handler is self-contained and more reliable. See `references/pitfalls.md` #1-2.
-
-### 3. Save the project to persist the API
-
-After the handler is running, save the project so the API auto-starts on every future TD launch:
-
-```python
-td_exec("project.save(os.path.expanduser('~/Documents/HermesAgent.toe'))")
-```
-
-TD auto-opens the last saved project on launch. From now on, `open /Applications/TouchDesigner.app` → port 9981 is live → agent can connect immediately.
-
-To launch TD with this project explicitly:
+After setup, verify:
 ```bash
-open /Applications/TouchDesigner.app ~/Documents/HermesAgent.toe
+nc -z 127.0.0.1 40404 && echo "twozero MCP: READY"
 ```
 
-### 4. Optional: Configure Hermes MCP
+## Environment Notes
 
-Add under `mcp_servers:` in the user's Hermes config:
-```yaml
-touchdesigner:
-  command: npx
-  args: ["-y", "touchdesigner-mcp-server@latest"]
-  env:
-    TD_API_URL: "http://127.0.0.1:9981"
-  timeout: 120
-```
-
-This is optional — the agent works fully via `curl` to the REST API using `execute_code`. MCP tools are a convenience layer.
-
-## Talking to TD (the td_exec pattern)
-
-All communication uses this pattern in `execute_code`:
-
-```python
-import json, shlex
-from hermes_tools import terminal
-
-API = "http://127.0.0.1:9981"
-def td_exec(script):
-    payload = json.dumps({"script": script})
-    cmd = f"curl -s --max-time 15 -X POST -H 'Content-Type: application/json' -d {shlex.quote(payload)} '{API}/api/td/server/exec'"
-    r = terminal(cmd, timeout=20)
-    return json.loads(r['output'])
-
-# Returns: {"result": <value>, "stdout": "...", "stderr": "..."}
-```
-
-For large GLSL shaders: write to a temp file, then `td_exec("op('...').text = open('/tmp/shader.glsl').read()")`.
+- **Non-Commercial TD** caps resolution at 1280×1280. Use `outputresolution = 'custom'` and set width/height explicitly.
+- **Codecs:** `prores` (preferred on macOS) or `mjpa` as fallback. H.264/H.265/AV1 require a Commercial license.
+- Always call `td_get_par_info` before setting params — names vary by TD version (see CRITICAL RULES #1).
 
 ## Workflow
 
-### Step 0: Discovery (MANDATORY — never skip)
+### Step 0: Discover (before building anything)
 
-**Never hardcode parameter names.** They change between TD versions. Run this first:
-
-```python
-td_exec("""
-import sys
-info = {'version': str(app.version), 'platform': sys.platform}
-root = op('/project1')
-for name, optype in [('glslTOP', glslTOP), ('constantTOP', constantTOP),
-                      ('blurTOP', blurTOP), ('textTOP', textTOP),
-                      ('levelTOP', levelTOP), ('compositeTOP', compositeTOP),
-                      ('transformTOP', transformTOP), ('feedbackTOP', feedbackTOP),
-                      ('windowCOMP', windowCOMP)]:
-    n = root.create(optype, '_d_' + name)
-    kw = ['color','size','font','dat','alpha','opacity','resolution','text',
-          'extend','operand','top','pixel','format','win','type']
-    info[name] = [p.name for p in n.pars() if any(k in p.name.lower() for k in kw)]
-    n.destroy()
-result = info
-""")
+```
+Call td_get_par_info with op_type for each type you plan to use.
+Call td_get_hints with the topic you're building (e.g. "glsl", "audio reactive", "feedback").
+Call td_get_focus to see where the user is and what's selected.
+Call td_get_network to see what already exists.
 ```
 
-Use the returned param names for ALL subsequent calls. Store them in your session context.
+No temp nodes, no cleanup. This replaces the old discovery dance entirely.
 
 ### Step 1: Clean + Build
 
-Build the entire network in ONE `td_exec` call (batching avoids round-trip overhead and ensures TD advances frames between calls):
+**IMPORTANT: Split cleanup and creation into SEPARATE MCP calls.** Destroying and recreating same-named nodes in one `td_execute_python` script causes "Invalid OP object" errors. See pitfalls #10b.
+
+Use `td_create_operator` for each node (handles viewport positioning automatically):
+
+```
+td_create_operator(type="noiseTOP", parent="/project1", name="bg", parameters={"resolutionw": 1280, "resolutionh": 720})
+td_create_operator(type="levelTOP", parent="/project1", name="brightness")
+td_create_operator(type="nullTOP", parent="/project1", name="out")
+```
+
+For bulk creation or wiring, use `td_execute_python`:
 
 ```python
-td_exec("""
+# td_execute_python script:
 root = op('/project1')
-keep = {'api_server', 'api_handler'}
-for child in list(root.children):  # snapshot before destroying
-    if child.name not in keep and child.valid:
-        child.destroy()
-
-# Create nodes, set params (using discovered names), wire, verify
-...
-result = {'nodes': len(list(root.children)), 'errors': [...]}
-""")
+nodes = []
+for name, optype in [('bg', noiseTOP), ('fx', levelTOP), ('out', nullTOP)]:
+    n = root.create(optype, name)
+    nodes.append(n.path)
+# Wire chain
+for i in range(len(nodes)-1):
+    op(nodes[i]).outputConnectors[0].connect(op(nodes[i+1]).inputConnectors[0])
+result = {'created': nodes}
 ```
 
-### Step 2: Wire connections
+### Step 2: Set Parameters
 
-```python
-gl.outputConnectors[0].connect(comp.inputConnectors[0])
+Prefer the native tool (validates params, won't crash):
+
+```
+td_set_operator_pars(path="/project1/bg", parameters={"roughness": 0.6, "monochrome": true})
 ```
 
-### Step 3: Verify
+For expressions or modes, use `td_execute_python`:
 
 ```python
-for c in list(root.children):
-    e = c.errors(); w = c.warnings()
-    if e: print(c.name, 'ERR:', e)
+op('/project1/time_driver').par.colorr.expr = "absTime.seconds % 1000.0"
 ```
 
-### Step 4: Display
+### Step 3: Wire
+
+Use `td_execute_python` — no native wire tool exists:
 
 ```python
-win = root.create(windowCOMP, 'display')
-win.par.winop = out.path    # discovered param name
+op('/project1/bg').outputConnectors[0].connect(op('/project1/fx').inputConnectors[0])
+```
+
+### Step 4: Verify
+
+```
+td_get_errors(path="/project1", recursive=true)
+td_get_perf()
+td_get_operator_info(path="/project1/out", detail="full")
+```
+
+### Step 5: Display / Capture
+
+```
+td_get_screenshot(path="/project1/out")
+```
+
+Or open a window via script:
+
+```python
+win = op('/project1').create(windowCOMP, 'display')
+win.par.winop = op('/project1/out').path
 win.par.winw = 1280; win.par.winh = 720
 win.par.winopen.pulse()
 ```
 
+## MCP Tool Quick Reference
+
+**Core (use these most):**
+| Tool | What |
+|------|------|
+| `td_execute_python` | Run arbitrary Python in TD. Full API access. |
+| `td_create_operator` | Create node with params + auto-positioning |
+| `td_set_operator_pars` | Set params safely (validates, won't crash) |
+| `td_get_operator_info` | Inspect one node: connections, params, errors |
+| `td_get_operators_info` | Inspect multiple nodes in one call |
+| `td_get_network` | See network structure at a path |
+| `td_get_errors` | Find errors/warnings recursively |
+| `td_get_par_info` | Get param names for an OP type (replaces discovery) |
+| `td_get_hints` | Get patterns/tips before building |
+| `td_get_focus` | What network is open, what's selected |
+
+**Read/Write:**
+| Tool | What |
+|------|------|
+| `td_read_dat` | Read DAT text content |
+| `td_write_dat` | Write/patch DAT content |
+| `td_read_chop` | Read CHOP channel values |
+| `td_read_textport` | Read TD console output |
+
+**Visual:**
+| Tool | What |
+|------|------|
+| `td_get_screenshot` | Capture one OP viewer to file |
+| `td_get_screenshots` | Capture multiple OPs at once |
+| `td_get_screen_screenshot` | Capture actual screen via TD |
+| `td_navigate_to` | Jump network editor to an OP |
+
+**Search:**
+| Tool | What |
+|------|------|
+| `td_find_op` | Find ops by name/type across project |
+| `td_search` | Search code, expressions, string params |
+
+**System:**
+| Tool | What |
+|------|------|
+| `td_get_perf` | Performance profiling (FPS, slow ops) |
+| `td_list_instances` | List all running TD instances |
+| `td_get_docs` | In-depth docs on a TD topic |
+| `td_agents_md` | Read/write per-COMP markdown docs |
+| `td_reinit_extension` | Reload extension after code edit |
+| `td_clear_textport` | Clear console before debug session |
+
+**Input Automation:**
+| Tool | What |
+|------|------|
+| `td_input_execute` | Send mouse/keyboard to TD |
+| `td_input_status` | Poll input queue status |
+| `td_input_clear` | Stop input automation |
+| `td_op_screen_rect` | Get screen coords of a node |
+| `td_click_screen_point` | Click a point in a screenshot |
+
+See `references/mcp-tools.md` for full parameter schemas.
+
 ## Key Implementation Rules
 
-**Always clean safely:** `list(root.children)` before iterating + `child.valid` check.
-
-**GLSL time:** No `uTDCurrentTime` in TD 099. Feed time via 1x1 Constant TOP.
-**CRITICAL: must use `rgba32float` format** — the default 8-bit format clamps values to 0-1, so `absTime.seconds % 1000.0` becomes 1.0 and the shader appears frozen:
+**GLSL time:** No `uTDCurrentTime` in GLSL TOP. Use the Values page:
 ```python
-t = root.create(constantTOP, 'time_driver')
-t.par.format = 'rgba32float'  # ← REQUIRED or time is stuck at 1.0
-t.par.outputresolution = 'custom'
-t.par.resolutionw = 1
-t.par.resolutionh = 1
-t.par.colorr.expr = "absTime.seconds % 1000.0"
-t.par.colorg.expr = "int(absTime.seconds / 1000.0)"
-t.outputConnectors[0].connect(glsl.inputConnectors[0])
-# In GLSL: vec4 td = texture(sTD2DInputs[0], vec2(.5)); float t = td.r + td.g*1000.;
+# Call td_get_par_info(op_type="glslTOP") first to confirm param names
+td_set_operator_pars(path="/project1/shader", parameters={"value0name": "uTime"})
+# Then set expression via script:
+# op('/project1/shader').par.value0.expr = "absTime.seconds"
+# In GLSL: uniform float uTime;
 ```
 
-**Feedback TOP:** Use `top` parameter reference (not direct input wire). The "Not enough sources" error resolves after first cook. The "Cook dependency loop" warning is expected.
+Fallback: Constant TOP in `rgba32float` format (8-bit clamps to 0-1, freezing the shader).
+
+**Feedback TOP:** Use `top` parameter reference, not direct input wire. "Not enough sources" resolves after first cook. "Cook dependency loop" warning is expected.
 
 **Resolution:** Non-Commercial caps at 1280×1280. Use `outputresolution = 'custom'`.
 
-**Large shaders:** Write GLSL to `/tmp/file.glsl`, then `td_exec("op('shader').text = open('/tmp/file.glsl').read()")`.
+**Large shaders:** Write GLSL to `/tmp/file.glsl`, then use `td_write_dat` or `td_execute_python` to load.
 
-**WebServer DAT quirk:** Response body goes in `response['data']` not `response['body']`. Request POST body comes as bytes in `request['data']`.
+**Vertex/Point access (TD 2025.32):** `point.P[0]`, `point.P[1]`, `point.P[2]` — NOT `.x`, `.y`, `.z`.
+
+**Extensions:** `ext0object` format is `"op('./datName').module.ClassName(me)"` in CONSTANT mode. After editing extension code with `td_write_dat`, call `td_reinit_extension`.
+
+**Script callbacks:** ALWAYS use relative paths via `me.parent()` / `scriptOp.parent()`.
+
+**Cleaning nodes:** Always `list(root.children)` before iterating + `child.valid` check.
 
 ## Recording / Exporting Video
 
-To capture TD output as video or image sequence for external use (e.g., ASCII video pipeline):
-
-### Movie Recording (recommended)
-
 ```python
-# Put a Null TOP before the recorder (official best practice)
+# via td_execute_python:
+root = op('/project1')
 rec = root.create(moviefileoutTOP, 'recorder')
-null_out.outputConnectors[0].connect(rec.inputConnectors[0])
-
+op('/project1/out').outputConnectors[0].connect(rec.inputConnectors[0])
 rec.par.type = 'movie'
 rec.par.file = '/tmp/output.mov'
-rec.par.videocodec = 'mjpa'  # Motion JPEG — works on Non-Commercial
-
-# Start/stop recording (par.record is a toggle, NOT .record() method)
+rec.par.videocodec = 'prores'  # Apple ProRes — NOT license-restricted on macOS
 rec.par.record = True   # start
-# ... wait ...
-rec.par.record = False  # stop
+# rec.par.record = False  # stop (call separately later)
 ```
 
-**H.264/H.265 require a Commercial license** — use `mjpa` (Motion JPEG) or `prores` on Non-Commercial. Extract frames afterward with ffmpeg if needed:
-```bash
-ffmpeg -i /tmp/output.mov -vframes 120 /tmp/frames/frame_%06d.png
-```
+H.264/H.265/AV1 require a Commercial license. Use `prores` on macOS, or `mjpa` as a fallback.
+Extract frames: `ffmpeg -i /tmp/output.mov -vframes 120 /tmp/frames/frame_%06d.png`
 
-### Image Sequence Export
+**TOP.save() is useless for animation** — captures same GPU texture every time. Always use MovieFileOut.
 
-```python
-rec.par.type = 'imagesequence'
-rec.par.imagefiletype = 'png'
-rec.par.file.expr = "'/tmp/frames/out' + me.fileSuffix"  # fileSuffix is REQUIRED
-rec.par.record = True
-```
+### Before Recording: Checklist
 
-### Pitfalls
-
-- **Race condition:** When setting `par.file` and starting recording in the same script, use `run("...", delayFrames=2)` so the file path is applied before recording begins.
-- **TOP.save() is useless for animation:** Calling `op('null1').save(path)` in a loop or rapid API calls captures the same GPU texture every time — TD doesn't cook new frames between save calls. Always use MovieFileOut for animated output.
-- See `references/pitfalls.md` #25-27 for full details.
+1. **Verify FPS > 0** via `td_get_perf`. If FPS=0 the recording will be empty. See pitfalls #37-38.
+2. **Verify shader output is not black** via `td_get_screenshot`. Black output = shader error or missing input. See pitfalls #7, #39.
+3. **If recording with audio:** cue audio to start first, then delay recording by 3 frames. See pitfalls #18.
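+4. **Set output path before starting record** — setting `par.file` and `par.record` in the same script can race. Set the path in one call and start recording in a later call, or defer the start with `run("...", delayFrames=2)`.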
 
 ## Audio-Reactive GLSL (Proven Recipe)
 
-Complete chain for music-driven visuals: AudioFileIn → AudioSpectrum → Math (boost) → Resample (256) → CHOP To TOP → GLSL TOP (spectrum sampled per-pixel). See `references/network-patterns.md` Pattern 3b for the full working recipe with shader code.
+### Correct signal chain (tested April 2026)
 
-## Audio-Reactive Visuals
-
-The most powerful TD workflow for the agent: play an audio file, analyze its spectrum, and drive a GLSL shader in real-time. The agent builds the entire signal chain programmatically.
-
-**Signal chain:**
 ```
-AudioFileIn CHOP → AudioSpectrum CHOP → Math CHOP (gain=5)
-  → Resample CHOP (256) → CHOP To TOP (spectrum texture)
-                                  ↓ (GLSL input 1)
-  Constant TOP (rgba32float, time) → GLSL TOP → Null TOP → MovieFileOut
-        (input 0)
+AudioFileIn CHOP (playmode=sequential)
+  → AudioSpectrum CHOP (FFT=512, outputmenu=setmanually, outlength=256, timeslice=ON)
+  → Math CHOP (gain=10)
+  → CHOP to TOP (dataformat=r, layout=rowscropped)
+  → GLSL TOP input 1 (spectrum texture, 256x2)
+
+Constant TOP (rgba32float, time) → GLSL TOP input 0
+GLSL TOP → Null TOP → MovieFileOut
 ```
 
-**Key technique:** The spectrum becomes a 256×1 texture. In GLSL, `texture(sTD2DInputs[1], vec2(x, 0.0)).r` samples frequency at position x (0=bass, 1=treble). This lets the shader react per-pixel to different frequency bands.
+### Critical audio-reactive rules (empirically verified)
 
-**Smoothing is critical:** Raw FFT jitters. Use `Math CHOP` gain to boost weak signal, then the GLSL shader's own temporal integration (via feedback or time-smoothed params) handles visual smoothing.
+1. **TimeSlice must stay ON** for AudioSpectrum. OFF = processes entire audio file → 24000+ samples → CHOP to TOP overflow.
+2. **Set Output Length manually** to 256 via `outputmenu='setmanually'` and `outlength=256`. Default outputs 22050 samples.
+3. **DO NOT use Lag CHOP for spectrum smoothing.** Lag CHOP operates in timeslice mode and expands 256 samples to 2400+, averaging all values to near-zero (~1e-06). The shader receives no usable data. This was the #1 audio sync failure in testing.
+4. **DO NOT use Filter CHOP either** — same timeslice expansion problem with spectrum data.
+5. **Smoothing belongs in the GLSL shader** if needed, via temporal lerp with a feedback texture: `mix(prevValue, newValue, 0.3)`. This gives frame-perfect sync with zero pipeline latency.
+6. **CHOP to TOP dataformat = 'r'**, layout = 'rowscropped'. Spectrum output is 256x2 (stereo). Sample at y=0.25 for first channel.
+7. **Math gain = 10** (not 5). Raw spectrum values are ~0.19 in bass range. Gain of 10 gives usable ~5.0 for the shader.
+8. **No Resample CHOP needed.** Control output size via AudioSpectrum's `outlength` param directly.
 
-See `references/network-patterns.md` Pattern 9b for the complete build script + shader code.
+### GLSL spectrum sampling
+
+```glsl
+// Input 0 = time (1x1 rgba32float), Input 1 = spectrum (256x2)
+float iTime = texture(sTD2DInputs[0], vec2(0.5)).r;
+
+// Sample multiple points per band and average for stability:
+// NOTE: y=0.25 for first channel (stereo texture is 256x2, first row center is 0.25)
+float bass = (texture(sTD2DInputs[1], vec2(0.02, 0.25)).r +
+              texture(sTD2DInputs[1], vec2(0.05, 0.25)).r) / 2.0;
+float mid  = (texture(sTD2DInputs[1], vec2(0.2, 0.25)).r +
+              texture(sTD2DInputs[1], vec2(0.35, 0.25)).r) / 2.0;
+float hi   = (texture(sTD2DInputs[1], vec2(0.6, 0.25)).r +
+              texture(sTD2DInputs[1], vec2(0.8, 0.25)).r) / 2.0;
+```
+
+See `references/network-patterns.md` for complete build scripts + shader code.
 
 ## Operator Quick Reference
 
-| Family | Color | Examples | Suffix |
-|--------|-------|----------|--------|
-| TOP | Purple | noiseTop, glslTop, compositeTop, levelTop, blurTop, textTop, nullTop, feedbackTop, renderTop | TOP |
-| CHOP | Green | audiofileinChop, audiospectrumChop, mathChop, lfoChop, constantChop | CHOP |
-| SOP | Blue | gridSop, sphereSop, transformSop, noiseSop | SOP |
-| DAT | White | textDat, tableDat, scriptDat, webserverDAT | DAT |
-| MAT | Yellow | phongMat, pbrMat, glslMat, constMat | MAT |
-| COMP | Gray | geometryComp, containerComp, cameraComp, lightComp, windowCOMP | COMP |
+| Family | Color | Python class / MCP type | Suffix |
+|--------|-------|-------------|--------|
+| TOP | Purple | noiseTOP, glslTOP, compositeTOP, levelTOP, blurTOP, textTOP, nullTOP | TOP |
+| CHOP | Green | audiofileinCHOP, audiospectrumCHOP, mathCHOP, lfoCHOP, constantCHOP | CHOP |
+| SOP | Blue | gridSOP, sphereSOP, transformSOP, noiseSOP | SOP |
+| DAT | White | textDAT, tableDAT, scriptDAT, webserverDAT | DAT |
+| MAT | Yellow | phongMAT, pbrMAT, glslMAT, constMAT | MAT |
+| COMP | Gray | geometryCOMP, containerCOMP, cameraCOMP, lightCOMP, windowCOMP | COMP |
 
-See `references/operators.md` for full catalog. See `references/network-patterns.md` for recipes.
+## Security Notes
+
+- MCP runs on localhost only (port 40404). No authentication — any local process can send commands.
+- `td_execute_python` has unrestricted access to the TD Python environment and filesystem as the TD process user.
+- `setup.sh` downloads twozero.tox from the official 404zero.com URL. Verify the download if concerned.
+- The skill never sends data outside localhost. All MCP communication is local.
 
 ## References
 
 | File | What |
 |------|------|
-| `references/pitfalls.md` | **READ FIRST** — 31 hard-won lessons from real sessions |
+| `references/pitfalls.md` | Hard-won lessons from real sessions |
 | `references/operators.md` | All operator families with params and use cases |
-| `references/network-patterns.md` | Recipes: audio-reactive, generative, video, GLSL, instancing |
-| `references/mcp-tools.md` | MCP tool schemas (optional — curl works without MCP) |
+| `references/network-patterns.md` | Recipes: audio-reactive, generative, GLSL, instancing |
+| `references/mcp-tools.md` | Full twozero MCP tool parameter schemas |
 | `references/python-api.md` | TD Python: op(), scripting, extensions |
-| `references/troubleshooting.md` | Connection diagnostics, param debugging, performance |
-| `scripts/custom_api_handler.py` | Self-contained REST API handler for TD WebServer DAT |
+| `references/troubleshooting.md` | Connection diagnostics, debugging |
+| `scripts/setup.sh` | Automated setup script |
+
+---
+
+> You're not writing code. You're conducting light.
diff --git a/skills/creative/touchdesigner/references/mcp-tools.md b/skills/creative/touchdesigner/references/mcp-tools.md
index 5e4ad98d553..ec90076cb2b 100644
--- a/skills/creative/touchdesigner/references/mcp-tools.md
+++ b/skills/creative/touchdesigner/references/mcp-tools.md
@@ -1,501 +1,382 @@
-# TouchDesigner MCP Tools Reference
+# twozero MCP Tools Reference
 
-Complete parameter schemas and usage examples for all 13 MCP tools from the 8beeeaaat/touchdesigner-mcp server.
+36 tools from twozero MCP v2.774+ (April 2026).
+All tools accept an optional `target_instance` param for multi-TD-instance scenarios.
 
-## Hermes Configuration
+## Execution & Scripting
 
-Add a `touchdesigner` entry under the `mcp_servers` section of your Hermes config. Example YAML block:
+### td_execute_python
 
-```yaml
-# Under mcp_servers: in config.yaml
-mcp_servers:
-  touchdesigner:
-    command: npx
-    args: ["-y", "touchdesigner-mcp-server@latest"]
-    env:
-      TD_API_URL: "http://127.0.0.1:9981"
-    timeout: 120
-    connect_timeout: 60
-```
+Execute Python code inside TouchDesigner and return the result. Has full access to TD Python API (op, project, app, etc). Print statements and the last expression value are captured. Best for: wiring connections (inputConnectors), setting expressions (par.X.expr/mode), querying parameter names, and batch creation scripts (5+ operators). For creating 1-4 operators, prefer td_create_operator instead.
 
-For a locally built server, point `command` to `node` and `args` to the built server index.js path. Set `TD_API_URL` to the TouchDesigner WebServer DAT address (default port 9981).
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `code` | string | yes | Python code to execute in TouchDesigner |
 
-For the documentation/knowledge server (no running TD needed), add a `td_docs` entry using `touchdesigner-mcp-server` as the npx package.
+## Network & Structure
 
-Tools are registered as `mcp_touchdesigner_<tool_name>` in Hermes.
+### td_get_network
 
-**If MCP tools are not available as direct function calls** (common when the MCP server connects but Hermes doesn't expose them as callable tools), use the custom API handler directly via `curl` in `execute_code` or `terminal`:
+Get the operator network structure in TouchDesigner (TD) at a given path. Returns compact list: name OPType flags. First line is full path of queried op. Flags: ch:N=children count, !cook=allowCooking off, bypass, private=isPrivate, blocked:reason, "comment text". depth=0 (default) = current level only. depth=1 = one level of children (indented). To explore deeper, call again on a specific COMP path. System operators (/ui, /sys) are hidden by default.
 
-```python
-import json, shlex
-from hermes_tools import terminal
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | no | Network path to inspect, e.g. '/' or '/project1' |
+| `depth` | integer | no | How many levels deep to recurse. 0=current level only (recommended), 1=include direct children of COMPs |
+| `includeSystem` | boolean | no | Include system operators (/ui, /sys). Default false. |
+| `nodeXY` | boolean | no | Include nodeX,nodeY coordinates. Default false. |
 
-def td_exec(script):
-    """Execute Python in TouchDesigner via the REST API."""
-    escaped = json.dumps({"script": script})
-    cmd = f"curl -s --max-time 15 -X POST -H 'Content-Type: application/json' -d {shlex.quote(escaped)} 'http://127.0.0.1:9981/api/td/server/exec'"
-    r = terminal(cmd, timeout=20)
-    return json.loads(r['output'])
+### td_create_operator
 
-# Example: list all nodes
-result = td_exec('result = [c.name for c in op("/project1").children]')
-print(result)  # {"result": ["node1", "node2", ...], "stdout": "", "stderr": ""}
-```
+Create a new operator (node) in TouchDesigner (TD). Preferred way to create operators — handles viewport positioning, viewer flag, and docked ops automatically. For batch creation (5+ ops), you may use td_execute_python with a script instead, but then call td_get_hints('construction') first for correct parameter names and layout rules. Supports all TD operator types: TOP, CHOP, SOP, DAT, COMP, MAT. If parent is omitted, creates in the currently open network at the user's viewport position. When building a container: first create baseCOMP (no parent), then create children with parent=compPath.
 
-This `td_exec` helper works with both the official .tox handler and the custom API handler from `scripts/custom_api_handler.py`.
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `type` | string | yes | Operator type, e.g. 'textDAT', 'constantCHOP', 'noiseTOP', 'transformTOP', 'baseCOMP' |
+| `parent` | string | no | Path to the parent operator. If omitted, uses the currently open network in TD. |
+| `name` | string | no | Name for the new operator (optional, TD auto-names if omitted) |
+| `parameters` | object | no | Key-value pairs of parameters to set on the created operator |
 
-Tools are registered as `mcp_touchdesigner_<tool_name>` in Hermes.
+### td_find_op
 
-## Common Formatting Parameters
+Find operators by name and/or type across the project. Returns TSV: path, OPType, flags. Flags: bypass, !cook, private, blocked:reason. Use td_search to search inside code/expressions; use td_find_op to find operators themselves.
 
-Most tools accept these optional formatting parameters:
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `name` | string | no | Substring to match in operator name (case-insensitive). E.g. 'noise' finds noise1, noise2, myNoise. |
+| `type` | string | no | Substring to match in OPType (case-insensitive). E.g. 'noiseTOP', 'baseCOMP', 'CHOP'. Use exact type for precision or partial for broader matches. |
+| `root` | string | no | Root operator path to search from. Default '/project1'. |
+| `max_results` | number | no | Maximum results to return. Default 50. |
+| `max_depth` | number | no | Max recursion depth from root. Default unlimited. |
+| `detail` | `basic` / `summary` | no | Result detail level. 'basic' = name/path/type (fast). 'summary' = + connections, non-default pars, expressions. Default 'basic'. |
 
-| Parameter | Type | Values | Description |
-|-----------|------|--------|-------------|
-| `detailLevel` | string | `"minimal"`, `"summary"`, `"detailed"` | Response verbosity |
-| `responseFormat` | string | `"json"`, `"yaml"`, `"markdown"` | Output format |
-| `limit` | integer | 1-500 | Max items (on list-type tools only) |
+### td_search
 
-These are client-side formatting — they control how the MCP server formats the response text, not what data TD returns.
+Search for text across all code (DAT scripts), parameter expressions, and string parameter values in the TD project. Returns TSV: path, kind (code/expression/parameter/ref), line, text. JSON when context>0. Words are OR-matched. Use quotes for exact phrases: 'GetLogin "op('login')"'. Use count_only=true to quickly check if something is referenced without fetching full results.
 
----
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `query` | string | yes | Search query. Multiple words = OR (any match). Wrap in quotes for exact phrase. Example: 'GetLogin getLogin' finds either. |
+| `root` | string | no | Root operator path to search from. Default '/project1'. |
+| `scope` | `all` / `code` / `editable` / `expressions` / `parameters` | no | What to search. 'code' = DAT scripts only (fast, ~0.05s). 'editable' = only editable code (skips inherited/ref DATs). 'expressions' = parameter expressions only. 'parameters' = string parameter values only. 'all' = everything (slow, ~1.5s due to parameter scan). Default 'all'. |
+| `case_sensitive` | boolean | no | Case-sensitive matching. Default false. |
+| `max_results` | number | no | Maximum results to return. Default 50. |
+| `context` | number | no | Lines to show before/after each code match. Saves td_read_dat calls. Default 0. |
+| `count_only` | boolean | no | Return only match count, not results. Fast existence check. |
+| `max_depth` | number | no | Max recursion depth from root. Default unlimited. |
 
-## Tool 1: describe_td_tools
+### td_navigate_to
 
-**Purpose:** Meta-tool — lists all available TouchDesigner MCP tools with descriptions and parameters.
+Navigate the TouchDesigner Network Editor viewport to show a specific operator. Opens the operator's parent network and centers the view on it. Use this to show the user where a problem is, or to navigate to an operator before modifying it.
 
-**Parameters:**
-| Name | Type | Required | Description |
-|------|------|----------|-------------|
-| `filter` | string | No | Keyword to filter tools by name, description, or parameter |
-| `detailLevel` | string | No | Response verbosity |
-| `responseFormat` | string | No | Output format |
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Path to the operator to navigate to, e.g. '/project1/noise1' |
 
-**Example:** Find tools related to node creation
-```
-describe_td_tools(filter="create")
-```
+## Operator Inspection
 
-**Note:** This tool runs entirely in the MCP server — it does NOT contact TouchDesigner. Use it to discover what's available.
+### td_get_operator_info
 
----
+Get information about a specific operator (node) in TouchDesigner (TD). detail='summary': connections, non-default pars, expressions, CHOP channels (compact). detail='full': all of the above PLUS every parameter with value/default/label.
 
-## Tool 2: get_td_info
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Full path to the operator, e.g. '/project1/noise1' |
+| `detail` | `summary` / `full` | no | Level of detail. 'summary' = connections, expressions, non-default pars, custom pars (pulse marked), CHOP channels. 'full' = summary + all parameters. Default 'full'. |
 
-**Purpose:** Get TouchDesigner server information (version, OS, build).
+### td_get_operators_info
 
-**Parameters:**
-| Name | Type | Required | Description |
-|------|------|----------|-------------|
-| `detailLevel` | string | No | Response verbosity |
-| `responseFormat` | string | No | Output format |
+Get information about multiple operators in one call. Returns an array of operator info objects. Use instead of calling td_get_operator_info multiple times.
 
-**Example:** Check TD is running and get version
-```
-get_td_info()
-```
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `paths` | array | yes | Array of full operator paths, e.g. ['/project1/null1', '/project1/null2'] |
+| `detail` | `summary` / `full` | no | Level of detail. Default 'summary'. |
 
-**Returns:** TD version, build number, OS name/version, MCP API version.
+### td_get_par_info
 
-**Use this first** to verify the connection is working before building networks.
+Get parameter names and details for a TouchDesigner operator type. Without specific pars: returns compact list of all parameters with their names, types, and menu options. With pars: returns full details (help text, menu values, style) for specific parameters. Use this when you need to know exact parameter names before setting them.
 
----
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `op_type` | string | yes | TD operator type name, e.g. 'noiseTOP', 'blurTOP', 'lfoCHOP', 'compositeTOP' |
+| `pars` | array | no | Optional list of specific parameter names to get full details for |
 
-## Tool 3: execute_python_script
+## Parameter Setting
 
-**Purpose:** Execute arbitrary Python code inside TouchDesigner's Python environment.
+### td_set_operator_pars
 
-**Parameters:**
-| Name | Type | Required | Description |
-|------|------|----------|-------------|
-| `script` | string | **Yes** | Python code to execute |
-| `detailLevel` | string | No | Response verbosity |
-| `responseFormat` | string | No | Output format |
+Set parameters and flags on an operator in TouchDesigner (TD). Safer than td_execute_python for simple parameter changes. Can set values, toggle bypass/viewer, without writing Python code.
 
-**Available globals in the script:**
-- `op` — find operators by path
-- `ops` — find multiple operators by pattern
-- `me` — the WebServer DAT running the script
-- `parent` — me.parent()
-- `project` — root project component
-- `td` — the full td module
-- `result` — set this to explicitly return a value
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Path to the operator |
+| `parameters` | object | no | Key-value pairs of parameters to set |
+| `bypass` | boolean | no | Set bypass state of the operator (not available on COMPs) |
+| `viewer` | boolean | no | Set viewer state of the operator |
+| `allowCooking` | boolean | no | Set cooking flag on a COMP. When False, internal network stops cooking (0 CPU). COMP-only. |
 
-**Execution behavior:**
-- Single-line scripts: tries `eval()` first (returns value), falls back to `exec()`
-- Multi-line scripts: uses `exec()` always
-- stdout/stderr are captured and returned separately
-- If `result` is not set, tries to evaluate the last expression as the return value
+## Data Read/Write
 
-**Examples:**
+### td_read_dat
 
-```python
-# Simple query
-execute_python_script(script="op('/project1/noise1').par.seed.val")
-# Returns: {"result": 42, "stdout": "", "stderr": ""}
+Read the text content of a DAT operator in TouchDesigner (TD). Returns content with line numbers. Use to read scripts, extensions, GLSL shaders, table data.
 
-# Multi-line script
-execute_python_script(script="""
-nodes = op('/project1').findChildren(type=TOP)
-result = [{'name': n.name, 'type': n.OPType} for n in nodes]
-""")
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Path to the DAT operator |
+| `start_line` | integer | no | Start line (1-based). Omit to read from beginning. |
+| `end_line` | integer | no | End line (inclusive). Omit to read to end. |
 
-# Connect two operators
-execute_python_script(script="op('/project1/noise1').outputConnectors[0].connect(op('/project1/level1'))")
+### td_write_dat
 
-# Create and configure in one script
-execute_python_script(script="""
-parent = op('/project1')
-n = parent.create(noiseTop, 'my_noise')
-n.par.seed.val = 42
-n.par.monochrome.val = True
-n.par.resolutionw.val = 1920
-n.par.resolutionh.val = 1080
-result = {'path': n.path, 'type': n.OPType}
-""")
+Write or patch text content of a DAT operator in TouchDesigner (TD). Can do full replacement or StrReplace-style patching (old_text -> new_text). Use for editing scripts, extensions, shaders. Does NOT reinit extensions automatically.
 
-# Batch wire a chain
-execute_python_script(script="""
-chain = ['noise1', 'level1', 'blur1', 'composite1', 'null_out']
-for i in range(len(chain) - 1):
-    src = op(f'/project1/{chain[i]}')
-    dst = op(f'/project1/{chain[i+1]}')
-    if src and dst:
-        src.outputConnectors[0].connect(dst)
-result = 'Wired chain: ' + ' -> '.join(chain)
-""")
-```
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Path to the DAT operator |
+| `text` | string | no | Full replacement text. Use this OR old_text+new_text, not both. |
+| `old_text` | string | no | Text to find and replace (must be unique in the DAT) |
+| `new_text` | string | no | Replacement text |
+| `replace_all` | boolean | no | If true, replaces ALL occurrences of old_text (default: false, requires unique match) |
 
-**When to use:** Wiring connections, complex logic, batch operations, querying state that other tools don't cover. This is the most powerful and flexible tool.
+### td_read_chop
 
----
+Read CHOP channel sample data. Returns channel values as arrays. Use when you need the actual sample values (animation curves, lookup tables, waveforms), not just the summary from td_get_operator_info.
 
-## Tool 4: create_td_node
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Path to the CHOP operator |
+| `channels` | array | no | Channel names to read. Omit to read all channels. |
+| `start` | integer | no | Start sample index (0-based). Omit to read from beginning. |
+| `end` | integer | no | End sample index (inclusive). Omit to read to end. |
 
-**Purpose:** Create a new operator in TouchDesigner.
+### td_read_textport
 
-**Parameters:**
-| Name | Type | Required | Description |
-|------|------|----------|-------------|
-| `parentPath` | string | **Yes** | Path to parent (e.g., `/project1`) |
-| `nodeType` | string | **Yes** | Operator type (e.g., `noiseTop`, `mathChop`) |
-| `nodeName` | string | No | Custom name (auto-generated if omitted) |
-| `detailLevel` | string | No | Response verbosity |
-| `responseFormat` | string | No | Output format |
+Read the last N lines from the TouchDesigner (TD) log/textport (console output). Use this to see errors, warnings and print output from TD.
 
-**Examples:**
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `lines` | integer | no | Number of recent lines to return |
 
-```
-create_td_node(parentPath="/project1", nodeType="noiseTop", nodeName="bg_noise")
-create_td_node(parentPath="/project1", nodeType="compositeTop")  # auto-named
-create_td_node(parentPath="/project1/audio_chain", nodeType="audiospectrumChop", nodeName="spectrum")
-```
+### td_clear_textport
 
-**Returns:** Node summary with id, name, path, opType, and all default parameter values.
+Clear the MCP textport log buffer. Use this before starting a debug session or an edit-run-check loop to keep td_read_textport output focused and minimal.
 
-**Node type naming convention:** camelCase family suffix — `noiseTop`, `mathChop`, `gridSop`, `tableDat`, `phongMat`, `geometryComp`. See `references/operators.md` for the full list.
+No parameters (other than optional `target_instance`).
 
----
+## Visual Capture
 
-## Tool 5: delete_td_node
+### td_get_screenshot
 
-**Purpose:** Delete an existing operator.
+Get a screenshot of an operator's viewer in TouchDesigner (TD). Saves the image to a file and returns the file path. Use your file-reading tool to view the image. Shows what the operator looks like in its viewer (TOP output, CHOP waveform graph, SOP geometry, DAT table, parameter UI, etc). Use this to visually inspect any operator, or to generate images via TD for use in your project. TWO-STEP ASYNC USAGE: Step 1 — call with 'path' to start: returns {'status': 'pending', 'requestId': '...'}. Step 2 — call with 'request_id' to retrieve: returns {'file': '/tmp/.../opname_id.jpg'}. Then read the file to see the image. If step 2 still returns pending, make one other tool call then retry.
 
-**Parameters:**
-| Name | Type | Required | Description |
-|------|------|----------|-------------|
-| `nodePath` | string | **Yes** | Absolute path to node (e.g., `/project1/noise1`) |
-| `detailLevel` | string | No | Response verbosity |
-| `responseFormat` | string | No | Output format |
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | no | Full operator path to screenshot, e.g. '/project1/noise1'. Required for step 1. |
+| `request_id` | string | no | Request ID from step 1 to retrieve the completed screenshot. |
+| `max_size` | integer | no | Max pixel size for the longer side (default 512). Use 0 for original operator resolution (useful for pixel-accurate UI work). Higher values (e.g. 1024) for more detail. |
+| `output_path` | string | no | Optional absolute path where the image should be saved (e.g. '/Users/me/project/render.png'). If omitted, saved to /tmp/pisang_mcp/screenshots/. Use absolute paths — TD's working directory may differ from the agent's. |
+| `as_top` | boolean | no | If true, captures the operator directly as a TOP (bypasses the viewer renderer), preserving alpha/transparency. Only works for TOP operators — if the target is not a TOP, falls back to the viewer automatically. Use this when you need a clean PNG with alpha, e.g. to save a generated image for use in another project. |
+| `format` | `auto` / `jpg` / `png` | no | Image format. 'auto' (default): JPEG for viewer mode, PNG for as_top=true. 'jpg': always JPEG (smaller). 'png': always PNG (lossless). |
 
-**Example:**
+### td_get_screenshots
 
-```
-delete_td_node(nodePath="/project1/noise1")
-```
+Get screenshots of multiple operators in one batch. Saves images to files and returns file paths. Use your file-reading tool to view images. TWO-STEP ASYNC USAGE: Step 1 — call with 'paths' array to start: returns {'status': 'pending', 'batchId': '...', 'total': N}. Step 2 — call with 'batch_id' to retrieve: returns {'files': [{op, file}, ...]}. Then read the files to see the images. If the batch is still processing, step 2 returns {'status': 'pending', 'ready': K, 'total': N}.
 
-**Returns:** Confirmation with the deleted node's summary (captured before deletion).
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `paths` | array | no | List of full operator paths to screenshot. Required for step 1. |
+| `batch_id` | string | no | Batch ID from step 1 to retrieve completed screenshots. |
+| `max_size` | integer | no | Max pixel size for longer side (default 512). Use 0 for original resolution. |
+| `as_top` | boolean | no | If true, captures TOP operators directly (preserves alpha). Non-TOP operators fall back to viewer. |
+| `output_dir` | string | no | Optional absolute path to a directory. Each screenshot is saved as `<opname>.jpg` or `<opname>.png` inside it and kept on disk. |
+| `format` | `auto` / `jpg` / `png` | no | Image format. 'auto' (default): JPEG for viewer mode, PNG for as_top=true. 'jpg': always JPEG (smaller). 'png': always PNG (lossless). |
 
----
+### td_get_screen_screenshot
 
-## Tool 6: get_td_nodes
+Capture a screenshot of the actual screen via TD's screenGrabTOP. Saves the image to a file and returns the file path. Use your file-reading tool to view the image. Unlike td_get_screenshot (operator viewer), this shows what the user literally sees on their monitor — TD windows, UI panels, everything. Use when simulating mouse/keyboard input to verify what happened on screen. Workflow: td_get_screen_screenshot → read file → td_input_execute → wait idle → td_get_screen_screenshot again. TWO-STEP ASYNC: Step 1 — call without request_id: returns {'status':'pending','requestId':'...'}. Step 2 — call with request_id: returns {'file': '/tmp/.../screen_id.jpg', 'info': '...metadata...'}. Then read the file to see the image. The requestId also stays usable with td_screen_point_to_global for later coordinate lookup. crop_x/y/w/h are in ACTUAL SCREEN PIXELS (not image pixels). Crops exceeding screen bounds are auto-clamped. SMART DEFAULTS: max_size is auto when omitted — 1920 for full screen (good overview), max(crop_w,crop_h) for cropped (guarantees 1:1 scale). At 1:1 scale: screen_coord = crop_origin + image_pixel. Otherwise use the formula from metadata.
 
-**Purpose:** List operators under a path with optional filtering.
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `request_id` | string | no | Request ID from step 1 to retrieve the completed screenshot. |
+| `max_size` | integer | no | Max pixel size for the longer side. Auto when omitted: 1920 for full screen, max(crop_w,crop_h) for cropped (1:1). Set explicitly to override. |
+| `crop_x` | integer | no | Left edge in screen pixels. |
+| `crop_y` | integer | no | Top edge in screen pixels (y=0 at top of screen). |
+| `crop_w` | integer | no | Width in pixels. |
+| `crop_h` | integer | no | Height in pixels. |
+| `display` | integer | no | Screen index (default 0 = primary display). |
 
-**Parameters:**
-| Name | Type | Required | Description |
-|------|------|----------|-------------|
-| `parentPath` | string | **Yes** | Parent path (e.g., `/project1`) |
-| `pattern` | string | No | Glob pattern for name filtering (default: `*`) |
-| `includeProperties` | boolean | No | Include full parameter values (default: false) |
-| `detailLevel` | string | No | Response verbosity |
-| `responseFormat` | string | No | Output format |
-| `limit` | integer | No | Max items (1-500) |
+## Context & Focus
 
-**Examples:**
+### td_get_focus
 
-```
-# List all direct children of /project1
-get_td_nodes(parentPath="/project1")
+Get the current user focus in TouchDesigner (TD): which network is open, selected operators, current operator, and rollover (what is under the mouse cursor). IMPORTANT: when the user says 'this operator' or 'вот этот', they mean the SELECTED/CURRENT operator, NOT the rollover. Rollover is just incidental mouse position and should be ignored for intent. Pass screenshots=true to immediately start a screenshot batch for all selected operators — response includes a 'screenshots' field with batchId; retrieve with td_get_screenshots(batch_id=...).
 
-# Find all noise operators
-get_td_nodes(parentPath="/project1", pattern="noise*")
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `screenshots` | boolean | no | If true, start a screenshot batch for all selected operators. Retrieve with td_get_screenshots(batch_id=...). |
+| `max_size` | integer | no | Max screenshot size when screenshots=true (default 512). |
+| `as_top` | boolean | no | Passed to the screenshot batch when screenshots=true. |
 
-# Get full parameter details
-get_td_nodes(parentPath="/project1", pattern="*", includeProperties=true, limit=20)
-```
+### td_get_errors
 
-**Returns:** List of node summaries. With `includeProperties=false` (default): id, name, path, opType only. With `includeProperties=true`: full parameter values included.
+Find errors and warnings in TouchDesigner (TD) operators. Checks operator errors, warnings, AND broken parameter expressions (missing channels, bad references, etc). Also includes recent script errors from the log (tracebacks), grouped and deduplicated — e.g. 1000 identical mouse-move errors shown as ×1000 with one entry. If path is given, checks that operator and its children. If no path, checks the currently open network. Use '/' for entire project. Use when user says something is broken, has errors, red nodes, горит ошибка, etc. TIP: call td_clear_textport before reproducing an error to keep log focused. TIP: combine with td_get_perf when user says 'тупит/лагает' to check both errors and performance.
 
----
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | no | Path to check. If omitted, checks the current network. Use '/' to scan entire project. |
+| `recursive` | boolean | no | Check children recursively (default true) |
+| `include_log` | boolean | no | Include recent script errors from log, grouped by unique signature (default true). Use td_clear_textport before reproducing an error to keep results focused. |
 
-## Tool 7: get_td_node_parameters
+### td_get_perf
 
-**Purpose:** Get detailed parameters of a specific node.
+Get performance data from TouchDesigner (TD). Returns TSV: header with fps/budget/memory summary, then slowest operators sorted by cook time. Columns: path, OPType, cpu/cook(ms), gpu/cook(ms), cpu/s, gpu/s, rate, flags. Use when user reports lag, low FPS, slow performance, тупит, тормозит.
 
-**Parameters:**
-| Name | Type | Required | Description |
-|------|------|----------|-------------|
-| `nodePath` | string | **Yes** | Node path (e.g., `/project1/noise1`) |
-| `detailLevel` | string | No | Response verbosity |
-| `responseFormat` | string | No | Output format |
-| `limit` | integer | No | Max parameters (1-500) |
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | no | Path to profile. If omitted, profiles the current network. Use '/' for entire project. |
+| `top` | integer | no | Number of slowest operators to return |
 
-**Example:**
+## Documentation
 
-```
-get_td_node_parameters(nodePath="/project1/noise1")
-```
+### td_get_docs
 
-**Returns:** All parameter name-value pairs for the node. Use this to discover available parameters before calling update_td_node_parameters.
+Get comprehensive documentation on a TouchDesigner topic. Unlike td_get_hints (compact tips), this returns in-depth reference material. Call without arguments to see available topics with descriptions. Call with a topic name to get the full documentation.
 
----
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `topic` | string | no | Topic to get docs for. Omit to list available topics. |
 
-## Tool 8: get_td_node_errors
+### td_get_hints
 
-**Purpose:** Check for errors on a node and all its descendants (recursive).
+Get TouchDesigner tips and common patterns for a topic. Call this BEFORE creating operators or writing TD Python code to learn correct parameter names, expressions, and idiomatic approaches. Available topics: animation, noise, connections, parameters, scripting, construction, ui_analysis, panel_layout, screenshots, input_simulation, undo, networking, all. IMPORTANT: always call with topic='construction' before building multi-operator setups to get correct TOP/CHOP parameter names, compositeTOP input ordering, and layout guidelines. IMPORTANT: always call with topic='input_simulation' before using td_input_execute to learn focus recovery, coordinate systems, and testing workflow.
 
-**Parameters:**
-| Name | Type | Required | Description |
-|------|------|----------|-------------|
-| `nodePath` | string | **Yes** | Absolute path to inspect (e.g., `/project1`) |
-| `detailLevel` | string | No | Response verbosity |
-| `responseFormat` | string | No | Output format |
-| `limit` | integer | No | Max error items (1-500) |
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `topic` | string | yes | Topic to get hints for. Available: 'animation', 'noise', 'connections', 'parameters', 'scripting', 'construction', 'ui_analysis', 'panel_layout', 'screenshots', 'input_simulation', 'undo', 'networking', 'all' |
 
-**Examples:**
+### td_agents_md
 
-```
-# Check entire project for errors
-get_td_node_errors(nodePath="/project1")
+Read, write, or update the agents_md documentation inside a COMP container. agents_md is a Markdown textDAT describing the container's purpose, structure, and conventions. action='read': returns content + staleness check (compares documented children vs live state). action='update': refreshes auto-generated sections (children list, connections) from live state, preserves human-written sections. action='write': sets full content, creates the DAT if missing.
 
-# Check a specific chain
-get_td_node_errors(nodePath="/project1/audio_chain")
-```
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Path to the COMP container |
+| `action` | `read` / `update` / `write` | yes | read=get content+staleness, update=refresh auto sections, write=set content |
+| `content` | string | no | Markdown content (only for action='write') |
 
-**Returns:** Error count, hasErrors boolean, and list of errors each with nodePath, nodeName, opType, and error message.
+## Input Automation
 
-**Always call this after building a network** to catch wiring mistakes, missing references, and configuration errors.
+### td_input_execute
 
----
+Send a sequence of mouse/keyboard commands to TouchDesigner. Commands execute sequentially with smooth bezier movement. Returns immediately — poll td_input_status() until status='idle' before proceeding. Command types: 'focus' — bring TD to foreground. 'move' — smooth mouse move: {type,x,y,duration,easing}. 'click' — click: {type,x,y,button,hold,duration,easing}. hold=seconds to hold down. duration=smooth move before click. 'dblclick' — double click: {type,x,y,duration}. 'mousedown'/'mouseup' — {type,x,y,button}. 'key' — keystroke: {type,keys} e.g. 'ctrl+z','tab','escape','shift+f5'. Requires Accessibility permission on Mac. 'type' — human-like typing: {type,text,wpm,variance} — layout-independent Unicode, variable timing. 'wait' — pause: {type,duration}. 'scroll' — {type,x,y,dx,dy,steps} — human-like scroll: moves mouse to (x,y) first, then sends dy (vertical, +up) and dx (horizontal, +right) as multiple ticks with natural timing. steps=4 by default. Mouse commands may include coord_space='logical' (default) or coord_space='physical'. On macOS, 'physical' means actual screen pixels from td_get_screen_screenshot and is converted to CGEvent logical coords automatically. Top-level coord_space applies to commands that do not override it. on_error: 'stop' (default) clears queue on error; 'continue' skips failed command. IMPORTANT: call td_get_hints('input_simulation') before first use to learn focus recovery, coordinate systems, and testing workflow.
 
-## Tool 9: update_td_node_parameters
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `commands` | array | yes | List of command dicts to execute in sequence. |
+| `coord_space` | `logical` / `physical` | no | Default coordinate space for mouse commands that do not specify their own coord_space. 'logical' uses CGEvent coords directly. 'physical' uses actual screen pixels from td_get_screen_screenshot and is auto-converted on macOS. |
+| `on_error` | `stop` / `continue` | no | What to do on error. Default 'stop'. |
 
-**Purpose:** Update parameters on an existing node.
+### td_input_status
 
-**Parameters:**
-| Name | Type | Required | Description |
-|------|------|----------|-------------|
-| `nodePath` | string | **Yes** | Path to node (e.g., `/project1/noise1`) |
-| `properties` | object | **Yes** | Key-value pairs to update (e.g., `{"seed": 42, "monochrome": true}`) |
-| `detailLevel` | string | No | Response verbosity |
-| `responseFormat` | string | No | Output format |
+Get current status of the td_input command queue. Poll this after td_input_execute until status='idle'. Returns: status ('idle'/'running'), current command, queue_remaining, last error.
 
-**Examples:**
+No parameters (other than optional `target_instance`).
 
-```
-# Set noise parameters
-update_td_node_parameters(
-    nodePath="/project1/noise1",
-    properties={"seed": 42, "monochrome": false, "period": 4.0, "harmonics": 3,
-                "resolutionw": 1920, "resolutionh": 1080}
-)
+### td_input_clear
 
-# Set a file path
-update_td_node_parameters(
-    nodePath="/project1/moviefilein1",
-    properties={"file": "/Users/me/Videos/clip.mp4", "play": true}
-)
+Clear the td_input command queue and stop current execution immediately.
 
-# Set compositing mode
-update_td_node_parameters(
-    nodePath="/project1/composite1",
-    properties={"operand": 0}  # 0=Over, 1=Under, 3=Add, 18=Multiply, 27=Screen
-)
-```
+No parameters (other than optional `target_instance`).
 
-**Returns:** List of successfully updated properties and any that failed (with reasons). Raises error if zero properties were updated.
+### td_op_screen_rect
 
-**Parameter value types:** Floats, ints, booleans, and strings are all accepted. For menu parameters, use either the string label or the integer index.
+Get the screen coordinates of an operator node in the network editor. Returns {x,y,w,h,cx,cy} where cx,cy is the center for clicking. Use this to find where to click on a specific operator. Only works if the operator's parent network is currently open in a network editor pane.
 
----
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Full path to the operator, e.g. '/project1/myComp/noise1' |
 
-## Tool 10: exec_node_method
+### td_click_screen_point
 
-**Purpose:** Call a Python method directly on a specific node.
+Resolve a point inside a previous td_get_screen_screenshot result and click it. Pass the screenshot request_id plus either normalized u/v or image_x/image_y. Queues a td_input click using physical screen coordinates, so it works directly with screenshot-derived points. Use duration/easing to control the cursor travel before the click.
 
-**Parameters:**
-| Name | Type | Required | Description |
-|------|------|----------|-------------|
-| `nodePath` | string | **Yes** | Path to node |
-| `method` | string | **Yes** | Method name to call |
-| `args` | array | No | Positional arguments (strings, numbers, booleans) |
-| `kwargs` | object | No | Keyword arguments |
-| `detailLevel` | string | No | Response verbosity |
-| `responseFormat` | string | No | Output format |
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `request_id` | string | yes | Request ID originally returned by td_get_screen_screenshot. |
+| `u` | number | no | Normalized horizontal position inside the screenshot region (0=left, 1=right). Use with v. |
+| `v` | number | no | Normalized vertical position inside the screenshot region (0=top, 1=bottom). Use with u. |
+| `image_x` | number | no | Horizontal pixel coordinate inside the returned screenshot image. Use with image_y. |
+| `image_y` | number | no | Vertical pixel coordinate inside the returned screenshot image. Use with image_x. |
+| `button` | `left` / `right` / `middle` | no | Mouse button to click. Default left. |
+| `hold` | number | no | Seconds to hold the mouse button down before releasing. |
+| `duration` | number | no | Seconds for the cursor to travel to the target before clicking. |
+| `easing` | `linear` / `ease-in` / `ease-out` / `ease-in-out` | no | Cursor movement easing for the pre-click travel. |
+| `focus` | boolean | no | If true, bring TD to the front before clicking and wait briefly for focus to settle. |
 
-**Examples:**
+### td_screen_point_to_global
 
-```
-# Get all children of a component
-exec_node_method(nodePath="/project1", method="findChildren")
+Convert a point inside a previous td_get_screen_screenshot result into absolute screen coordinates. Pass the screenshot request_id plus either normalized u/v (0..1 inside that screenshot region) or image_x/image_y in returned image pixels. Returns absolute physical screen coordinates, logical coordinates, and a ready-to-use td_input_execute payload. Metadata is kept for the most recent screen screenshots so multiple agents can resolve points later by request_id.
 
-# Find specific children
-exec_node_method(nodePath="/project1", method="findChildren",
-                 kwargs={"name": "noise*", "depth": 1})
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `request_id` | string | yes | Request ID originally returned by td_get_screen_screenshot. |
+| `u` | number | no | Normalized horizontal position inside the screenshot region (0=left, 1=right). Use with v. |
+| `v` | number | no | Normalized vertical position inside the screenshot region (0=top, 1=bottom). Use with u. |
+| `image_x` | number | no | Horizontal pixel coordinate inside the returned screenshot image. Use with image_y. |
+| `image_y` | number | no | Vertical pixel coordinate inside the returned screenshot image. Use with image_x. |
 
-# Get node errors
-exec_node_method(nodePath="/project1/noise1", method="errors")
+## System
 
-# Get node warnings
-exec_node_method(nodePath="/project1/noise1", method="warnings")
+### td_list_instances
 
-# Save a component as .tox
-exec_node_method(nodePath="/project1/myContainer", method="save",
-                 args=["/path/to/component.tox"])
-```
+List all running TouchDesigner (TD) instances with active MCP servers. Returns port, project name, PID, and instanceId for each instance. Call this at the start of every conversation to discover available instances and choose which one to work with. instanceId is stable for the lifetime of a TD process and is used as target_instance in all other tool calls.
 
-**Returns:** Processed return value of the method call. TD operators are serialized to their path strings, iterables to lists, etc.
+No parameters (other than optional `target_instance`).
 
----
+### td_project_quit
 
-## Tool 11: get_td_classes
+Save and/or close the current TouchDesigner (TD) project. Can save before closing. Reports if project has unsaved changes. To close a different instance, pass target_instance=instanceId. WARNING: this will shut down the MCP server on that instance.
 
-**Purpose:** List available TouchDesigner Python classes and modules.
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `save` | boolean | no | Save the project before closing. Default true. |
+| `force` | boolean | no | Force close without save dialog. Default false. |
 
-**Parameters:**
-| Name | Type | Required | Description |
-|------|------|----------|-------------|
-| `detailLevel` | string | No | Response verbosity |
-| `responseFormat` | string | No | Output format |
-| `limit` | integer | No | Max items (default: 50) |
+### td_reinit_extension
 
-**Example:**
+Reinitialize an extension on a COMP in TouchDesigner (TD). Call this AFTER finishing all code edits via td_write_dat to apply changes. Do NOT call after every small edit - batch your changes first.
 
-```
-get_td_classes(limit=100)
-```
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Path to the COMP with the extension |
 
-**Returns:** List of class/module names and their docstrings from the td module. Useful for discovering what's available in TD's Python environment.
+### td_dev_log
 
----
+Read the last N entries from the MCP dev log. Only available when Devmode is enabled. Shows request/response history.
 
-## Tool 12: get_td_class_details
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `count` | integer | no | Number of recent log entries to return |
 
-**Purpose:** Get methods and properties of a specific TD Python class.
+### td_clear_dev_log
 
-**Parameters:**
-| Name | Type | Required | Description |
-|------|------|----------|-------------|
-| `className` | string | **Yes** | Class name (e.g., `noiseTop`, `OP`, `COMP`) |
-| `detailLevel` | string | No | Response verbosity |
-| `responseFormat` | string | No | Output format |
-| `limit` | integer | No | Max methods/properties (default: 30) |
+Clear the current MCP dev log by closing the old file and starting a fresh one. Only available when Devmode is enabled.
 
-**Examples:**
+No parameters (other than optional `target_instance`).
 
-```
-# Inspect the noiseTop class
-get_td_class_details(className="noiseTop")
+### td_test_session
 
-# Inspect the base OP class (all operators inherit from this)
-get_td_class_details(className="OP", limit=50)
+Manage test sessions, bug reports, and conversation export. IMPORTANT: Do NOT proactively suggest exporting chat or submitting reports. These are tools for specific situations: - export_chat / submit_report: ONLY when the user encounters a BUG with the plugin or TouchDesigner and wants to report it, or when the user explicitly asks to export the conversation. Never suggest this at session end or as routine action. USER PHRASES → ACTIONS: 'разбор тестовых сессий' / 'analyze test sessions' → list, then pull, read meta.json → index.jsonl → calls/. 'разбор репортов' / 'analyze user reports' → list with session='user', then pull by name. 'экспортируй чат' / 'export chat' → (1) export_chat_id → marker, (2) export_chat with session=marker. 'сообщи о проблеме' / 'report bug' → export chat, review for privacy, then submit_report with summary + tags + result_op=file_path. ACTIONS: export_chat_id | export_chat | submit_report | start | note | import_chat | end | list | pull. list: default=auto-detect repo. session='user' for user_reports (dev only). pull: auto-searches both repos. Auto-detects dev vs user Hub access.
 
-# Inspect COMP (component) class
-get_td_class_details(className="COMP")
-```
-
-**Returns:** Class name, type, description, methods (name + description + type), and properties (name + description + type).
-
----
-
-## Tool 13: get_td_module_help
-
-**Purpose:** Retrieve Python help() text for any TD module, class, or function.
-
-**Parameters:**
-| Name | Type | Required | Description |
-|------|------|----------|-------------|
-| `moduleName` | string | **Yes** | Module/class name (e.g., `noiseCHOP`, `tdu`, `td.OP`) |
-| `detailLevel` | string | No | Response verbosity |
-| `responseFormat` | string | No | Output format |
-
-**Examples:**
-
-```
-# Get help for the noise CHOP class
-get_td_module_help(moduleName="noiseCHOP")
-
-# Get help for the tdu utilities module
-get_td_module_help(moduleName="tdu")
-
-# Dotted name resolution works
-get_td_module_help(moduleName="td.OP")
-```
-
-**Returns:** Full Python help() text output, cleaned of backspace characters.
-
----
-
-## Workflow: Building a Complete Network
-
-Typical sequence of tool calls to build a project:
-
-1. `get_td_info` — verify connection
-2. `get_td_nodes(parentPath="/project1")` — see what already exists
-3. `create_td_node` (multiple) — create all operators
-4. `update_td_node_parameters` (multiple) — configure each operator
-5. `execute_python_script` — wire all connections in one batch script
-6. `get_td_node_errors(nodePath="/project1")` — check for problems
-7. `get_td_node_parameters` — verify specific nodes if needed
-8. Iterate: adjust parameters, add operators, fix errors
-
-## TD Documentation MCP Server Tools
-
-The bottobot/touchdesigner-mcp-server provides 21 reference/knowledge tools (no running TD needed):
-
-| Tool | Purpose |
-|------|---------|
-| `get_operator` | Get full documentation for a specific operator |
-| `search_operators` | Search operators by keyword |
-| `list_operators` | List all operators (filterable by family) |
-| `compare_operators` | Compare two operators side by side |
-| `get_operator_examples` | Get usage examples for an operator |
-| `suggest_workflow` | Get workflow suggestions for a task |
-| `get_tutorial` | Get a full TD tutorial |
-| `list_tutorials` | List available tutorials |
-| `search_tutorials` | Search tutorial content |
-| `get_python_api` | Get Python API class documentation |
-| `search_python_api` | Search Python API |
-| `list_python_classes` | List all documented Python classes |
-| `get_version_info` | Get TD version release notes |
-| `list_versions` | List all documented TD versions |
-| `get_experimental_techniques` | Get advanced technique guides (GLSL, ML, generative, etc.) |
-| `search_experimental` | Search experimental techniques |
-| `get_glsl_pattern` | Get GLSL code patterns (SDF, color, math utilities) |
-| `get_operator_connections` | Get common operator wiring patterns |
-| `get_network_template` | Get complete network templates with Python generation scripts |
-| `get_experimental_build` | Get experimental build info |
-| `list_experimental_builds` | List experimental builds |
-
-This server contains 630 operator docs, 14 tutorials, 69 Python API classes, and 7 experimental technique categories with working code.
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `action` | `export_chat_id` / `export_chat` / `submit_report` / `start` / `note` / `import_chat` / `end` / `list` / `pull` | yes | Action: export_chat_id / export_chat / submit_report / start / note / import_chat / end / list / pull |
+| `prompt` | string | no | (start) The test prompt/task description |
+| `tags` | array | no | (start) Tags for categorization, e.g. ['ui', 'layout'] |
+| `text` | string | no | (note) Observation text. (import_chat) Full conversation text. |
+| `outcome` | `success` / `partial` / `failure` | no | (end) Result: success / partial / failure |
+| `summary` | string | no | (end) Brief summary of what happened |
+| `result_op` | string | no | (end) Path to operator to save as result.tox |
+| `session` | string | no | (pull) Session name or substring to download. (list) Pass 'user' to list user reports. (export_chat) Marker returned by export_chat_id. |
diff --git a/skills/creative/touchdesigner/references/network-patterns.md b/skills/creative/touchdesigner/references/network-patterns.md
index 7afa2415022..cb04fd54d57 100644
--- a/skills/creative/touchdesigner/references/network-patterns.md
+++ b/skills/creative/touchdesigner/references/network-patterns.md
@@ -20,32 +20,32 @@ Audio File In CHOP -> Audio Spectrum CHOP -> Math CHOP (scale)
 **MCP Build Sequence:**
 
 ```
-1. create_td_node(parentPath="/project1", nodeType="audiofileinChop", nodeName="audio_in")
-2. create_td_node(parentPath="/project1", nodeType="audiospectrumChop", nodeName="spectrum")
-3. create_td_node(parentPath="/project1", nodeType="mathChop", nodeName="spectrum_scale")
-4. create_td_node(parentPath="/project1", nodeType="noiseTop", nodeName="noise1")
-5. create_td_node(parentPath="/project1", nodeType="levelTop", nodeName="level1")
-6. create_td_node(parentPath="/project1", nodeType="feedbackTop", nodeName="feedback1")
-7. create_td_node(parentPath="/project1", nodeType="compositeTop", nodeName="comp1")
-8. create_td_node(parentPath="/project1", nodeType="nullTop", nodeName="out")
+1. td_create_operator(parent="/project1", type="audiofileinChop", name="audio_in")
+2. td_create_operator(parent="/project1", type="audiospectrumChop", name="spectrum")
+3. td_create_operator(parent="/project1", type="mathChop", name="spectrum_scale")
+4. td_create_operator(parent="/project1", type="noiseTop", name="noise1")
+5. td_create_operator(parent="/project1", type="levelTop", name="level1")
+6. td_create_operator(parent="/project1", type="feedbackTop", name="feedback1")
+7. td_create_operator(parent="/project1", type="compositeTop", name="comp1")
+8. td_create_operator(parent="/project1", type="nullTop", name="out")
 
-9. update_td_node_parameters(nodePath="/project1/audio_in",
+9. td_set_operator_pars(path="/project1/audio_in",
      properties={"file": "/path/to/music.wav", "play": true})
-10. update_td_node_parameters(nodePath="/project1/spectrum",
+10. td_set_operator_pars(path="/project1/spectrum",
      properties={"size": 512})
-11. update_td_node_parameters(nodePath="/project1/spectrum_scale",
+11. td_set_operator_pars(path="/project1/spectrum_scale",
      properties={"gain": 2.0, "postoff": 0.0})
-12. update_td_node_parameters(nodePath="/project1/noise1",
-     properties={"type": 1, "monochrome": false, "resolutionw": 1920, "resolutionh": 1080,
+12. td_set_operator_pars(path="/project1/noise1",
+     properties={"type": 1, "monochrome": false, "resolutionw": 1280, "resolutionh": 720,
                   "period": 4.0, "harmonics": 3, "amp": 1.0})
-13. update_td_node_parameters(nodePath="/project1/level1",
+13. td_set_operator_pars(path="/project1/level1",
      properties={"opacity": 0.95, "gamma1": 0.75})
-14. update_td_node_parameters(nodePath="/project1/feedback1",
+14. td_set_operator_pars(path="/project1/feedback1",
      properties={"top": "/project1/comp1"})
-15. update_td_node_parameters(nodePath="/project1/comp1",
+15. td_set_operator_pars(path="/project1/comp1",
      properties={"operand": 0})
 
-16. execute_python_script: """
+16. td_execute_python: """
 op('/project1/audio_in').outputConnectors[0].connect(op('/project1/spectrum'))
 op('/project1/spectrum').outputConnectors[0].connect(op('/project1/spectrum_scale'))
 op('/project1/noise1').outputConnectors[0].connect(op('/project1/level1'))
@@ -54,7 +54,7 @@ op('/project1/feedback1').outputConnectors[0].connect(op('/project1/comp1').inpu
 op('/project1/comp1').outputConnectors[0].connect(op('/project1/out'))
 """
 
-17. execute_python_script: """
+17. td_execute_python: """
 # Export spectrum values to drive noise parameters
 # This makes the noise react to audio frequencies
 op('/project1/noise1').par.seed.expr = "op('/project1/spectrum_scale')['chan1']"
@@ -85,7 +85,7 @@ Math CHOP: chanop=1 (Add channels), range1low=0, range1high=10
 Trigger CHOP: attack=0.02, peak=1.0, decay=0.3, sustain=0.0, release=0.1
 
 # Export to visual: Scale, brightness, or color intensity
-execute_python_script: "op('/project1/level1').par.brightness1.expr = \"1.0 + op('/project1/trigger1')['chan1'] * 0.5\""
+td_execute_python: "op('/project1/level1').par.brightness1.expr = \"1.0 + op('/project1/trigger1')['chan1'] * 0.5\""
 ```
 
 ### Pattern 3: Multi-Band Audio -> Multi-Layer Visuals
@@ -109,26 +109,26 @@ Audio In -> Spectrum -> Audio Band EQ (3 bands: bass, mid, treble)
                        Out
 ```
 
-### Pattern 3b: Audio-Reactive GLSL Fractal (Proven td_exec Recipe)
+### Pattern 3b: Audio-Reactive GLSL Fractal (Proven Recipe)
 
-Complete working recipe tested in TD 099. Plays an MP3, runs FFT, feeds spectrum as a texture into a GLSL shader where inner fractal reacts to bass, outer to treble.
+Complete working recipe. Plays an MP3, runs FFT, feeds spectrum as a texture into a GLSL shader where inner fractal reacts to bass, outer to treble.
 
 **Network:**
 ```
-AudioFileIn CHOP → AudioSpectrum CHOP → Math CHOP (boost) → Resample CHOP (256)
-                                                                  ↓
-                                                            CHOP To TOP (256x1 spectrum texture)
-                                                                  ↓
+AudioFileIn CHOP → AudioSpectrum CHOP (FFT=512, outlength=256)
+    → Math CHOP (gain=10) → CHOP To TOP (256x2 spectrum texture, dataformat=r)
+                                                                   ↓
 Constant TOP (time, rgba32float) → GLSL TOP (input 0=time, input 1=spectrum) → Null → MovieFileOut
                                                                                         ↓
 AudioFileIn CHOP → Audio Device Out CHOP                                          Record to .mov
 ```
 
-**Build via td_exec (one call per step for reliability):**
+**Build via td_execute_python (one call per step for reliability):**
 
 ```python
 # Step 1: Audio chain
-td_exec("""
+# td_execute_python script:
+td_execute_python(code="""
 root = op('/project1')
 audio = root.create(audiofileinCHOP, 'audio_in')
 audio.par.file = '/path/to/music.mp3'
@@ -148,7 +148,7 @@ resamp.par.timeslice = True
 resamp.par.rate = 256
 
 chop2top = root.create(choptoTOP, 'spectrum_tex')
-resamp.outputConnectors[0].connect(chop2top.inputConnectors[0])
+chop2top.par.chop = resamp  # CHOP To TOP has NO input connectors — use par.chop reference
 
 # Audio output (hear the music)
 aout = root.create(audiodeviceoutCHOP, 'audio_out')
@@ -156,8 +156,9 @@ audio.outputConnectors[0].connect(aout.inputConnectors[0])
 result = 'audio chain ok'
 """)
 
-# Step 2: Time driver (MUST be rgba32float — see pitfalls #12)
-td_exec("""
+# Step 2: Time driver (MUST be rgba32float — see pitfalls #6)
+# td_execute_python script:
+td_execute_python(code="""
 root = op('/project1')
 td = root.create(constantTOP, 'time_driver')
 td.par.format = 'rgba32float'
@@ -170,7 +171,8 @@ result = 'time ok'
 """)
 
 # Step 3: GLSL shader (write to /tmp, load from file)
-td_exec("""
+# td_execute_python script:
+td_execute_python(code="""
 root = op('/project1')
 glsl = root.create(glslTOP, 'audio_shader')
 glsl.par.outputresolution = 'custom'
@@ -188,7 +190,8 @@ result = 'glsl ok'
 """)
 
 # Step 4: Output + recorder
-td_exec("""
+# td_execute_python script:
+td_execute_python(code="""
 root = op('/project1')
 out = root.create(nullTOP, 'output')
 op('/project1/audio_shader').outputConnectors[0].connect(out.inputConnectors[0])
@@ -214,7 +217,7 @@ vec3 palette(float t) {
 
 void main() {
     // Input 0 = time (1x1 rgba32float constant)
-    // Input 1 = audio spectrum (256x1 CHOP To TOP)
+    // Input 1 = audio spectrum (256x2 CHOP To TOP, stereo — sample at y=0.25 for first channel)
     vec4 td = texture(sTD2DInputs[0], vec2(0.5));
     float t = td.r + td.g * 1000.0;
 
@@ -223,15 +226,15 @@ void main() {
     vec2 uv0 = uv;
     vec3 finalColor = vec3(0.0);
 
-    float bass = texture(sTD2DInputs[1], vec2(0.05, 0.0)).r;
-    float mids = texture(sTD2DInputs[1], vec2(0.25, 0.0)).r;
+    float bass = texture(sTD2DInputs[1], vec2(0.05, 0.25)).r;
+    float mids = texture(sTD2DInputs[1], vec2(0.25, 0.25)).r;
 
     for (float i = 0.0; i < 4.0; i++) {
         uv = fract(uv * (1.4 + bass * 0.3)) - 0.5;
         float d = length(uv) * exp(-length(uv0));
 
         // Sample spectrum at distance: inner=bass, outer=treble
-        float freq = texture(sTD2DInputs[1], vec2(clamp(d * 0.5, 0.0, 1.0), 0.0)).r;
+        float freq = texture(sTD2DInputs[1], vec2(clamp(d * 0.5, 0.0, 1.0), 0.25)).r;
 
         vec3 col = palette(length(uv0) + i * 0.4 + t * 0.35);
         d = sin(d * (7.0 + bass * 4.0) + t * 1.5) / 8.0;
@@ -247,7 +250,7 @@ void main() {
 ```
 
 **Key insights from testing:**
-- `spectrum_tex` (CHOP To TOP) produces a 256x1 texture — x position = frequency
+- `spectrum_tex` (CHOP To TOP) produces a 256x2 texture — x position = frequency, y=0.25 for first channel
 - Sampling at `vec2(0.05, 0.0)` gets bass, `vec2(0.65, 0.0)` gets treble
 - Sampling based on pixel distance (`d * 0.5`) makes inner fractal react to bass, outer to treble
 - `bass * 0.3` in the `fract()` zoom makes the fractal breathe with kicks
@@ -269,26 +272,26 @@ Noise TOP -> Composite TOP -> Level TOP -> Null TOP (out)
 **MCP Build Sequence:**
 
 ```
-1. create_td_node(parentPath="/project1", nodeType="noiseTop", nodeName="seed_noise")
-2. create_td_node(parentPath="/project1", nodeType="compositeTop", nodeName="mix")
-3. create_td_node(parentPath="/project1", nodeType="transformTop", nodeName="evolve")
-4. create_td_node(parentPath="/project1", nodeType="feedbackTop", nodeName="fb")
-5. create_td_node(parentPath="/project1", nodeType="levelTop", nodeName="color_correct")
-6. create_td_node(parentPath="/project1", nodeType="nullTop", nodeName="out")
+1. td_create_operator(parent="/project1", type="noiseTop", name="seed_noise")
+2. td_create_operator(parent="/project1", type="compositeTop", name="mix")
+3. td_create_operator(parent="/project1", type="transformTop", name="evolve")
+4. td_create_operator(parent="/project1", type="feedbackTop", name="fb")
+5. td_create_operator(parent="/project1", type="levelTop", name="color_correct")
+6. td_create_operator(parent="/project1", type="nullTop", name="out")
 
-7. update_td_node_parameters(nodePath="/project1/seed_noise",
+7. td_set_operator_pars(path="/project1/seed_noise",
      properties={"type": 1, "monochrome": false, "period": 2.0, "amp": 0.3,
-                  "resolutionw": 1920, "resolutionh": 1080})
-8. update_td_node_parameters(nodePath="/project1/mix",
+                  "resolutionw": 1280, "resolutionh": 720})
+8. td_set_operator_pars(path="/project1/mix",
      properties={"operand": 27})  # 27 = Screen blend
-9. update_td_node_parameters(nodePath="/project1/evolve",
+9. td_set_operator_pars(path="/project1/evolve",
      properties={"sx": 1.003, "sy": 1.003, "rz": 0.5, "extend": 2})  # slight zoom + rotate, repeat edges
-10. update_td_node_parameters(nodePath="/project1/fb",
+10. td_set_operator_pars(path="/project1/fb",
      properties={"top": "/project1/mix"})
-11. update_td_node_parameters(nodePath="/project1/color_correct",
+11. td_set_operator_pars(path="/project1/color_correct",
      properties={"opacity": 0.98, "gamma1": 0.85})
 
-12. execute_python_script: """
+12. td_execute_python: """
 op('/project1/seed_noise').outputConnectors[0].connect(op('/project1/mix').inputConnectors[0])
 op('/project1/fb').outputConnectors[0].connect(op('/project1/evolve'))
 op('/project1/evolve').outputConnectors[0].connect(op('/project1/mix').inputConnectors[1])
@@ -319,15 +322,15 @@ Table DAT (instance data) -> DAT to CHOP -> Geometry COMP (instancing on) -> Ren
 **MCP Build Sequence:**
 
 ```
-1. create_td_node(parentPath="/project1", nodeType="tableDat", nodeName="instance_data")
-2. create_td_node(parentPath="/project1", nodeType="geometryComp", nodeName="geo1")
-3. create_td_node(parentPath="/project1/geo1", nodeType="sphereSop", nodeName="sphere")
-4. create_td_node(parentPath="/project1", nodeType="constMat", nodeName="mat1")
-5. create_td_node(parentPath="/project1", nodeType="cameraComp", nodeName="cam1")
-6. create_td_node(parentPath="/project1", nodeType="lightComp", nodeName="light1")
-7. create_td_node(parentPath="/project1", nodeType="renderTop", nodeName="render1")
+1. td_create_operator(parent="/project1", type="tableDat", name="instance_data")
+2. td_create_operator(parent="/project1", type="geometryComp", name="geo1")
+3. td_create_operator(parent="/project1/geo1", type="sphereSop", name="sphere")
+4. td_create_operator(parent="/project1", type="constMat", name="mat1")
+5. td_create_operator(parent="/project1", type="cameraComp", name="cam1")
+6. td_create_operator(parent="/project1", type="lightComp", name="light1")
+7. td_create_operator(parent="/project1", type="renderTop", name="render1")
 
-8. execute_python_script: """
+8. td_execute_python: """
 import random, math
 dat = op('/project1/instance_data')
 dat.clear()
@@ -346,15 +349,15 @@ for i in range(500):
     ])
 """
 
-9. update_td_node_parameters(nodePath="/project1/geo1",
+9. td_set_operator_pars(path="/project1/geo1",
      properties={"instancing": true, "instancechop": "",
                   "instancedat": "/project1/instance_data",
                   "material": "/project1/mat1"})
-10. update_td_node_parameters(nodePath="/project1/render1",
+10. td_set_operator_pars(path="/project1/render1",
      properties={"camera": "/project1/cam1", "geometry": "/project1/geo1",
                   "light": "/project1/light1",
-                  "resolutionw": 1920, "resolutionh": 1080})
-11. update_td_node_parameters(nodePath="/project1/cam1",
+                  "resolutionw": 1280, "resolutionh": 720})
+11. td_set_operator_pars(path="/project1/cam1",
      properties={"tz": 10})
 ```
 
@@ -369,7 +372,7 @@ Text DAT (GLSL code) -> GLSL TOP (resolution, dat reference) -> Feedback TOP
                          Level TOP (out)
 ```
 
-**Key GLSL code (write to Text DAT via execute_python_script):**
+**Key GLSL code (write to Text DAT via td_execute_python):**
 
 ```glsl
 // Gray-Scott reaction-diffusion
@@ -422,26 +425,26 @@ Movie File In TOP -> HSV Adjust TOP -> Level TOP -> Blur TOP -> Composite TOP ->
 **MCP Build Sequence:**
 
 ```
-1. create_td_node(parentPath="/project1", nodeType="moviefileinTop", nodeName="video_in")
-2. create_td_node(parentPath="/project1", nodeType="hsvadjustTop", nodeName="color")
-3. create_td_node(parentPath="/project1", nodeType="levelTop", nodeName="levels")
-4. create_td_node(parentPath="/project1", nodeType="blurTop", nodeName="blur")
-5. create_td_node(parentPath="/project1", nodeType="compositeTop", nodeName="overlay")
-6. create_td_node(parentPath="/project1", nodeType="textTop", nodeName="title")
-7. create_td_node(parentPath="/project1", nodeType="nullTop", nodeName="out")
+1. td_create_operator(parent="/project1", type="moviefileinTop", name="video_in")
+2. td_create_operator(parent="/project1", type="hsvadjustTop", name="color")
+3. td_create_operator(parent="/project1", type="levelTop", name="levels")
+4. td_create_operator(parent="/project1", type="blurTop", name="blur")
+5. td_create_operator(parent="/project1", type="compositeTop", name="overlay")
+6. td_create_operator(parent="/project1", type="textTop", name="title")
+7. td_create_operator(parent="/project1", type="nullTop", name="out")
 
-8. update_td_node_parameters(nodePath="/project1/video_in",
+8. td_set_operator_pars(path="/project1/video_in",
      properties={"file": "/path/to/video.mp4", "play": true})
-9. update_td_node_parameters(nodePath="/project1/color",
+9. td_set_operator_pars(path="/project1/color",
      properties={"hueoffset": 0.1, "saturationmult": 1.3})
-10. update_td_node_parameters(nodePath="/project1/levels",
+10. td_set_operator_pars(path="/project1/levels",
      properties={"brightness1": 1.1, "contrast": 1.2, "gamma1": 0.9})
-11. update_td_node_parameters(nodePath="/project1/blur",
+11. td_set_operator_pars(path="/project1/blur",
      properties={"sizex": 2, "sizey": 2})
-12. update_td_node_parameters(nodePath="/project1/title",
+12. td_set_operator_pars(path="/project1/title",
      properties={"text": "My Video", "fontsizex": 48, "alignx": 1, "aligny": 1})
 
-13. execute_python_script: """
+13. td_execute_python: """
 chain = ['video_in', 'color', 'levels', 'blur']
 for i in range(len(chain) - 1):
     op(f'/project1/{chain[i]}').outputConnectors[0].connect(op(f'/project1/{chain[i+1]}'))
@@ -460,7 +463,7 @@ Record the output to a file. **H.264/H.265 require a Commercial license** — us
 ```
 
 ```python
-# Build via td_exec():
+# Build via td_execute_python:
 root = op('/project1')
 
 # Always put a Null TOP before the recorder
@@ -488,25 +491,73 @@ rec.par.record = False
 - `TOP.save()` called rapidly always captures the same frame — use MovieFileOut for animation
 - See `pitfalls.md` #25-27 for full details
 
-### Pattern 8b: TD → External Pipeline (e.g., ASCII Video)
+### Pattern 8b: TD → External Pipeline (FFmpeg / Python / Post-Processing)
 
-Export TD visuals for use in another tool (ffmpeg, Python, ASCII art, etc.):
+Export TD visuals for use in another tool (ffmpeg, Python, ASCII art, etc.). This is the standard workflow when you need to composite TD output with external processing (ASCII conversion, Python shader chains, ML inference, etc.).
+
+**Step 1: Record to video in TD**
 
 ```python
-# 1. Record with MovieFileOut (MJPEG)
-rec.par.videocodec = 'mjpa'
+# Preferred: ProRes on macOS (visually lossless intermediate codec, Non-Commercial OK, ~55MB/s at 1280x720)
+rec.par.videocodec = 'prores'
+# Fallback for non-macOS: mjpa (Motion JPEG)
+# rec.par.videocodec = 'mjpa'
 rec.par.record = True
 # ... wait N seconds ...
 rec.par.record = False
-
-# 2. Extract frames with ffmpeg (outside TD)
-# ffmpeg -i /tmp/output.mov -vframes 120 /tmp/frames/frame_%06d.png
-
-# 3. Load frames in Python for processing
-# from PIL import Image
-# img = Image.open('/tmp/frames/frame_000001.png')
 ```
 
+**Step 2: Extract frames with ffmpeg**
+
+```bash
+# Extract all frames at 30fps (run `mkdir -p /tmp/frames` first — ffmpeg does not create output directories)
+ffmpeg -y -i /tmp/output.mov -vf 'fps=30' /tmp/frames/frame_%06d.png
+
+# Or extract a specific duration
+ffmpeg -y -i /tmp/output.mov -t 25 -vf 'fps=30' /tmp/frames/frame_%06d.png
+
+# Or extract specific frame range
+ffmpeg -y -i /tmp/output.mov -vf 'select=between(n\,0\,749)' -vsync vfr /tmp/frames/frame_%06d.png
+```
+
+**Step 3: Process frames in Python**
+
+```python
+from PIL import Image
+import os
+
+frames_dir = '/tmp/frames'
+output_dir = '/tmp/processed'
+os.makedirs(output_dir, exist_ok=True)
+
+for fname in sorted(os.listdir(frames_dir)):
+    if not fname.endswith('.png'):
+        continue
+    img = Image.open(os.path.join(frames_dir, fname))
+    # ... apply your processing ...
+    img.save(os.path.join(output_dir, fname))
+```
+
+**Step 4: Mux processed frames back with audio**
+
+```bash
+# Create video from processed frames + audio with fade-out
+ffmpeg -y \
+  -framerate 30 -i /tmp/processed/frame_%06d.png \
+  -i /tmp/audio.mp3 \
+  -c:v libx264 -pix_fmt yuv420p -crf 18 \
+  -c:a aac -b:a 192k \
+  -shortest \
+  -af 'afade=t=out:st=23:d=2' \
+  /tmp/final_output.mp4
+```
+
+**Key considerations:**
+- Use ProRes for the TD recording step to minimize generation loss during compositing
+- Extract at the target output framerate (not TD's render framerate)
+- For audio-synced content, analyze the audio file separately in Python (scipy FFT) to get per-frame features (rms, spectral bands, beats) and drive compositing parameters
+- Always verify TD FPS > 0 before recording (see pitfalls #37, #38)
+
 ## Data Visualization
 
 ### Pattern 9: Table Data -> Bar Chart via Instancing
@@ -524,7 +575,7 @@ Box SOP -> Geometry COMP (instancing from CHOP) -> Render TOP -> Null TOP (out)
 
 ```python
 # Script DAT code to transform data to instance positions
-execute_python_script: """
+td_execute_python: """
 source = op('/project1/data_table')
 instance = op('/project1/instance_transform')
 instance.clear()
@@ -545,21 +596,23 @@ for i in range(1, source.numRows):
 
 ### Pattern 9b: Audio-Reactive GLSL Fractal (Proven Recipe)
 
-Audio spectrum drives a GLSL fractal shader directly via a spectrum texture input. Bass thickens inner fractal lines, mids twist rotation, highs light outer edges. Tested and working on TD 099 Non-Commercial.
+Audio spectrum drives a GLSL fractal shader directly via a spectrum texture input. Bass thickens inner fractal lines, mids twist rotation, highs light outer edges. **Always run discovery (SKILL.md Step 0) before using any param names from these recipes — they may differ in your TD version.**
 
 ```
-Audio File In CHOP → Audio Spectrum CHOP → Math CHOP (boost gain=5)
-    → Resample CHOP (256 samples) → CHOP To TOP (spectrum texture, 256x1)
+Audio File In CHOP → Audio Spectrum CHOP (FFT=512, outlength=256)
+    → Math CHOP (gain=10)
+    → CHOP To TOP (spectrum texture, 256x2, dataformat=r)
                                           ↓ (input 1)
 Constant TOP (rgba32float, time) → GLSL TOP (audio-reactive shader) → Null TOP
         (input 0)                    ↑
                               Text DAT (shader code)
 ```
 
-**Build via td_exec (complete working script):**
+**Build via td_execute_python (complete working script):**
 
 ```python
-td_exec("""
+# td_execute_python script:
+td_execute_python(code="""
 import os
 root = op('/project1')
 
@@ -568,25 +621,24 @@ audio = root.create(audiofileinCHOP, 'audio_in')
 audio.par.file = '/path/to/music.mp3'
 audio.par.playmode = 0  # Locked to timeline
 
-# FFT analysis
+# FFT analysis (output length manually set to 256 bins)
 spectrum = root.create(audiospectrumCHOP, 'spectrum')
 audio.outputConnectors[0].connect(spectrum.inputConnectors[0])
+spectrum.par.fftsize = '512'
+spectrum.par.outputmenu = 'setmanually'
+spectrum.par.outlength = 256
 
-# Normalize + boost
+# THEN boost gain on the raw spectrum (NO Lag CHOP — see pitfall #34)
 math = root.create(mathCHOP, 'math_norm')
 spectrum.outputConnectors[0].connect(math.inputConnectors[0])
-math.par.gain = 5
+math.par.gain = 10
 
-# Resample to 256 bins for texture
-resample = root.create(resampleCHOP, 'resample_spec')
-math.outputConnectors[0].connect(resample.inputConnectors[0])
-resample.par.timeslice = True
-resample.par.rate = 256
-
-# Spectrum → texture (256x1 image)
+# Spectrum → texture (256x2 image — stereo, sample at y=0.25 for first channel)
 # NOTE: choptoTOP has NO input connectors — use par.chop reference!
 spec_tex = root.create(choptoTOP, 'spectrum_tex')
-spec_tex.par.chop = resample
+spec_tex.par.chop = math
+spec_tex.par.dataformat = 'r'
+spec_tex.par.layout = 'rowscropped'
 
 # Time driver (rgba32float to avoid 0-1 clamping!)
 time_drv = root.create(constantTOP, 'time_driver')
@@ -640,9 +692,9 @@ void main() {
     vec2 uv0 = uv;
     vec3 finalColor = vec3(0.0);
 
-    float bass = texture(sTD2DInputs[1], vec2(0.05, 0.0)).r;
-    float mids = texture(sTD2DInputs[1], vec2(0.25, 0.0)).r;
-    float highs = texture(sTD2DInputs[1], vec2(0.65, 0.0)).r;
+    float bass = texture(sTD2DInputs[1], vec2(0.05, 0.25)).r;
+    float mids = texture(sTD2DInputs[1], vec2(0.25, 0.25)).r;
+    float highs = texture(sTD2DInputs[1], vec2(0.65, 0.25)).r;
 
     float ca = cos(t * (0.15 + mids * 0.3));
     float sa = sin(t * (0.15 + mids * 0.3));
@@ -651,7 +703,7 @@ void main() {
     for (float i = 0.0; i < 4.0; i++) {
         uv = fract(uv * (1.4 + bass * 0.3)) - 0.5;
         float d = length(uv) * exp(-length(uv0));
-        float freq = texture(sTD2DInputs[1], vec2(clamp(d*0.5, 0.0, 1.0), 0.0)).r;
+        float freq = texture(sTD2DInputs[1], vec2(clamp(d*0.5, 0.0, 1.0), 0.25)).r;
         vec3 col = palette(length(uv0) + i * 0.4 + t * 0.35);
         d = sin(d * (7.0 + bass * 4.0) + t * 1.5) / 8.0;
         d = abs(d);
@@ -769,7 +821,7 @@ Multi Touch In DAT -> Script CHOP (parse touches) -> [export to visual params]
 
 ```python
 # Normalize mouse position to 0-1 range
-execute_python_script: """
+td_execute_python: """
 op('/project1/noise1').par.offsetx.expr = "op('/project1/mouse_norm')['tx']"
 op('/project1/noise1').par.offsety.expr = "op('/project1/mouse_norm')['ty']"
 """
@@ -782,12 +834,12 @@ OSC In CHOP (port 7000) -> Select CHOP (pick channels) -> [export to visual para
 ```
 
 ```
-1. create_td_node(parentPath="/project1", nodeType="oscinChop", nodeName="osc_in")
-2. update_td_node_parameters(nodePath="/project1/osc_in", properties={"port": 7000})
+1. td_create_operator(parent="/project1", type="oscinChop", name="osc_in")
+2. td_set_operator_pars(path="/project1/osc_in", properties={"port": 7000})
 
 # OSC messages like /frequency 440 will appear as channel "frequency" with value 440
 # Export to any parameter:
-3. execute_python_script: "op('/project1/noise1').par.period.expr = \"op('/project1/osc_in')['frequency']\""
+3. td_execute_python: "op('/project1/noise1').par.period.expr = \"op('/project1/osc_in')['frequency']\""
 ```
 
 ### Pattern 14: MIDI Control (DJ/VJ)
@@ -815,12 +867,12 @@ Source C (camera) --------+
 
 ```python
 # MIDI CC1 controls which source is active (0-127 -> 0-2)
-execute_python_script: """
+td_execute_python: """
 op('/project1/switch1').par.index.expr = "int(op('/project1/midi_in')['cc1'] / 42)"
 """
 
 # MIDI CC2 controls crossfade between current and next
-execute_python_script: """
+td_execute_python: """
 op('/project1/cross1').par.cross.expr = "op('/project1/midi_in')['cc2'] / 127.0"
 """
 ```
@@ -851,7 +903,7 @@ Script CHOP (cue state: current_cue, progress, next_cue_trigger)
 ```
 
 ```python
-execute_python_script: """
+td_execute_python: """
 # Simple cue system
 cue_table = op('/project1/cue_list')
 cue_state = op('/project1/cue_state')
@@ -900,7 +952,7 @@ WebSocket DAT -> Script DAT (parse JSON messages) -> [update visuals]
 ```
 
 ```python
-execute_python_script: """
+td_execute_python: """
 ws = op('/project1/websocket1')
 ws.par.address = 'ws://localhost:8080'
 ws.par.active = True
diff --git a/skills/creative/touchdesigner/references/pitfalls.md b/skills/creative/touchdesigner/references/pitfalls.md
index 862bdc56316..5883ed72c99 100644
--- a/skills/creative/touchdesigner/references/pitfalls.md
+++ b/skills/creative/touchdesigner/references/pitfalls.md
@@ -2,93 +2,16 @@
 
 Hard-won knowledge from real TD sessions. Read this before building anything.
 
-## Setup & Connection
-
-### 1. The .tox from the git repo is BROKEN
-
-The `td/mcp_webserver_base.tox` in the `8beeeaaat/touchdesigner-mcp` git clone is **incomplete**. It's missing the `td_server` Python module (generated by `npm run gen:webserver` which requires Docker). Port 9981 opens, but every route returns 404.
-
-**Always download the release zip:**
-```bash
-curl -L -o td.zip \
-  "https://github.com/8beeeaaat/touchdesigner-mcp/releases/latest/download/touchdesigner-mcp-td.zip"
-unzip -o td.zip -d touchdesigner-mcp-td
-```
-
-### 2. The release .tox also breaks (frequently)
-
-Even the correct release .tox fails after drag-and-drop import because `import_modules.py` resolves `modules/` via `parent().par.externaltox.eval()` — a relative path that often goes wrong. Symptoms: port 9981 listens, all routes 404, TD Textport shows `[ERROR] Failed to setup modules`.
-
-**The custom API handler (`scripts/custom_api_handler.py`) is more reliable.** It has zero external module dependencies — just a WebServer DAT + Text DAT callback. The skill's setup workflow should try the .tox first, test with `curl`, and auto-deploy the handler if 404.
-
-### 3. You CANNOT automate the .tox import from outside TD
-
-TD has no CLI flag to import a .tox. macOS blocks keystroke injection via System Events for security. The only way to get code into TD from outside is:
-- Have a WebServer DAT already running (chicken-and-egg)
-- AppleScript to open Textport + clipboard paste (fragile, not always reliable)
-- User manually drags the .tox or pastes a script
-
-**Plan for one manual step** from the user (either .tox drag-drop or Textport paste). Make it as frictionless as possible: `open -R /path/to/file` to reveal in Finder.
-
-### 4. The npm package name is `touchdesigner-mcp-server` (not `@anthropic/...`)
-
-The Hermes config should use:
-```yaml
-command: npx
-args: ["-y", "touchdesigner-mcp-server@latest"]
-```
-
-### 5. MCP tools may register but not be callable
-
-Hermes may report "17 MCP tool(s) now available" but the tools aren't exposed as function calls. Use the REST API directly via `curl` in `execute_code` as a reliable fallback:
-```python
-def td_exec(script):
-    escaped = json.dumps({"script": script})
-    cmd = f"curl -s -X POST -H 'Content-Type: application/json' -d {shlex.quote(escaped)} 'http://127.0.0.1:9981/api/td/server/exec'"
-    return json.loads(terminal(cmd)['output'])
-```
-
-## TD WebServer DAT Quirks
-
-### 6. Response body goes in `response['data']`, NOT `response['body']`
-
-When writing custom WebServer DAT handlers, the response payload must be set on the `data` key:
-```python
-response['data'] = json.dumps({"result": 42})  # ✓ works
-response['body'] = json.dumps({"result": 42})   # ✗ ignored
-```
-
-### 7. Request POST body comes as BYTES in `request['data']`
-
-Not `request['body']`, and it's `bytes` not `str`:
-```python
-raw = request.get('data', b'')
-if isinstance(raw, bytes):
-    raw = raw.decode('utf-8')
-body = json.loads(raw) if raw else {}
-```
-
-### 8. Non-Commercial license caps resolution at 1280×1280
-
-Setting `resolutionw=1920` silently clamps to 1280. Always check effective resolution after creation:
-```python
-n.cook(force=True)
-actual = str(n.width) + 'x' + str(n.height)
-```
-
 ## Parameter Names
 
-### 9. NEVER hardcode parameter names — always discover
+### 1. NEVER hardcode parameter names — always discover
 
-Parameter names change between TD versions. What works in 099 may not work in 098 or 2023.x. Always run discovery first:
-```python
-n = root.create(glslTOP, '_test')
-pars = [(p.name, type(p.val).__name__) for p in n.pars()]
-n.destroy()
-```
+Parameter names change between TD versions. What works in one build may not work in another. ALWAYS use td_get_par_info to discover actual names from TD.
 
-Known differences from docs/online references:
-| What docs say | TD 099 actual | Notes |
+The agent's LLM training data contains WRONG parameter names. Do not trust them.
+
+Known historical differences (may vary further — always verify):
+| What docs/training say | Actual in some versions | Notes |
 |---------------|---------------|-------|
 | `dat` | `pixeldat` | GLSL TOP pixel shader DAT |
 | `colora` | `alpha` | Constant TOP alpha |
@@ -98,7 +21,15 @@ Known differences from docs/online references:
 | `bgcolora` | `bgalpha` | Text TOP bg alpha |
 | `value1name` | `vec0name` | GLSL TOP uniform name |
 
-### 10. Use `safe_par()` pattern for cross-version compatibility
+### 2. twozero td_execute_python response format
+
+When calling `td_execute_python` via twozero MCP, successful responses return `(ok)` followed by an FPS/error summary (e.g. `[fps 60.0/60] [0 err/0 warn]`), NOT the value of the script's `result` variable. If you're parsing responses programmatically, check for the `(ok)` prefix — don't pattern-match on Python variable names from the script. Use `td_get_operator_info` or separate inspection calls to read back values.
+
+### 2b. When using td_set_operator_pars, param names must match exactly
+
+Use `td_get_par_info` to discover them. The MCP tool validates parameter names and returns clear errors explaining what went wrong, unlike raw Python, which raises `tdAttributeError` and stops the entire script. Always discover before setting.
+
+### 3. Use `safe_par()` pattern for cross-version compatibility
 
 ```python
 def safe_par(node, name, value):
@@ -109,36 +40,65 @@ def safe_par(node, name, value):
     return False
 ```
 
-### 11. `td.tdAttributeError` crashes the whole script
+### 4. `td.tdAttributeError` crashes the whole script — use defensive access
 
-If you do `node.par.nonexistent = value`, TD raises `tdAttributeError` and **stops the entire script**. There's no way to catch it with try/except in some TD versions. Always check with `getattr` first or use `safe_par()`.
+If you do `node.par.nonexistent = value`, TD raises `tdAttributeError` and stops the entire script. Prevention is better than catching:
+- Use `op()` instead of `opex()` — `op()` returns None on failure, `opex()` raises
+- Use `hasattr(node.par, 'name')` before accessing any parameter
+- Use `getattr(node.par, 'name', None)` with a default
+- Use the `safe_par()` pattern from pitfall #3
+
+```python
+# WRONG — crashes if param doesn't exist:
+node.par.nonexistent = value
+
+# CORRECT — defensive access:
+if hasattr(node.par, 'nonexistent'):
+    node.par.nonexistent = value
+```
+
+### 5. `outputresolution` is a string menu, not an integer
+
+```
+menuNames: ['useinput','eighth','quarter','half','2x','4x','8x','fit','limit','custom','parpanel']
+```
+Always use the string form. Setting `outputresolution = 9` may silently fail.
+```python
+node.par.outputresolution = 'custom'  # correct
+node.par.resolutionw = 1280; node.par.resolutionh = 720
+```
+Discover valid values: `list(node.par.outputresolution.menuNames)`
 
 ## GLSL Shaders
 
-### 12. `uTDCurrentTime` does NOT exist in TD 099
+### 6. `uTDCurrentTime` does NOT exist in GLSL TOP
 
-The GLSL builtin for time was removed or never existed in some builds. Feed time via a 1×1 Constant TOP input. **CRITICAL: set format to `rgba32float`** — the default 8-bit format clamps values to 0-1, so `absTime.seconds % 1000.0` gets clamped and the GLSL shader sees a frozen time value of 1.0:
+There is NO built-in time uniform for GLSL TOPs. GLSL MAT has `uTDGeneral.seconds` but that's NOT available in GLSL TOP context.
+
+**PRIMARY — GLSL TOP Vectors/Values page:**
+```python
+gl.par.value0name = 'uTime'
+gl.par.value0.expr = "absTime.seconds"
+# In GLSL: uniform float uTime;
+```
+
+**FALLBACK — Constant TOP texture (for complex time data):**
+
+CRITICAL: set format to `rgba32float` — default 8-bit clamps to 0-1:
 ```python
 t = root.create(constantTOP, 'time_driver')
-t.par.format = 'rgba32float'  # ← REQUIRED! Without this, time is stuck at 1.0
+t.par.format = 'rgba32float'
 t.par.outputresolution = 'custom'
-t.par.resolutionw = 1
-t.par.resolutionh = 1
+t.par.resolutionw = 1; t.par.resolutionh = 1
 t.par.colorr.expr = "absTime.seconds % 1000.0"
-t.par.colorg.expr = "int(absTime.seconds / 1000.0)"
 t.outputConnectors[0].connect(glsl.inputConnectors[0])
 ```
-In GLSL:
-```glsl
-vec4 td = texture(sTD2DInputs[0], vec2(0.5));
-float t = td.r + td.g * 1000.0;
-```
 
-### 13. GLSL compile errors are silent in the API
+### 7. GLSL compile errors are silent in the API
 
 The GLSL TOP shows a yellow warning triangle in the UI but `node.errors()` may return empty string. Check `node.warnings()` too, and create an Info DAT pointed at the GLSL TOP to read the actual compiler output.
 
-### 14. TD GLSL uses `vUV.st` not `gl_FragCoord`
+### 8. TD GLSL uses `vUV.st` not `gl_FragCoord` — and REQUIRES `TDOutputSwizzle()` on macOS
 
 Standard GLSL patterns don't work. TD provides:
 - `vUV.st` — UV coordinates (0-1)
@@ -146,9 +106,26 @@ Standard GLSL patterns don't work. TD provides:
 - `sTD2DInputs[0]` — input textures
 - `layout(location = 0) out vec4 fragColor` — output
 
+CRITICAL on macOS: Always wrap output with `TDOutputSwizzle()`:
+```glsl
+fragColor = TDOutputSwizzle(color);
+```
+TD uses GLSL 4.60 (Vulkan backend). GLSL 3.30 and earlier are no longer supported.
+
+### 9. Large GLSL shaders — write to temp file
+
+GLSL code with special characters can corrupt JSON payloads. Write the shader to a temp file and load it in TD:
+```python
+# Agent side: write shader to /tmp/shader.glsl via write_file
+# TD side:
+sd = root.create(textDAT, 'shader_code')
+with open('/tmp/shader.glsl', 'r') as f:
+    sd.text = f.read()
+```
+
 ## Node Management
 
-### 15. Destroying nodes while iterating `root.children` causes `tdError`
+### 10. Destroying nodes while iterating `root.children` causes `tdError`
 
 The iterator is invalidated when a child is destroyed. Always snapshot first:
 ```python
@@ -158,9 +135,34 @@ for child in kids:
         child.destroy()
 ```
 
-### 16. Feedback TOP: use `top` parameter, NOT direct input wire
+### 10b. Split cleanup and creation into SEPARATE td_execute_python calls
 
-In TD 099, the feedbackTOP's `top` parameter references which TOP to delay. **Do not also wire that TOP directly into the feedback's input** — this creates a real cook dependency loop (warning flood, potential crash). The "Not enough sources" error on feedbackTOP is benign and resolves after a few frames of playback.
+Creating nodes with the same names you just destroyed in the SAME script causes "Invalid OP object" errors — even with `list()` snapshot. TD's internal references can go stale within one execution context.
+
+**WRONG (single call):**
+```python
+# td_execute_python:
+for c in list(root.children):
+    if c.valid and c.name.startswith('promo_'):
+        c.destroy()
+# ... then create promo_audio, promo_shader etc. in same script → CRASHES
+```
+
+**CORRECT (two separate calls):**
+```python
+# Call 1: td_execute_python — clean only
+for c in list(root.children):
+    if c.valid and c.name.startswith('promo_'):
+        c.destroy()
+
+# Call 2: td_execute_python — build (separate MCP call)
+audio = root.create(audiofileinCHOP, 'promo_audio')
+# ... rest of build
+```
+
+### 11. Feedback TOP: use `top` parameter, NOT direct input wire
+
+The feedbackTOP's `top` parameter references which TOP to delay. Do NOT also wire that TOP directly into the feedback's input — this creates a real cook dependency loop.
 
 Correct setup:
 ```python
@@ -169,23 +171,173 @@ fb.par.top = comp.path          # reference only — no wire to fb input
 fb.outputConnectors[0].connect(xf)  # fb output -> transform -> fade -> comp
 ```
 
-The resulting "Cook dependency loop detected" **warning** on the transform/fade chain is expected and correct — that's what feedback loops do. It's informational, not an error.
+The "Cook dependency loop detected" warning on the transform/fade chain is expected.
 
-### 16. GLSL TOP auto-creates companion nodes
+### 12. GLSL TOP auto-creates companion nodes
 
-Creating a `glslTOP` also creates `name_pixel` (Text DAT), `name_info` (Info DAT), and `name_compute` (Text DAT). These are visible in the network and count toward node totals. Don't be alarmed by "extra" nodes.
+Creating a `glslTOP` also creates `name_pixel` (Text DAT), `name_info` (Info DAT), and `name_compute` (Text DAT). These are visible in the network. Don't be alarmed by "extra" nodes.
 
-### 17. The default project root is `/project1`
+### 13. The default project root is `/project1`
 
 New TD files start with `/project1` as the main container. System nodes live at `/`, `/ui`, `/sys`, `/local`, `/perform`. Don't create user nodes outside `/project1`.
 
-### 18. `open -R` reveals the file but can't automate the drag
+### 14. Non-Commercial license caps resolution at 1280x1280
 
-Use `open -R /path/to/file.tox` to open Finder highlighting the file. The user must then drag it into TD manually. No AppleScript workaround exists on modern macOS due to accessibility restrictions.
+Setting `resolutionw=1920` silently clamps to 1280. Always check effective resolution after creation:
+```python
+n.cook(force=True)
+actual = str(n.width) + 'x' + str(n.height)
+```
+
+## Recording & Codecs
+
+### 15. MovieFileOut TOP: H.264/H.265/AV1 requires Commercial license
+
+In Non-Commercial TD, these codecs produce an error. Recommended alternatives:
+- `prores` — Apple ProRes, **best on macOS**, HW accelerated, NOT license-restricted. Large files (~55MB/s at 1280x720) but visually lossless quality. **Use this as default on macOS.**
+- `cineform` — GoPro Cineform, supports alpha
+- `hap` — GPU-accelerated playback, large files
+- `notchlc` — GPU-accelerated, good quality
+- `mjpa` — Motion JPEG, legacy fallback (lossy, use only if ProRes unavailable)
+
+For image sequences: `rec.par.type = 'imagesequence'`, `rec.par.imagefiletype = 'png'`
+
+### 16. MovieFileOut `.record()` method may not exist
+
+Use the toggle parameter instead:
+```python
+rec.par.record = True   # start recording
+rec.par.record = False  # stop recording
+```
+
+When setting file path and starting recording in the same script, use delayFrames:
+```python
+rec.par.file = '/tmp/new_output.mov'
+run("op('/project1/recorder').par.record = True", delayFrames=2)
+```
+
+### 17. TOP.save() captures same frame when called rapidly
+
+Use MovieFileOut for real-time recording. Set `project.realTime = False` for frame-accurate output.
+
+### 18. AudioFileIn CHOP: cue and recording sequence matters
+
+The recording sequence must be done in exact order, or the recording will be empty, audio will start mid-file, or the file won't be written.
+
+**Proven recording sequence:**
+
+```python
+# Step 1: Stop any existing recording
+rec.par.record = False
+
+# Step 2: Reset audio to beginning
+audio.par.play = False
+audio.par.cue = True
+audio.par.cuepoint = 0      # may need cuepointunit=0 too
+# Verify: audio.par.cue.eval() should be True
+
+# Step 3: Set output file path
+rec.par.file = '/tmp/output.mov'
+
+# Step 4: Release cue + start playing + start recording (with frame delay)
+audio.par.cue = False
+audio.par.play = True
+audio.par.playmode = 2      # Sequential — plays once through
+run("op('/project1/recorder').par.record = True", delayFrames=3)
+```
+
+**Why each step matters:**
+- `rec.par.record = False` first — if a previous recording is active, setting `par.file` may fail silently
+- `audio.par.cue = True` + `cuepoint = 0` — guarantees audio starts from the beginning, otherwise the spectrum may be silent for the first few seconds
+- `delayFrames=3` on the record start — setting `par.file` and `par.record = True` in the same script can race; the file path needs a frame to register before recording starts
+- `playmode = 2` (Sequential) — plays the file once. Use `playmode = 0` (Locked to Timeline) if you want TD's timeline to control position
+
+## TD Python API Patterns
+
+### 19. COMP extension setup: ext0object format is CRITICAL
+
+`ext0object` expects a CONSTANT string (NOT expression mode):
+```python
+comp.par.ext0object = "op('./myExtensionDat').module.MyClassName(me)"
+```
+NEVER set as just the DAT name. NEVER use ParMode.EXPRESSION. ALWAYS ensure the DAT has `par.language='python'`.
+
+### 20. td.Panel is NOT subscriptable — use attribute access
+
+```python
+comp.panel.select      # correct (attribute access, returns float)
+comp.panel['select']   # WRONG — 'td.Panel' object is not subscriptable
+```
+
+### 21. ALWAYS use relative paths in script callbacks
+
+In scriptTOP/CHOP/SOP/DAT callbacks, use paths relative to `scriptOp` or `me`:
+```python
+root = scriptOp.parent().parent()
+dat = root.op('pixel_data')
+```
+NEVER hardcode absolute paths like `op('/project1/myComp/child')` — they break when containers are renamed or copied.
+
+### 22. keyboardinCHOP channel names have 'k' prefix
+
+Channel names are `kup`, `kdown`, `kleft`, `kright`, `ka`, `kb`, etc. — NOT `up`, `down`, `a`, `b`. Always verify with:
+```python
+channels = [c.name for c in op('/project1/keyboard1').chans()]
+```
+
+### 23. expressCHOP cook-only properties — false positive errors
+
+`me.inputVal`, `me.chanIndex`, `me.sampleIndex` work ONLY in cook-context. Calling `par.expr0expr.eval()` from outside always raises an error — this is NOT a real operator error. Ignore these in error scans.
+
+### 24. td.Vertex attributes — use index access not named attributes
+
+In TD 2025.32, `td.Vertex` objects do NOT have `.x`, `.y`, `.z` attributes:
+```python
+# WRONG — crashes:
+vertex.x, vertex.y, vertex.z
+
+# CORRECT — index-based:
+vertex.point.P[0], vertex.point.P[1], vertex.point.P[2]
+# Or for SOP point positions:
+pt = sop.points()[i]
+pos = pt.P    # use P[0], P[1], P[2]
+```
+
+## Audio
+
+### 25. Audio Spectrum CHOP output is weak — boost it
+
+Raw output is very small (0.001-0.05). Use built-in boost: `spectrum.par.highfrequencyboost = 3.0`
+
+If still weak, add Math CHOP in Range mode: `fromrangehi=0.05, torangehi=1.0`
+
+### 26. AudioSpectrum CHOP: timeslice and sample count are the #1 gotcha
+
+AudioSpectrum at 44100Hz with `timeslice=False` outputs the ENTIRE audio file as samples (~24000+). CHOP-to-TOP then exceeds texture resolution max and warns/fails.
+
+**Fix:** Keep `timeslice = True` (default) for real-time per-frame FFT. Set `fftsize` to control bin count (it's a STRING enum: `'256'` not `256`).
+
+If the CHOP-to-TOP still gets too many samples, set `layout = 'rowscropped'` on the choptoTOP.
+
+```python
+spectrum.par.fftsize = '256'      # STRING, not int — enum values
+spectrum.par.timeslice = True     # MUST be True for real-time audio reactivity
+spectex.par.layout = 'rowscropped'  # handles oversized CHOP inputs
+```
+
+**resampleCHOP has NO `numsamples` param.** It uses `rate`, `start`, `end`, `method`. Don't guess — always `td_get_par_info('resampleCHOP')` first.
+
+### 27. CHOP To TOP has NO input connectors — use par.chop reference
+
+```python
+spec_tex = root.create(choptoTOP, 'spectrum_tex')
+spec_tex.par.chop = resample  # correct: parameter reference
+# NOT: resample.outputConnectors[0].connect(spec_tex.inputConnectors[0])  # WRONG
+```
 
 ## Workflow
 
-### 19. Always verify after building — errors are silent
+### 28. Always verify after building — errors are silent
 
 Node errors and broken connections produce no output. Always check:
 ```python
@@ -196,141 +348,161 @@ for c in list(root.children):
     if w: print(c.name, 'WARN:', w)
 ```
 
-### 20. Build in one big `execute_python_script` call, not many small ones
+### 29. Window COMP param for display target is `winop`
 
-Each API round-trip adds latency. Bundle node creation + parameter setting + wiring into a single script that creates everything at once, then verify in one final call.
-
-### 21. Window COMP param for display target is `winop` (not `top` or `window`)
-
-To display output in a separate window:
 ```python
 win = root.create(windowCOMP, 'display')
-win.par.winop = '/project1/logo_out'  # ← this is the correct param
+win.par.winop = '/project1/logo_out'
 win.par.winw = 1280; win.par.winh = 720
-win.par.winopen.pulse()  # open the window
+win.par.winopen.pulse()
 ```
 
-### 22. Save the project to make API persistent across TD restarts
+### 30. `sample()` returns frozen pixels in rapid calls
 
-After deploying the custom API handler, save the project:
+`out.sample(x, y)` returns pixels from a single cook snapshot. Compare samples with 2+ second delays, or use screencapture on the display window.
+
+### 31. Audio-reactive GLSL: dual-layer sync pipeline
+
+For audio-synced visuals, use BOTH layers for maximum effect:
+
+**Layer 1 (TD-side, real-time):** AudioFileIn → AudioSpectrum(timeslice=True, fftsize='256') → Math(gain=5) → choptoTOP(par.chop=math, layout='rowscropped') → GLSL input. The shader samples `sTD2DInputs[1]` at different x positions for bass/mid/hi. Record the TD output with MovieFileOut.
+
+**Layer 2 (Python-side, post-hoc):** scipy FFT on the SAME audio file → per-frame features (rms, bass, mid, hi, beat detection) → drive ASCII brightness, chromatic aberration, beat flashes during the render pass.
+
+Both layers locked to the same audio file = visuals genuinely sync to the beat at two independent stages.
+
+**Key gotcha:** AudioFileIn must be cued (`par.cue=True` → `par.cuepulse.pulse()`) then uncued (`par.cue=False`, `par.play=True`) before recording starts. Otherwise the spectrum is silent for the first few seconds.
+
+### 32. twozero MCP: benchmark and prefer native tools
+
+As benchmarked in April 2026, twozero MCP exposes 36 native tools, whereas the old curl/REST method (port 9981) had zero native tools.
+
+**Always prefer native MCP tools over td_execute_python:**
+- `td_create_operator` over `root.create()` scripts (handles viewport positioning)
+- `td_set_operator_pars` over `node.par.X = Y` scripts (validates param names)
+- `td_get_par_info` over temp-node discovery dance (instant, no cleanup)
+- `td_get_errors` over manual `c.errors()` loops
+- `td_get_focus` for context awareness (no equivalent in old method)
+
+Only fall back to `td_execute_python` for multi-step logic (wiring chains, conditional builds, loops).
+
+### 33. twozero td_execute_python response wrapping
+
+twozero wraps `td_execute_python` responses with status info: `(ok)\n\n[fps 60.0/60] [0 err/0 warn]`. Your Python `result` variable value does not appear verbatim in the response text, and neither does `print()` output. If you need to check results programmatically, read values back with dedicated inspection tools (`td_get_operator_info`, `td_read_dat`, `td_read_chop`). Don't rely on string-matching the `result` dict.
+
+### 34. Audio-reactive chain: DO NOT use Lag CHOP or Filter CHOP for spectrum smoothing
+
+The Derivative docs and tutorials suggest using Lag CHOP (lag1=0.2, lag2=0.5) to smooth raw FFT output before passing to a shader. **This does NOT work with AudioSpectrum → CHOP to TOP → GLSL.**
+
+What happens: Lag CHOP operates in timeslice mode. A 256-sample spectrum input gets expanded to 1600-2400 samples. The Lag averaging drives all values to near-zero (~1e-06). The CHOP to TOP produces a 2400x2 texture instead of 256x2. The shader receives effectively zero audio data.
+
+**The correct chain is: Spectrum(outlength=256) → Math(gain=10) → CHOPtoTOP → GLSL.** No CHOP smoothing at all. If you need smoothing, do it in the GLSL shader via temporal lerp with a feedback texture.
+
+Verified values with audio playing:
+- Without Lag CHOP: bass bins = 5.0-5.4, mid bins = 1.0-1.7 (strong, usable)
+- With Lag CHOP: ALL bins = 0.000001-0.00004 (dead, zero audio reactivity)
+
+### 35. AudioSpectrum Output Length: set manually to avoid CHOP to TOP overflow
+
+AudioSpectrum in Visualization mode with FFT 8192 outputs 22,050 samples by default (1 per Hz, 0–22050). CHOP to TOP cannot handle this — you get "Number of samples exceeded texture resolution max".
+
+Fix: `spectrum.par.outputmenu = 'setmanually'` and `spectrum.par.outlength = 256`. This gives 256 frequency bins — plenty for visual FFT.
+
+DO NOT set `timeslice = False` as a workaround — that processes the entire audio file at once and produces even more samples.
+
+### 36. GLSL spectrum texture from CHOP to TOP is 256x2 not 256x1
+
+AudioSpectrum outputs 2 channels (stereo: chan1, chan2). CHOP to TOP with `dataformat='r'` creates a 256x2 texture — one row per channel. Sample the first channel at `y=0.25` (center of first row), NOT `y=0.5` (boundary between rows):
+
+```glsl
+float bass = texture(sTD2DInputs[1], vec2(0.05, 0.25)).r;  // correct
+float bass = texture(sTD2DInputs[1], vec2(0.05, 0.5)).r;   // WRONG — samples between rows
+```
+
+### 37. FPS=0 doesn't mean ops aren't cooking — check play state
+
+TD can show `fps:0` in `td_get_perf` while ops still cook and `TOP.save()` still produces valid screenshots. The two most common causes:
+
+**a) Project is paused (playbar stopped).** TD's playbar can be toggled with spacebar. The `root` at `/` has no `.playbar` attribute (it's on the perform COMP). The easiest fix is sending a spacebar keypress via `td_input_execute`, though this tool can sometimes error. As a workaround, `TOP.save()` always works regardless of play state — use it to verify rendering is actually happening before spending time debugging FPS.
+
+**b) Audio device CHOP blocking the main thread.** An `audiooutCHOP` with an active audio device can consume 300-400ms/s (2000%+ of frame budget), stalling the cook loop at FPS=0. Fix: keep the CHOP active but set `volume=0` to prevent the audio driver from blocking. Disabling it entirely (`active=False`) may also work but can prevent downstream audio processing CHOPs from cooking.
+
+Diagnostic sequence when FPS=0:
+1. `td_get_perf` — check if any op has extreme CPU/s
+2. `TOP.save()` on the output — if it produces a valid image, the pipeline works, just not at real-time rate
+3. Check for blocking CHOPs (audioout, audiodevin, etc.)
+4. Toggle play state (spacebar, or check if absTime.seconds is advancing)
+
+### 38. Recording while FPS=0 produces empty or near-empty files
+
+This is the #1 cause of "I recorded for 30 seconds but got a 2-frame video." If TD's cook loop is stalled (FPS=0 or very low), MovieFileOut has nothing to record. Unlike `TOP.save()` which captures the last cooked frame regardless, MovieFileOut only writes frames that actually cook.
+
+**Always verify FPS before starting a recording:**
 ```python
-td_exec("project.save(os.path.expanduser('~/Documents/HermesAgent.toe'))")
+# Check via td_get_perf first
+# If FPS < 30, do NOT start recording — fix the performance issue first
+# If FPS=0, the playbar is likely paused — see pitfall #37
 ```
-TD auto-opens the last saved project on launch. The API handler is now baked into the .toe file — next time TD opens, port 9981 is live with zero manual steps. To explicitly launch with this project: `open /Applications/TouchDesigner.app ~/Documents/HermesAgent.toe`
 
-### 23. `sample()` returns frozen pixels when called from WebServer DAT callback
+Common causes of recording empty video:
+- Playbar paused (FPS=0) — see pitfall #37
+- Audio device CHOP blocking the main thread — see pitfall #37b
+- Recording started before audio was cued — audio is silent, GLSL outputs black, MovieFileOut records black frames that look empty
+- `par.file` set in the same script as `par.record = True` without `delayFrames` — see pitfalls #16 and #18
 
-`out.sample(x, y)` called from inside the API handler's `exec()` returns pixels from a single cook snapshot. It does NOT update between multiple API calls in quick succession. To verify animation is working, either:
-- Compare samples with a 2+ second delay between separate `td_exec()` calls
-- Use `screencapture` on the display window
-- Check `absTime.seconds` is advancing and shader uses time correctly
+### 39. GLSL shader produces black output — test before committing to a long render
 
-### 22. `outputresolution` is a string menu, not an integer
+New GLSL shaders can fail silently (see pitfall #7). Before recording a long take, always:
 
-### 25. MovieFileOut TOP: H.264/H.265 requires Commercial license
+1. **Write a minimal test shader first** that needs no inputs or uniforms and just outputs a UV gradient:
+```glsl
+void main() {
+    vec2 uv = vUV.st;
+    fragColor = TDOutputSwizzle(vec4(uv, 0.0, 1.0));
+}
+```
 
-In Non-Commercial TD 099, encoding with H.264 or H.265 produces an error: "GPU Accelerated H.264/H.265 Encoding requires a Commercial license". Use Motion JPEG instead:
+2. **Verify the test renders correctly** via `td_get_screenshot` on the GLSL TOP's output.
+
+3. **Swap in the real shader** and screenshot again immediately. If black, the shader has a compile error or logic issue.
+
+4. **Only then start recording.** A 90-second ProRes recording is ~5GB. Recording black frames wastes disk and time.
+
+Common causes of black GLSL output:
+- Missing `TDOutputSwizzle()` on macOS (pitfall #8)
+- Time uniform not connected — shader uses default 0.0, fractal stays at origin
+- Spectrum texture not connected — audio values all 0.0, driving everything to black
+- Integer division where float division was expected (`1/2 = 0` not `0.5`)
+- `absTime.seconds % 1000.0` rolled over past 1000 and the modulo produces unexpected values
+
+### 40. td_write_dat uses `text` parameter, NOT `content`
+
+The MCP tool `td_write_dat` expects a `text` parameter for full replacement. Passing `content` returns an error: `"Provide either 'text' for full replace, or 'old_text'+'new_text' for patching"`.
+
+If `td_write_dat` fails, fall back to `td_execute_python`:
 ```python
-rec = root.create(moviefileoutTOP, 'recorder')
-rec.par.type = 'movie'
-rec.par.file = '/tmp/output.mov'
-rec.par.videocodec = 'mjpa'  # Motion JPEG — works on Non-Commercial
+op("/project1/shader_code").text = shader_string
 ```
 
-For image sequences, use `type = 'imagesequence'` and the file param **must** use `me.fileSuffix`:
+### 41. td_execute_python does NOT return stdout or print() output
+
+Despite what earlier versions of pitfall #33 stated, `print()` and `debug()` output from `td_execute_python` scripts does NOT appear in the MCP response. The response is always just `(ok)` + FPS/error summary. To read values back, use dedicated inspection tools (`td_get_operator_info`, `td_read_dat`, `td_read_chop`) instead of trying to print from within a script.
+
+### 42. td_get_operator_info JSON has `[fps X.X/X]` appended — breaks json.loads()
+
+The response text from `td_get_operator_info` has `[fps 60.0/60]` appended after the JSON object. This causes `json.loads()` to fail with "Extra data" errors. Strip it before parsing:
 ```python
-rec.par.type = 'imagesequence'
-rec.par.imagefiletype = 'png'
-rec.par.file.expr = "'/tmp/frames/out' + me.fileSuffix"
+clean = response_text.rsplit('[fps', 1)[0]
+data = json.loads(clean)
 ```
 
-### 26. MovieFileOut `.record()` method may not exist
+### 43. td_get_screenshot is asynchronous — returns `{"status": "pending"}`
 
-In TD 099, there is no `.record()` method on moviefileoutTOP. Use the toggle parameter instead:
-```python
-rec.par.record = True   # start recording
-rec.par.record = False  # stop recording
-```
-
-When setting the file path and starting recording in the same script, use `run()` with `delayFrames` to avoid a race condition where the old filename is used:
-```python
-rec.par.file = '/tmp/new_output.mov'
-run("op('/project1/recorder').par.record = True", delayFrames=2)
-```
-
-### 27. TOP.save() captures same frame when called rapidly
-
-`op('null1').save(path)` captures the current GPU texture at call time. When called multiple times in a single script (or rapid API calls), TD doesn't cook new frames between saves — all exported PNGs will be identical. To get unique frames, use the MovieFileOut TOP which records in real-time from TD's cook cycle.
-
-### 28. AudioFileIn CHOP: cue before recording for sync
-
-When recording audio-reactive visuals, always cue the audio to the start before beginning the recording. Otherwise the visuals are synced to wherever the audio happens to be in its playback:
-```python
-op('/project1/audio_in').par.cue.pulse()  # reset to start
-run("op('/project1/recorder').par.record = True", delayFrames=3)
-```
-The audio plays via `playmode=0` (Locked to Timeline), so it stays in sync with TD's frame clock. Use `audiodeviceoutCHOP` to hear the audio during recording.
-
-### 29. Audio Spectrum CHOP output is weak — boost with Math CHOP
-
-The raw AudioSpectrum CHOP output has very small values (often 0.001-0.05 range). When fed directly to CHOP To TOP → GLSL, the shader barely reacts. Always insert a Math CHOP with `gain=5` (or higher) between the spectrum and the CHOP To TOP to get usable 0-1 range values in the shader.
-
-### 30. CHOP To TOP texture size — Resample to 256 first
-
-`choptoTOP` creates a texture where width = number of samples. An AudioSpectrum CHOP at 44100Hz has ~24000 samples — creating a 24000×1 texture is wasteful. Use a Resample CHOP set to 256 or 512 samples before the CHOP To TOP for an efficient spectrum texture.
-
-### 31. CHOP To TOP has NO input connectors — use par.chop reference
-
-`choptoTOP` does NOT have input connectors. `resample.outputConnectors[0].connect(chop_to_top.inputConnectors[0])` silently does nothing. Use the `chop` parameter instead:
-```python
-spec_tex = root.create(choptoTOP, 'spectrum_tex')
-spec_tex.par.chop = resample  # ← correct: parameter reference
-# NOT: resample.outputConnectors[0].connect(spec_tex.inputConnectors[0])  # ← WRONG: no connectors
-```
-
-### 22. `outputresolution` is a string menu, not an integer
-
-The `outputresolution` param is a menu with string values:
-```
-menuNames: ['useinput','eighth','quarter','half','2x','4x','8x','fit','limit','custom','parpanel']
-```
-Always use the string form. Setting `outputresolution = 9` may silently fail.
-```python
-node.par.outputresolution = 'custom'  # ✓ correct
-node.par.resolutionw = 1280; node.par.resolutionh = 720
-```
-Discover valid values: `list(node.par.outputresolution.menuNames)`
-
-### 23. Large GLSL shaders break curl JSON escaping
-
-GLSL code full of single/double quotes, backslashes, and special chars will corrupt the JSON payload when sent via `curl -d`. **Write the shader to a temp file and load it in TD:**
-```python
-# Agent side: write shader to /tmp/shader.glsl via write_file
-# TD side (via td_exec):
-sd = root.create(textDAT, 'shader_code')
-with open('/tmp/shader.glsl', 'r') as f:
-    sd.text = f.read()
-```
-This avoids all escaping issues. The TD Python environment has full filesystem access.
-
-### 24. TD crashes lose everything — the WebServer DAT must be re-deployed
-
-If TD crashes (common with heavy GLSL or rapid-fire API calls), all nodes including the WebServer DAT are lost. On relaunch, port 9981 is dead. Recovery:
-1. Detect: `curl` returns exit code 7 (connection refused) or `lsof -i :9981` shows nothing
-2. Check: `pgrep TouchDesigner` to confirm TD is running
-3. Re-deploy: user must paste `exec(open('...custom_api_handler.py').read())` into Textport again
-4. Verify: poll port 9981 until API responds
-
-The `td_exec()` helper should handle this gracefully:
-```python
-def td_exec(script):
-    escaped = json.dumps({"script": script})
-    cmd = f"curl -s --max-time 15 -X POST -H 'Content-Type: application/json' -d {shlex.quote(escaped)} 'http://127.0.0.1:9981/api/td/server/exec'"
-    r = terminal(cmd, timeout=20)
-    if r.get('exit_code') == 7:
-        return {'error': 'TD not responding — WebServer DAT may need re-deploy'}
-    try:
-        return json.loads(r['output'])
-    except:
-        return {'error': 'Bad response', 'raw': r['output'][:200]}
+Screenshots don't complete instantly. The tool returns `{"status": "pending", "requestId": "..."}` and the actual file appears later. Wait a few seconds before checking for the file. There is no callback or completion notification — poll the filesystem.
+
+### 44. Recording duration is manual — no auto-stop at audio end
+
+MovieFileOut records until `par.record = False` is set. If audio ends before you stop recording, the file keeps growing with repeated frames. Always stop recording promptly after the audio duration. For precision: set a timer on the agent side matching the audio length, then send `par.record = False`. Trim excess with ffmpeg as a safety net:
+```bash
+ffmpeg -i raw.mov -t 25 -c copy trimmed.mov
 ```
diff --git a/skills/creative/touchdesigner/references/python-api.md b/skills/creative/touchdesigner/references/python-api.md
index 2b8d8847f63..f2955110b0e 100644
--- a/skills/creative/touchdesigner/references/python-api.md
+++ b/skills/creative/touchdesigner/references/python-api.md
@@ -7,7 +7,7 @@ TouchDesigner's Python environment auto-imports the `td` module. All TD-specific
 When using the MCP `execute_python_script` tool, these globals are pre-loaded:
 - `op` — shortcut for `td.op()`, finds operators by path
 - `ops` — shortcut for `td.ops()`, finds multiple operators by pattern
-- `me` — the operator running the script (not meaningful via MCP — will be the WebServer DAT)
+- `me` — the operator running the script (via MCP this is the twozero internal executor)
 - `parent` — shortcut for `me.parent()`
 - `project` — the root project component
 - `td` — the full td module
@@ -432,7 +432,7 @@ for path, params in settings.items():
 
 ## Python Version and Packages
 
-TouchDesigner bundles Python 3.11+ (as of TD 2024) with these pre-installed:
+TouchDesigner bundles Python 3.11+ with these pre-installed:
 - **numpy** — array operations, fast math
 - **scipy** — signal processing, FFT
 - **OpenCV** (cv2) — computer vision
@@ -440,4 +440,24 @@ TouchDesigner bundles Python 3.11+ (as of TD 2024) with these pre-installed:
 - **requests** — HTTP client
 - **json**, **re**, **os**, **sys** — standard library
 
+**IMPORTANT:** Parameter names in examples below are illustrative. Always run discovery (SKILL.md Step 0) to get actual names for your TD version. Do NOT copy param names from these examples verbatim.
+
 Custom packages can be installed to TD's Python site-packages directory. See TD documentation for the exact path per platform.
+
+## SOP Vertex/Point Access (TD 2025.32)
+
+In TD 2025.32, `td.Vertex` does NOT have `.x`, `.y`, `.z` attributes. Use index access:
+
+```python
+# WRONG — crashes in TD 2025.32:
+vertex.x, vertex.y, vertex.z
+
+# CORRECT — index/attribute access:
+pt = sop.points()[i]
+pos = pt.P          # Position object
+x, y, z = pos[0], pos[1], pos[2]
+
+# Always introspect first:
+dir(sop.points()[0])   # see what attributes actually exist
+dir(sop.points()[0].P) # see Position object interface
+```
diff --git a/skills/creative/touchdesigner/references/troubleshooting.md b/skills/creative/touchdesigner/references/troubleshooting.md
index 30ad580f4ca..c9817ebe0f2 100644
--- a/skills/creative/touchdesigner/references/troubleshooting.md
+++ b/skills/creative/touchdesigner/references/troubleshooting.md
@@ -1,274 +1,244 @@
-# TouchDesigner Troubleshooting
+# TouchDesigner Troubleshooting (twozero MCP)
 
 > See `references/pitfalls.md` for the comprehensive lessons-learned list.
 
-## Quick Connection Diagnostic
+## 1. Connection Issues
 
-```bash
-lsof -i :9981 -P -n | grep LISTEN    # Step 1: Is TD listening?
-curl -s http://127.0.0.1:9981/api/td/server/td   # Step 2: API working?
+### Port 40404 not responding
+
+Check these in order:
+
+1. Is TouchDesigner running?
+   ```bash
+   pgrep TouchDesigner
+   ```
+
+1b. Quick hub health check (no JSON-RPC needed):
+   A plain GET to the MCP URL returns instance info:
+   ```bash
+   curl -s http://localhost:40404/mcp
+   ```
+   Returns: `{"hub": true, "pid": ..., "instances": {"127.0.0.1_PID": {"project": "...", "tdVersion": "...", ...}}}`
+   If this returns JSON but `instances` is empty, TD is running but twozero hasn't registered yet.
+
+2. Is twozero installed in TD?
+   Open TD Palette Browser > twozero should be listed. If not, install it.
+
+3. Is MCP enabled in twozero settings?
+   In TD, open twozero preferences and confirm MCP server is toggled ON.
+
+4. Test the port directly:
+   ```bash
+   nc -z 127.0.0.1 40404
+   ```
+
+5. Test the MCP endpoint:
+   ```bash
+   curl -s http://localhost:40404/mcp
+   ```
+   Should return JSON with hub info. If it does, the server is running.
+
+### Hub responds but no TD instances
+
+The twozero MCP hub is running but TD hasn't registered. Causes:
+- TD project not loaded yet (still on splash screen)
+- twozero COMP not initialized in the current project
+- twozero version mismatch
+
+Fix: Open/reload a TD project that contains the twozero COMP. Use td_list_instances
+to check which TD instances are registered.
+
+### Multi-instance setup
+
+twozero auto-assigns ports for multiple TD instances:
+- First instance: 40404
+- Second instance: 40405
+- Third instance: 40406
+- etc.
+
+Use `td_list_instances` to discover all running instances and their ports.
+
+## 2. MCP Tool Errors
+
+### td_execute_python returns error
+
+The error message from td_execute_python often contains the Python traceback.
+If it's unclear, use `td_read_textport` to see the full TD console output —
+Python exceptions are always printed there.
+
+Common causes:
+- Syntax error in the script
+- Referencing a node that doesn't exist (op() returns None, then you call .par on None)
+- Using wrong parameter names (see pitfalls.md)
+
+### td_set_operator_pars fails
+
+Parameter name mismatch is the #1 cause. The tool validates param names and
+returns clear errors, but you must use exact names.
+
+Fix: ALWAYS call `td_get_par_info` first to discover the real parameter names:
+```
+td_get_par_info(op_type='glslTOP')
+td_get_par_info(op_type='noiseTOP')
 ```
 
-| Symptom | Cause | Fix |
-|---------|-------|-----|
-| Connection refused | No WebServer DAT | Deploy `scripts/custom_api_handler.py` in TD Textport |
-| HTTP 404 on all routes | .tox module import failed | Deploy custom handler (pitfalls #1-2) |
-| HTTP 200, empty body | Response in wrong key | Handler uses `response['data']` not `response['body']` (pitfalls #6) |
-| HTTP 200, JSON body | Working | Proceed to discovery |
-| MCP tools not callable | Normal — use curl instead | `td_exec()` pattern in SKILL.md works without MCP |
+### td_create_operator type name errors
 
-## Node Creation Issues
+Operator type names use camelCase with family suffix:
+- CORRECT: noiseTOP, glslTOP, levelTOP, compositeTOP, audiospectrumCHOP
+- WRONG:   NoiseTOP, noise_top, NOISE TOP, Noise
+
+### td_get_operator_info for deep inspection
+
+If unsure about any aspect of an operator (params, inputs, outputs, state):
+```
+td_get_operator_info(path='/project1/noise1', detail='full')
+```
+
+## 3. Parameter Discovery
+
+CRITICAL: ALWAYS use td_get_par_info to discover parameter names.
+
+The agent's LLM training data contains WRONG parameter names for TouchDesigner.
+Do not trust them. Known wrong names include dat vs pixeldat, colora vs alpha,
+sizex vs size, and many more. See pitfalls.md for the full list.
+
+Workflow:
+1. td_get_par_info(op_type='glslTOP') — get all params for a type
+2. td_get_operator_info(path='/project1/mynode', detail='full') — get params for a specific instance
+3. Use ONLY the names returned by these tools
+
+## 4. Performance
+
+### Diagnosing slow performance
+
+Use `td_get_perf` to see which operators are slow. Look at cook times —
+anything over 1ms per frame is worth investigating.
+
+Common causes:
+- Resolution too high (especially on Non-Commercial)
+- Complex GLSL shaders
+- Too many TOP-to-CHOP or CHOP-to-TOP transfers (GPU-CPU memory copies)
+- Feedback loops without decay (values accumulate, memory grows)
+
+### Non-Commercial license restrictions
+
+- Resolution cap: 1280x1280. Setting resolutionw=1920 silently clamps to 1280.
+- H.264/H.265/AV1 encoding requires Commercial license. Use ProRes or Hap instead.
+- No commercial use of output.
+
+Always check effective resolution after creation:
+```python
+n.cook(force=True)
+actual = str(n.width) + 'x' + str(n.height)
+```
+
+## 5. Hermes Configuration
+
+### Config location
+
+~/.hermes/config.yaml
+
+### MCP entry format
+
+The twozero TD entry should look like:
+```yaml
+mcp_servers:
+  twozero_td:
+    url: http://localhost:40404/mcp
+```
+
+### After config changes
+
+Restart the Hermes session for changes to take effect. The MCP connection is
+established at session startup.
+
+### Verifying MCP tools are available
+
+After restarting, the session log should show twozero MCP tools registered.
+If tools show as registered but aren't callable, check:
+- The twozero MCP hub is still running (curl test above)
+- TD is still running with a project loaded
+- No firewall blocking localhost:40404
+
+## 6. Node Creation Issues
 
 ### "Node type not found" error
 
-**Cause:** Wrong `nodeType` string in `create_td_node`.
+Wrong type string. Use camelCase with family suffix:
+- Wrong: NoiseTop, noise_top, NOISE TOP
+- Right: noiseTOP
 
-**Fix:** Use camelCase with family suffix. Common mistakes:
-- Wrong: `NoiseTop`, `noise_top`, `NOISE TOP`, `Noise`
-- Right: `noiseTop`
-- Wrong: `AudioSpectrum`, `audio_spectrum_chop`
-- Right: `audiospectrumChop`
+### Node created but not visible
 
-**Discovery method:** Use `get_td_classes` to see available types, or `execute_python_script` with `dir(td)` filtered for operator classes.
-
-### Node created but not visible in TD
-
-**Cause:** Node was created in a different container than expected, or TD viewport is looking at a different network.
-
-**Fix:** Check `parentPath` — use absolute paths like `/project1`. Verify with `get_td_nodes(parentPath="/project1")`.
+Check parentPath — use absolute paths like /project1. The default project
+root is /project1. System nodes live at /, /ui, /sys, /local, /perform.
+Don't create user nodes outside /project1.
 
 ### Cannot create node inside a non-COMP
 
-**Cause:** Only COMP operators (Container, Base, Geometry, etc.) can contain child operators. You cannot create nodes inside a TOP, CHOP, SOP, DAT, or MAT.
+Only COMP operators (Container, Base, Geometry, etc.) can contain children.
+You cannot create nodes inside a TOP, CHOP, SOP, DAT, or MAT.
 
-**Fix:** Create a Container COMP or Base COMP first, then create nodes inside it.
+## 7. Wiring Issues
 
-## Parameter Issues
+### Cross-family wiring
 
-### Parameter not updating
+TOPs connect to TOPs, CHOPs to CHOPs, SOPs to SOPs, DATs to DATs.
+Use converter operators to bridge: choptoTOP, toptoCHOP, soptoDAT, etc.
 
-**Causes:**
-1. **Wrong parameter name.** TD parameter names change across versions. Run the discovery script (SKILL.md Step 0) or use `get_td_node_parameters` to discover exact names for your TD version. Never trust online docs or this skill's tables — always verify.
-2. **Parameter is read-only.** Some parameters are computed/locked.
-3. **Wrong value type.** Menu parameters need integer index or exact string label.
-4. **Parameter has an expression.** If `node.par.X.expr` is set, `.val` is ignored. Clear the expression first.
-
-**Discovery-based approach (preferred):**
+Note: choptoTOP has NO input connectors. Use par.chop reference instead:
 ```python
-execute_python_script(script="""
-n = op('/project1/mynode')
-pars = [(p.name, type(p.val).__name__, p.val) for p in n.pars()
-        if any(k in p.name.lower() for k in ['color', 'size', 'dat', 'font', 'alpha'])]
-result = pars
-""")
+spec_tex.par.chop = resample_node  # correct
+# NOT: resample.outputConnectors[0].connect(spec_tex.inputConnectors[0])
 ```
 
-**Safe parameter setter pattern:**
+### Feedback loops
+
+Never create A -> B -> A directly. Use a Feedback TOP:
 ```python
-def safe_par(node, name, value):
-    p = getattr(node.par, name, None)
-    if p is not None:
-        p.val = value
-        return True
-    return False  # param doesn't exist in this TD version
+fb = root.create(feedbackTOP, 'fb')
+fb.par.top = comp.path          # reference only, no wire to fb input
+fb.outputConnectors[0].connect(next_node)
 ```
+"Cook dependency loop detected" warning on the chain is expected and correct.
 
-### Common parameter name gotchas
+## 8. GLSL Issues
 
-| What you expect | Actual name | Notes |
-|----------------|-------------|-------|
-| `width` | `resolutionw` | TOP resolution width |
-| `height` | `resolutionh` | TOP resolution height |
-| `filepath` | `file` | File path parameter |
-| `color` | `colorr`, `colorg`, `colorb`, `colora` | Separate RGBA components |
-| `position_x` | `tx` | Translate X |
-| `rotation` | `rz` | Rotate Z (2D rotation) |
-| `scale` | `sx`, `sy` | Separate X/Y scale |
-| `blend_mode` | `operand` | Composite TOP blend mode (integer) |
-| `opacity` | `opacity` | On Level TOP (this one is correct!) |
+### Shader compilation errors are silent
 
-### Composite TOP operand values
+GLSL TOP shows a yellow warning in the UI but node.errors() may return empty.
+Check node.warnings() too. Create an Info DAT pointed at the GLSL TOP for
+full compiler output.
 
-| Mode | Index |
-|------|-------|
-| Over | 0 |
-| Under | 1 |
-| Inside | 2 |
-| Add | 3 |
-| Subtract | 4 |
-| Difference | 5 |
-| Multiply | 18 |
-| Screen | 27 |
-| Maximum | 13 |
-| Minimum | 14 |
-| Average | 28 |
+### TD GLSL specifics
 
-## Connection/Wiring Issues
+- Uses GLSL 4.60 (Vulkan backend). Support for GLSL 3.30 and earlier has been removed.
+- UV coordinates: vUV.st (not gl_FragCoord)
+- Input textures: sTD2DInputs[0]
+- Output: layout(location = 0) out vec4 fragColor
+- macOS CRITICAL: Always wrap output with TDOutputSwizzle(color)
+- No built-in time uniform. Pass time via GLSL TOP Values page or Constant TOP.
 
-### Connections not working
+## 9. Recording Issues
 
-**Causes:**
-1. **Cross-family wiring.** TOPs can only connect to TOPs, CHOPs to CHOPs, etc. Use converter operators to bridge families.
-2. **Wrong connector index.** Most operators have one output connector (index 0). Multi-output operators may need index 1, 2, etc.
-3. **Node path wrong.** Verify paths are absolute and correctly spelled.
+### H.264/H.265/AV1 requires Commercial license
 
-**Verify connections:**
+Use Apple ProRes on macOS (hardware accelerated, not license-restricted):
 ```python
-execute_python_script(script="""
-node = op('/project1/level1')
-result = {
-    'inputs': [i.path if i else None for i in node.inputs],
-    'outputs': [o.path if o else None for o in node.outputs]
-}
-""")
+rec.par.videocodec = 'prores'  # Preferred on macOS — visually lossless, Non-Commercial OK
+# rec.par.videocodec = 'mjpa'  # Fallback — lossy, works everywhere
 ```
 
-### Feedback loops causing errors
+### MovieFileOut has no .record() method
 
-**Symptom:** "Circular dependency" or infinite cook loop.
-
-**Fix:** Always use a Feedback TOP (or a Null TOP with a one-frame delay) to break the loop:
-```
-A -> B -> Feedback(references B) -> A
-```
-Never create A -> B -> A directly.
-
-## Performance Issues
-
-### Low FPS / choppy output
-
-**Common causes and fixes:**
-
-1. **Resolution too high.** Start at 1920x1080, only go higher if GPU handles it.
-2. **Too many operators.** Each operator has GPU/CPU overhead. Consolidate where possible.
-3. **Expensive shader.** GLSL TOPs with complex math per-pixel drain GPU. Profile with TD's Performance Monitor (F2).
-4. **No GPU instancing.** Rendering 1000 separate geometry objects is much slower than 1 instanced geometry.
-5. **Unnecessary cooks.** Operators that don't change frame-to-frame still recook if inputs change. Use Null TOPs to cache stable results.
-6. **Large texture transfers.** TOP to CHOP and CHOP to TOP involve GPU-CPU memory transfers. Minimize these.
-
-**Performance Monitor:**
+Use the toggle parameter:
 ```python
-execute_python_script(script="td.performanceMonitor = True")
-# After testing:
-execute_python_script(script="td.performanceMonitor = False")
+rec.par.record = True   # start
+rec.par.record = False  # stop
 ```
 
-### Memory growing over time
+### All exported frames identical
 
-**Causes:**
-- Cache TOPs with high `length` value
-- Feedback loops without brightness decay (values accumulate)
-- Table DATs growing without clearing
-- Movie File In loading many unique frames
-
-**Fix:** Always add slight decay in feedback loops (Level TOP with `opacity=0.98` or multiply blend). Clear tables periodically.
-
-## Export / Recording Issues
-
-### Movie File Out not recording
-
-**Checklist:**
-1. Is the `record` parameter toggled on? `update_td_node_parameters(properties={"record": true})`
-2. Is an input connected? The Movie File Out needs a TOP input.
-3. Is the output path valid and writable? Check `file` parameter.
-4. Is the codec available? H.264 (type 4) is most reliable.
-
-### Exported video is black
-
-**Causes:**
-1. The TOP chain output is all black (brightness too low).
-2. The input TOP has errors (check with `get_td_node_errors`).
-3. Resolution mismatch — the output may be wrong resolution.
-
-**Debug:** Check the input TOP's actual pixel values:
-```python
-execute_python_script(script="""
-import numpy as np
-top = op('/project1/out')
-arr = top.numpyArray(delayed=True)
-result = {'mean': float(arr.mean()), 'max': float(arr.max()), 'shape': list(arr.shape)}
-""")
-```
-
-### .tox export losing connections
-
-**Note:** When saving a component as .tox, only the component and its internal children are saved. External connections (wires to operators outside the component) are lost. Design self-contained components.
-
-## Python Scripting Issues
-
-### execute_python_script returns empty result
-
-**Causes:**
-1. The script used `exec()` semantics (multi-line) but didn't set `result`.
-2. The last expression has no return value (e.g., `print()` returns None).
-
-**Fix:** Explicitly set `result`:
-```python
-execute_python_script(script="""
-nodes = op('/project1').findChildren(type=TOP)
-result = len(nodes)  # explicitly set return value
-""")
-```
-
-### Script errors not clear
-
-**Check stderr in the response.** The MCP server captures both stdout and stderr from script execution. Error tracebacks appear in stderr.
-
-### Module not found in TD Python
-
-**Cause:** TD's Python environment may not have the module. TD bundles numpy, scipy, opencv, Pillow, and requests. Other packages need manual installation.
-
-**Check available packages:**
-```python
-execute_python_script(script="""
-import sys
-result = [p for p in sys.path]
-""")
-```
-
-## Common Workflow Pitfalls
-
-### Building before verifying connection
-
-Always call `get_td_info` first. If TD isn't running or the WebServer DAT isn't loaded, all subsequent tool calls will fail.
-
-### Not checking errors after building
-
-Always call `get_td_node_errors(nodePath="/project1")` after creating and wiring a network. Broken connections and missing references are silent until you check.
-
-### Creating too many operators in one go
-
-When building complex networks, create in logical groups:
-1. Create all operators in a section
-2. Wire that section
-3. Verify with `get_td_node_errors`
-4. Move to the next section
-
-Don't create 50 operators, wire them all, then discover something was wrong 30 operators ago.
-
-### Parameter expressions vs static values
-
-If you set `node.par.X.val = 5` but there's an expression on that parameter (`node.par.X.expr`), the expression wins. To use a static value, clear the expression first:
-```python
-execute_python_script(script="""
-op('/project1/noise1').par.seed.expr = ''  # clear expression
-op('/project1/noise1').par.seed.val = 42   # now static value works
-""")
-```
-
-### Forgetting to start audio playback
-
-Audio File In CHOP won't produce data unless `play` is True and a valid `file` is set:
-```
-update_td_node_parameters(nodePath="/project1/audio_in",
-    properties={"file": "/path/to/music.wav", "play": true})
-```
-
-### GLSL shader compilation errors
-
-If a GLSL TOP shows errors after setting shader code:
-1. Check the shader code in the Text DAT for syntax errors
-2. Ensure the GLSL version is compatible (TD uses GLSL 3.30+)
-3. Input sampler name must be `sTD2DInputs[0]` (not custom names)
-4. Output must use `layout(location = 0) out vec4 fragColor`
-5. UV coordinates come from `vUV.st` (not `gl_FragCoord`)
+TOP.save() captures the same frame when called rapidly. Use MovieFileOut for
+real-time recording. Set project.realTime = False for frame-accurate output.
diff --git a/skills/creative/touchdesigner/scripts/custom_api_handler.py b/skills/creative/touchdesigner/scripts/custom_api_handler.py
deleted file mode 100644
index fd3772a87df..00000000000
--- a/skills/creative/touchdesigner/scripts/custom_api_handler.py
+++ /dev/null
@@ -1,140 +0,0 @@
-"""
-Custom API Handler for TouchDesigner WebServer DAT
-===================================================
-Use this when mcp_webserver_base.tox fails to load its modules
-(common — the .tox relies on relative paths to a modules/ folder
-that often break during import).
-
-Paste into TD Textport or run via exec(open('...').read()):
-  Creates a WebServer DAT + Text DAT callback handler on port 9981.
-  Implements the core endpoints the MCP server expects.
-
-After running, test with:
-  curl http://127.0.0.1:9981/api/td/server/td
-"""
-
-root = op('/project1')
-
-# Remove broken webserver if present
-old = op('/project1/mcp_webserver_base')
-if old and old.valid:
-    old.destroy()
-
-# Create WebServer DAT
-ws = root.create(webserverDAT, 'api_server')
-ws.par.port = 9981
-ws.par.active = True
-ws.nodeX = -800; ws.nodeY = 500
-
-# Create callback handler
-cb = root.create(textDAT, 'api_handler')
-cb.nodeX = -800; cb.nodeY = 400
-cb.text = r'''
-import json, traceback, io, sys
-
-def onHTTPRequest(webServerDAT, request, response):
-    uri = request.get('uri', '')
-    method = request.get('method', 'GET')
-    response['statusCode'] = 200
-    response['statusReason'] = 'OK'
-    response['headers'] = {'Content-Type': 'application/json', 'Access-Control-Allow-Origin': '*'}
-
-    try:
-        # TD sends POST body as bytes in request['data']
-        raw = request.get('data', request.get('body', ''))
-        if isinstance(raw, bytes):
-            raw = raw.decode('utf-8')
-        body = {}
-        if raw and isinstance(raw, str) and raw.strip():
-            body = json.loads(raw)
-        pars = request.get('pars', {})
-
-        if uri == '/api/td/server/td':
-            response['data'] = json.dumps({
-                'version': str(app.version),
-                'osName': sys.platform,
-                'apiVersion': '1.4.3',
-                'product': 'TouchDesigner'
-            })
-
-        elif uri == '/api/td/server/exec':
-            script = body.get('script', '')
-            old_stdout = sys.stdout
-            sys.stdout = buf = io.StringIO()
-            result_val = None
-            err_text = ''
-            try:
-                globs = {'op': op, 'ops': ops, 'me': webServerDAT, 'parent': parent,
-                         'project': project, 'td': td, 'result': None,
-                         'app': app, 'absTime': absTime}
-                lines = script.strip().split('\n')
-                if len(lines) == 1:
-                    try:
-                        result_val = eval(script, globs)
-                    except SyntaxError:
-                        exec(script, globs)
-                        result_val = globs.get('result')
-                else:
-                    exec(script, globs)
-                    result_val = globs.get('result')
-            except Exception as e:
-                err_text = traceback.format_exc()
-            finally:
-                captured = buf.getvalue()
-                sys.stdout = old_stdout
-            response['data'] = json.dumps({
-                'result': _serialize(result_val),
-                'stdout': captured,
-                'stderr': err_text
-            })
-
-        elif uri == '/api/nodes':
-            pp = pars.get('parentPath', ['/project1'])[0]
-            p = op(pp)
-            nodes = []
-            if p:
-                for c in p.children:
-                    nodes.append({'name': c.name, 'path': c.path,
-                                  'opType': c.OPType, 'family': c.family})
-            response['data'] = json.dumps({'data': nodes})
-
-        elif uri == '/api/nodes/errors':
-            np = pars.get('nodePath', ['/project1'])[0]
-            n = op(np)
-            errors = []
-            if n:
-                def _collect(node, depth=0):
-                    if depth > 10: return
-                    e = node.errors()
-                    if e:
-                        errors.append({'nodePath': node.path, 'nodeName': node.name,
-                                       'opType': node.OPType, 'errors': str(e)})
-                    if hasattr(node, 'children'):
-                        for c in node.children: _collect(c, depth+1)
-                _collect(n)
-            response['data'] = json.dumps({'data': errors, 'hasErrors': len(errors)>0,
-                                            'errorCount': len(errors)})
-
-        else:
-            response['statusCode'] = 404
-            response['data'] = json.dumps({'error': 'Unknown: ' + uri})
-
-    except Exception as e:
-        response['statusCode'] = 500
-        response['data'] = json.dumps({'error': str(e), 'trace': traceback.format_exc()})
-
-    return response
-
-def _serialize(v):
-    if v is None: return None
-    if isinstance(v, (int, float, bool, str)): return v
-    if isinstance(v, (list, tuple)): return [_serialize(i) for i in v]
-    if isinstance(v, dict): return {str(k): _serialize(vv) for k, vv in v.items()}
-    return str(v)
-'''
-
-# Point webserver to callback
-ws.par.callbacks = cb.path
-
-print("Custom API server created on port 9981")
-print("Test: curl http://127.0.0.1:9981/api/td/server/td")
diff --git a/skills/creative/touchdesigner/scripts/setup.sh b/skills/creative/touchdesigner/scripts/setup.sh
index ce8b56870c3..f6bab2f5073 100644
--- a/skills/creative/touchdesigner/scripts/setup.sh
+++ b/skills/creative/touchdesigner/scripts/setup.sh
@@ -1,152 +1,114 @@
 #!/usr/bin/env bash
-# TouchDesigner MCP Setup Verification Script
-# Checks all prerequisites and guides configuration
-
+# setup.sh — Automated setup for twozero MCP plugin for TouchDesigner
+# Idempotent: safe to run multiple times.
 set -euo pipefail
 
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-BLUE='\033[0;34m'
-NC='\033[0m'
+GREEN='\033[0;32m'; RED='\033[0;31m'; YELLOW='\033[1;33m'; CYAN='\033[0;36m'; NC='\033[0m'
+OK="${GREEN}✔${NC}"; FAIL="${RED}✘${NC}"; WARN="${YELLOW}⚠${NC}"
 
-pass() { echo -e "  ${GREEN}✓${NC} $1"; }
-fail() { echo -e "  ${RED}✗${NC} $1"; }
-warn() { echo -e "  ${YELLOW}!${NC} $1"; }
-info() { echo -e "  ${BLUE}→${NC} $1"; }
+TWOZERO_URL="https://www.404zero.com/pisang/twozero.tox"
+TOX_PATH="$HOME/Downloads/twozero.tox"
+HERMES_CFG="$HOME/.hermes/config.yaml"
+MCP_PORT=40404
+MCP_ENDPOINT="http://localhost:${MCP_PORT}/mcp"
 
-echo ""
-echo "TouchDesigner MCP Setup Check"
-echo "=============================="
-echo ""
+manual_steps=()
 
-ERRORS=0
+echo -e "\n${CYAN}═══ twozero MCP for TouchDesigner — Setup ═══${NC}\n"
 
-# 1. Check Node.js
-echo "1. Node.js"
-if command -v node &>/dev/null; then
-    NODE_VER=$(node --version 2>/dev/null || echo "unknown")
-    MAJOR=$(echo "$NODE_VER" | sed 's/^v//' | cut -d. -f1)
-    if [ "$MAJOR" -ge 18 ] 2>/dev/null; then
-        pass "Node.js $NODE_VER (>= 18 required)"
+# ── 1. Check if TouchDesigner is running ──
+if pgrep -if "TouchDesigner" >/dev/null 2>&1; then
+    echo -e " ${OK} TouchDesigner is running"
+    td_running=true
+else
+    echo -e " ${WARN} TouchDesigner is not running"
+    td_running=false
+fi
+
+# ── 2. Ensure twozero.tox exists ──
+if [[ -f "$TOX_PATH" ]]; then
+    echo -e " ${OK} twozero.tox already exists at ${TOX_PATH}"
+else
+    echo -e " ${WARN} twozero.tox not found — downloading..."
+    if curl -fSL -o "$TOX_PATH" "$TWOZERO_URL" 2>/dev/null; then
+        echo -e " ${OK} Downloaded twozero.tox to ${TOX_PATH}"
     else
-        fail "Node.js $NODE_VER (>= 18 required, please upgrade)"
-        ERRORS=$((ERRORS + 1))
+        echo -e " ${FAIL} Failed to download twozero.tox from ${TWOZERO_URL}"
+        echo "       Please download manually and place at ${TOX_PATH}"
+        manual_steps+=("Download twozero.tox from ${TWOZERO_URL} to ${TOX_PATH}")
+    fi
+fi
+
+# ── 3. Ensure Hermes config has twozero_td MCP entry ──
+if [[ ! -f "$HERMES_CFG" ]]; then
+    echo -e " ${FAIL} Hermes config not found at ${HERMES_CFG}"
+    manual_steps+=("Create ${HERMES_CFG} with twozero_td MCP server entry")
+elif grep -q 'twozero_td' "$HERMES_CFG" 2>/dev/null; then
+    echo -e " ${OK} twozero_td MCP entry exists in Hermes config"
+else
+    echo -e " ${WARN} Adding twozero_td MCP entry to Hermes config..."
+    python3 -c "
+import yaml, sys, copy
+
+cfg_path = '$HERMES_CFG'
+with open(cfg_path, 'r') as f:
+    cfg = yaml.safe_load(f) or {}
+
+if 'mcp_servers' not in cfg:
+    cfg['mcp_servers'] = {}
+
+if 'twozero_td' not in cfg['mcp_servers']:
+    cfg['mcp_servers']['twozero_td'] = {
+        'url': '${MCP_ENDPOINT}',
+        'timeout': 120,
+        'connect_timeout': 60
+    }
+    with open(cfg_path, 'w') as f:
+        yaml.dump(cfg, f, default_flow_style=False, sort_keys=False)
+    print('added')
+else:
+    print('exists')
+" 2>/dev/null && echo -e " ${OK} twozero_td MCP entry added to config" \
+              || { echo -e " ${FAIL} Could not update config (is PyYAML installed?)"; \
+                   manual_steps+=("Add twozero_td MCP entry to ${HERMES_CFG} manually"); }
+    manual_steps+=("Restart Hermes session to pick up config change")
+fi
+
+# ── 4. Test if MCP port is responding ──
+if nc -z 127.0.0.1 "$MCP_PORT" 2>/dev/null; then
+    echo -e " ${OK} Port ${MCP_PORT} is open"
+
+    # ── 5. Verify MCP endpoint responds ──
+    resp=$(curl -s --max-time 3 "$MCP_ENDPOINT" 2>/dev/null || true)
+    if [[ -n "$resp" ]]; then
+        echo -e " ${OK} MCP endpoint responded at ${MCP_ENDPOINT}"
+    else
+        echo -e " ${WARN} Port open but MCP endpoint returned empty response"
+        manual_steps+=("Verify MCP is enabled in twozero settings")
     fi
 else
-    fail "Node.js not found"
-    info "Install: https://nodejs.org/ or 'brew install node'"
-    ERRORS=$((ERRORS + 1))
-fi
-
-# 2. Check npm/npx
-echo "2. npm/npx"
-if command -v npx &>/dev/null; then
-    NPX_VER=$(npx --version 2>/dev/null || echo "unknown")
-    pass "npx $NPX_VER"
-else
-    fail "npx not found (usually comes with Node.js)"
-    ERRORS=$((ERRORS + 1))
-fi
-
-# 3. Check MCP Python package
-echo "3. MCP Python package"
-HERMES_HOME="${HERMES_HOME:-$HOME/.hermes}"
-VENV_PYTHON=""
-
-# Try to find the Hermes venv Python
-if [ -f "$HERMES_HOME/hermes-agent/.venv/bin/python" ]; then
-    VENV_PYTHON="$HERMES_HOME/hermes-agent/.venv/bin/python"
-elif [ -f "$HERMES_HOME/hermes-agent/venv/bin/python" ]; then
-    VENV_PYTHON="$HERMES_HOME/hermes-agent/venv/bin/python"
-fi
-
-if [ -n "$VENV_PYTHON" ]; then
-    if $VENV_PYTHON -c "import mcp" 2>/dev/null; then
-        MCP_VER=$($VENV_PYTHON -c "import importlib.metadata; print(importlib.metadata.version('mcp'))" 2>/dev/null || echo "installed")
-        pass "mcp package ($MCP_VER) in Hermes venv"
+    echo -e " ${WARN} Port ${MCP_PORT} is not open"
+    if [[ "$td_running" == true ]]; then
+        manual_steps+=("In TD: drag twozero.tox into network editor → click Install")
+        manual_steps+=("Enable MCP: twozero icon → Settings → mcp → 'auto start MCP' → Yes")
     else
-        fail "mcp package not installed in Hermes venv"
-        info "Install: $VENV_PYTHON -m pip install mcp"
-        ERRORS=$((ERRORS + 1))
+        manual_steps+=("Launch TouchDesigner")
+        manual_steps+=("Drag twozero.tox into the TD network editor and click Install")
+        manual_steps+=("Enable MCP: twozero icon → Settings → mcp → 'auto start MCP' → Yes")
     fi
+fi
+
+# ── Status Report ──
+echo -e "\n${CYAN}═══ Status Report ═══${NC}\n"
+
+if [[ ${#manual_steps[@]} -eq 0 ]]; then
+    echo -e " ${OK} ${GREEN}Fully configured! twozero MCP is ready to use.${NC}\n"
+    exit 0
 else
-    warn "Could not find Hermes venv — check mcp package manually"
-fi
-
-# 4. Check TouchDesigner
-echo "4. TouchDesigner"
-TD_FOUND=false
-
-# macOS
-if [ -d "/Applications/TouchDesigner.app" ]; then
-    TD_FOUND=true
-    pass "TouchDesigner found at /Applications/TouchDesigner.app"
-fi
-
-# Linux (common install locations)
-if command -v TouchDesigner &>/dev/null; then
-    TD_FOUND=true
-    pass "TouchDesigner found in PATH"
-fi
-
-if [ -d "$HOME/TouchDesigner" ]; then
-    TD_FOUND=true
-    pass "TouchDesigner found at ~/TouchDesigner"
-fi
-
-if [ "$TD_FOUND" = false ]; then
-    warn "TouchDesigner not detected (may be installed elsewhere)"
-    info "Download from: https://derivative.ca/download"
-    info "Free Non-Commercial license available"
-fi
-
-# 5. Check TD WebServer DAT reachability
-echo "5. TouchDesigner WebServer DAT"
-TD_URL="${TD_API_URL:-http://127.0.0.1:9981}"
-if command -v curl &>/dev/null; then
-    HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 3 "$TD_URL/api/td/server/td" 2>/dev/null || echo "000")
-    if [ "$HTTP_CODE" = "200" ]; then
-        TD_INFO=$(curl -s --connect-timeout 3 "$TD_URL/api/td/server/td" 2>/dev/null || echo "{}")
-        pass "TD WebServer DAT responding at $TD_URL"
-        info "Response: $TD_INFO"
-    elif [ "$HTTP_CODE" = "000" ]; then
-        warn "Cannot reach TD WebServer DAT at $TD_URL"
-        info "Make sure TouchDesigner is running with mcp_webserver_base.tox imported"
-    else
-        warn "TD WebServer DAT returned HTTP $HTTP_CODE at $TD_URL"
-    fi
-else
-    warn "curl not found — cannot test TD connection"
-fi
-
-# 6. Check Hermes config
-echo "6. Hermes MCP config"
-CONFIG_FILE="$HERMES_HOME/config.yaml"
-if [ -f "$CONFIG_FILE" ]; then
-    if grep -q "touchdesigner" "$CONFIG_FILE" 2>/dev/null; then
-        pass "TouchDesigner MCP server configured in config.yaml"
-    else
-        warn "No 'touchdesigner' entry found in mcp_servers config"
-        info "Add a touchdesigner entry under mcp_servers: in $CONFIG_FILE"
-        info "See references/mcp-tools.md for the configuration block"
-    fi
-else
-    warn "No Hermes config.yaml found at $CONFIG_FILE"
-fi
-
-# Summary
-echo ""
-echo "=============================="
-if [ $ERRORS -eq 0 ]; then
-    echo -e "${GREEN}All critical checks passed!${NC}"
+    echo -e " ${WARN} ${YELLOW}Manual steps remaining:${NC}\n"
+    for i in "${!manual_steps[@]}"; do
+        echo -e "   $((i+1)). ${manual_steps[$i]}"
+    done
     echo ""
-    echo "Next steps:"
-    echo "  1. Open TouchDesigner and import mcp_webserver_base.tox"
-    echo "  2. Add the MCP server config to Hermes (see references/mcp-tools.md)"
-    echo "  3. Restart Hermes and test: 'Get TouchDesigner server info'"
-else
-    echo -e "${RED}$ERRORS critical issue(s) found.${NC}"
-    echo "Fix the issues above, then re-run this script."
+    exit 1
 fi
-echo ""

From 6d2fe1d6249122a3198447118d412eb20726515d Mon Sep 17 00:00:00 2001
From: kshitijk4poor <kshitijk4poor@gmail.com>
Date: Fri, 17 Apr 2026 22:12:26 +0530
Subject: [PATCH 077/143] feat: rename touchdesigner -> touchdesigner-mcp, move
 to optional-skills/

- Rename skill to touchdesigner-mcp (matches blender-mcp convention)
- Move from skills/creative/ to optional-skills/creative/
- Fix duplicate pitfall numbering (#3 appeared twice)
- Update SKILL.md cross-references for renumbered pitfalls
- Update setup.sh path for new directory location
---
 .../creative/touchdesigner-mcp}/SKILL.md      |  14 +-
 .../references/mcp-tools.md                   |   0
 .../references/network-patterns.md            |   0
 .../references/operators.md                   |   0
 .../touchdesigner-mcp/references/pitfalls.md  | 508 ++++++++++++++++++
 .../references/python-api.md                  |   0
 .../references/troubleshooting.md             |   0
 .../touchdesigner-mcp}/scripts/setup.sh       |   0
 8 files changed, 515 insertions(+), 7 deletions(-)
 rename {skills/creative/touchdesigner => optional-skills/creative/touchdesigner-mcp}/SKILL.md (97%)
 rename {skills/creative/touchdesigner => optional-skills/creative/touchdesigner-mcp}/references/mcp-tools.md (100%)
 rename {skills/creative/touchdesigner => optional-skills/creative/touchdesigner-mcp}/references/network-patterns.md (100%)
 rename {skills/creative/touchdesigner => optional-skills/creative/touchdesigner-mcp}/references/operators.md (100%)
 create mode 100644 optional-skills/creative/touchdesigner-mcp/references/pitfalls.md
 rename {skills/creative/touchdesigner => optional-skills/creative/touchdesigner-mcp}/references/python-api.md (100%)
 rename {skills/creative/touchdesigner => optional-skills/creative/touchdesigner-mcp}/references/troubleshooting.md (100%)
 rename {skills/creative/touchdesigner => optional-skills/creative/touchdesigner-mcp}/scripts/setup.sh (100%)

diff --git a/skills/creative/touchdesigner/SKILL.md b/optional-skills/creative/touchdesigner-mcp/SKILL.md
similarity index 97%
rename from skills/creative/touchdesigner/SKILL.md
rename to optional-skills/creative/touchdesigner-mcp/SKILL.md
index 88fc79b2e74..2df25117f50 100644
--- a/skills/creative/touchdesigner/SKILL.md
+++ b/optional-skills/creative/touchdesigner-mcp/SKILL.md
@@ -1,8 +1,8 @@
 ---
-name: touchdesigner
+name: touchdesigner-mcp
 description: "Control a running TouchDesigner instance via twozero MCP — create operators, set parameters, wire connections, execute Python, build real-time visuals. 36 native tools."
 version: 1.0.0
-author: Hermes Agent
+author: kshitijk4poor
 license: MIT
 metadata:
   hermes:
@@ -36,7 +36,7 @@ Hub health check: `GET http://localhost:40404/mcp` returns JSON with instance PI
 Run the setup script to handle everything:
 
 ```bash
-bash ~/.hermes/skills/creative/touchdesigner/scripts/setup.sh
+bash ~/.hermes/optional-skills/creative/touchdesigner-mcp/scripts/setup.sh
 ```
 
 The script will:
@@ -78,7 +78,7 @@ No temp nodes, no cleanup. This replaces the old discovery dance entirely.
 
 ### Step 1: Clean + Build
 
-**IMPORTANT: Split cleanup and creation into SEPARATE MCP calls.** Destroying and recreating same-named nodes in one `td_execute_python` script causes "Invalid OP object" errors. See pitfalls #10b.
+**IMPORTANT: Split cleanup and creation into SEPARATE MCP calls.** Destroying and recreating same-named nodes in one `td_execute_python` script causes "Invalid OP object" errors. See pitfalls #11b.
 
 Use `td_create_operator` for each node (handles viewport positioning automatically):
 
@@ -255,9 +255,9 @@ Extract frames: `ffmpeg -i /tmp/output.mov -vframes 120 /tmp/frames/frame_%06d.p
 
 ### Before Recording: Checklist
 
-1. **Verify FPS > 0** via `td_get_perf`. If FPS=0 the recording will be empty. See pitfalls #37-38.
-2. **Verify shader output is not black** via `td_get_screenshot`. Black output = shader error or missing input. See pitfalls #7, #39.
-3. **If recording with audio:** cue audio to start first, then delay recording by 3 frames. See pitfalls #18.
+1. **Verify FPS > 0** via `td_get_perf`. If FPS=0 the recording will be empty. See pitfalls #38-39.
+2. **Verify shader output is not black** via `td_get_screenshot`. Black output = shader error or missing input. See pitfalls #8, #40.
+3. **If recording with audio:** cue audio to start first, then delay recording by 3 frames. See pitfalls #19.
 4. **Set output path before starting record** — setting both in the same script can race.
 
 ## Audio-Reactive GLSL (Proven Recipe)
diff --git a/skills/creative/touchdesigner/references/mcp-tools.md b/optional-skills/creative/touchdesigner-mcp/references/mcp-tools.md
similarity index 100%
rename from skills/creative/touchdesigner/references/mcp-tools.md
rename to optional-skills/creative/touchdesigner-mcp/references/mcp-tools.md
diff --git a/skills/creative/touchdesigner/references/network-patterns.md b/optional-skills/creative/touchdesigner-mcp/references/network-patterns.md
similarity index 100%
rename from skills/creative/touchdesigner/references/network-patterns.md
rename to optional-skills/creative/touchdesigner-mcp/references/network-patterns.md
diff --git a/skills/creative/touchdesigner/references/operators.md b/optional-skills/creative/touchdesigner-mcp/references/operators.md
similarity index 100%
rename from skills/creative/touchdesigner/references/operators.md
rename to optional-skills/creative/touchdesigner-mcp/references/operators.md
diff --git a/optional-skills/creative/touchdesigner-mcp/references/pitfalls.md b/optional-skills/creative/touchdesigner-mcp/references/pitfalls.md
new file mode 100644
index 00000000000..33c9b5f4d87
--- /dev/null
+++ b/optional-skills/creative/touchdesigner-mcp/references/pitfalls.md
@@ -0,0 +1,508 @@
+# TouchDesigner MCP — Pitfalls & Lessons Learned
+
+Hard-won knowledge from real TD sessions. Read this before building anything.
+
+## Parameter Names
+
+### 1. NEVER hardcode parameter names — always discover
+
+Parameter names change between TD versions. What works in one build may not work in another. ALWAYS use td_get_par_info to discover actual names from TD.
+
+The agent's LLM training data contains WRONG parameter names. Do not trust them.
+
+Known historical differences (may vary further — always verify):
+| What docs/training say | Actual in some versions | Notes |
+|---------------|---------------|-------|
+| `dat` | `pixeldat` | GLSL TOP pixel shader DAT |
+| `colora` | `alpha` | Constant TOP alpha |
+| `sizex` / `sizey` | `size` | Blur TOP (single value) |
+| `fontr/g/b/a` | `fontcolorr/g/b` | Text TOP font color (r/g/b only — alpha is `fontalpha`, see next row) |
+| `fontcolora` | `fontalpha` | Text TOP font alpha (NOT `fontcolora`) |
+| `bgcolora` | `bgalpha` | Text TOP bg alpha |
+| `value1name` | `vec0name` | GLSL TOP uniform name |
+
+### 2. twozero td_execute_python response format
+
+When calling `td_execute_python` via twozero MCP, successful responses return `(ok)` followed by FPS/error summary (e.g. `[fps 60.0/60] [0 err/0 warn]`), NOT the raw Python `result` dict. If you're parsing responses programmatically, check for the `(ok)` prefix — don't pattern-match on Python variable names from the script. Use `td_get_operator_info` or separate inspection calls to read back values.
+
+### 3. When using td_set_operator_pars, param names must match exactly
+
+Use td_get_par_info to discover them. The MCP tool validates parameter names and returns clear errors explaining what went wrong, unlike raw Python which crashes the whole script with tdAttributeError and stops execution. Always discover before setting.
+
+### 4. Use `safe_par()` pattern for cross-version compatibility
+
+```python
+def safe_par(node, name, value):
+    p = getattr(node.par, name, None)
+    if p is not None:
+        p.val = value
+        return True
+    return False
+```
+
+### 5. `td.tdAttributeError` crashes the whole script — use defensive access
+
+If you do `node.par.nonexistent = value`, TD raises `tdAttributeError` and stops the entire script. Prevention is better than catching:
+- Use `op()` instead of `opex()` — `op()` returns None on failure, `opex()` raises
+- Use `hasattr(node.par, 'name')` before accessing any parameter
+- Use `getattr(node.par, 'name', None)` with a default
+- Use the `safe_par()` pattern from pitfall #4
+
+```python
+# WRONG — crashes if param doesn't exist:
+node.par.nonexistent = value
+
+# CORRECT — defensive access:
+if hasattr(node.par, 'nonexistent'):
+    node.par.nonexistent = value
+```
+
+### 6. `outputresolution` is a string menu, not an integer
+
+```
+menuNames: ['useinput','eighth','quarter','half','2x','4x','8x','fit','limit','custom','parpanel']
+```
+Always use the string form. Setting `outputresolution = 9` may silently fail.
+```python
+node.par.outputresolution = 'custom'  # correct
+node.par.resolutionw = 1280; node.par.resolutionh = 720
+```
+Discover valid values: `list(node.par.outputresolution.menuNames)`
+
+## GLSL Shaders
+
+### 7. `uTDCurrentTime` does NOT exist in GLSL TOP
+
+There is NO built-in time uniform for GLSL TOPs. GLSL MAT has `uTDGeneral.seconds` but that's NOT available in GLSL TOP context.
+
+**PRIMARY — GLSL TOP Vectors/Values page:**
+```python
+gl.par.value0name = 'uTime'
+gl.par.value0.expr = "absTime.seconds"
+# In GLSL: uniform float uTime;
+```
+
+**FALLBACK — Constant TOP texture (for complex time data):**
+
+CRITICAL: set format to `rgba32float` — default 8-bit clamps to 0-1:
+```python
+t = root.create(constantTOP, 'time_driver')
+t.par.format = 'rgba32float'
+t.par.outputresolution = 'custom'
+t.par.resolutionw = 1; t.par.resolutionh = 1
+t.par.colorr.expr = "absTime.seconds % 1000.0"
+t.outputConnectors[0].connect(glsl.inputConnectors[0])
+```
+
+### 8. GLSL compile errors are silent in the API
+
+The GLSL TOP shows a yellow warning triangle in the UI but `node.errors()` may return empty string. Check `node.warnings()` too, and create an Info DAT pointed at the GLSL TOP to read the actual compiler output.
+
+### 9. TD GLSL uses `vUV.st` not `gl_FragCoord` — and REQUIRES `TDOutputSwizzle()` on macOS
+
+Standard GLSL patterns don't work. TD provides:
+- `vUV.st` — UV coordinates (0-1)
+- `uTDOutputInfo.res.zw` — resolution
+- `sTD2DInputs[0]` — input textures
+- `layout(location = 0) out vec4 fragColor` — output
+
+CRITICAL on macOS: Always wrap output with `TDOutputSwizzle()`:
+```glsl
+fragColor = TDOutputSwizzle(color);
+```
+TD uses GLSL 4.60 (Vulkan backend). GLSL 3.30 and earlier removed.
+
+### 10. Large GLSL shaders — write to temp file
+
+GLSL code with special characters can corrupt JSON payloads. Write the shader to a temp file and load it in TD:
+```python
+# Agent side: write shader to /tmp/shader.glsl via write_file
+# TD side:
+sd = root.create(textDAT, 'shader_code')
+with open('/tmp/shader.glsl', 'r') as f:
+    sd.text = f.read()
+```
+
+## Node Management
+
+### 11. Destroying nodes while iterating `root.children` causes `tdError`
+
+The iterator is invalidated when a child is destroyed. Always snapshot first:
+```python
+kids = list(root.children)  # snapshot
+for child in kids:
+    if child.valid:  # check — earlier destroys may cascade
+        child.destroy()
+```
+
+### 11b. Split cleanup and creation into SEPARATE td_execute_python calls
+
+Creating nodes with the same names you just destroyed in the SAME script causes "Invalid OP object" errors — even with `list()` snapshot. TD's internal references can go stale within one execution context.
+
+**WRONG (single call):**
+```python
+# td_execute_python:
+for c in list(root.children):
+    if c.valid and c.name.startswith('promo_'):
+        c.destroy()
+# ... then create promo_audio, promo_shader etc. in same script → CRASHES
+```
+
+**CORRECT (two separate calls):**
+```python
+# Call 1: td_execute_python — clean only
+for c in list(root.children):
+    if c.valid and c.name.startswith('promo_'):
+        c.destroy()
+
+# Call 2: td_execute_python — build (separate MCP call)
+audio = root.create(audiofileinCHOP, 'promo_audio')
+# ... rest of build
+```
+
+### 12. Feedback TOP: use `top` parameter, NOT direct input wire
+
+The feedbackTOP's `top` parameter references which TOP to delay. Do NOT also wire that TOP directly into the feedback's input — this creates a real cook dependency loop.
+
+Correct setup:
+```python
+fb = root.create(feedbackTOP, 'fb_delay')
+fb.par.top = comp.path          # reference only — no wire to fb input
+fb.outputConnectors[0].connect(xf)  # fb output -> transform -> fade -> comp
+```
+
+The "Cook dependency loop detected" warning on the transform/fade chain is expected.
+
+### 13. GLSL TOP auto-creates companion nodes
+
+Creating a `glslTOP` also creates `name_pixel` (Text DAT), `name_info` (Info DAT), and `name_compute` (Text DAT). These are visible in the network. Don't be alarmed by "extra" nodes.
+
+### 14. The default project root is `/project1`
+
+New TD files start with `/project1` as the main container. System nodes live at `/`, `/ui`, `/sys`, `/local`, `/perform`. Don't create user nodes outside `/project1`.
+
+### 15. Non-Commercial license caps resolution at 1280x1280
+
+Setting `resolutionw=1920` silently clamps to 1280. Always check effective resolution after creation:
+```python
+n.cook(force=True)
+actual = str(n.width) + 'x' + str(n.height)
+```
+
+## Recording & Codecs
+
+### 16. MovieFileOut TOP: H.264/H.265/AV1 requires Commercial license
+
+In Non-Commercial TD, these codecs produce an error. Recommended alternatives:
+- `prores` — Apple ProRes, **best on macOS**, HW accelerated, NOT license-restricted. ~55MB/s at 1280x720 but lossless quality. **Use this as default on macOS.**
+- `cineform` — GoPro Cineform, supports alpha
+- `hap` — GPU-accelerated playback, large files
+- `notchlc` — GPU-accelerated, good quality
+- `mjpa` — Motion JPEG, legacy fallback (lossy, use only if ProRes unavailable)
+
+For image sequences: `rec.par.type = 'imagesequence'`, `rec.par.imagefiletype = 'png'`
+
+### 17. MovieFileOut `.record()` method may not exist
+
+Use the toggle parameter instead:
+```python
+rec.par.record = True   # start recording
+rec.par.record = False  # stop recording
+```
+
+When setting file path and starting recording in the same script, use delayFrames:
+```python
+rec.par.file = '/tmp/new_output.mov'
+run("op('/project1/recorder').par.record = True", delayFrames=2)
+```
+
+### 18. TOP.save() captures same frame when called rapidly
+
+Use MovieFileOut for real-time recording. Set `project.realTime = False` for frame-accurate output.
+
+### 19. AudioFileIn CHOP: cue and recording sequence matters
+
+The recording sequence must be done in exact order, or the recording will be empty, audio will start mid-file, or the file won't be written.
+
+**Proven recording sequence:**
+
+```python
+# Step 1: Stop any existing recording
+rec.par.record = False
+
+# Step 2: Reset audio to beginning
+audio.par.play = False
+audio.par.cue = True
+audio.par.cuepoint = 0      # may need cuepointunit=0 too
+# Verify: audio.par.cue.eval() should be True
+
+# Step 3: Set output file path
+rec.par.file = '/tmp/output.mov'
+
+# Step 4: Release cue + start playing + start recording (with frame delay)
+audio.par.cue = False
+audio.par.play = True
+audio.par.playmode = 2      # Sequential — plays once through
+run("op('/project1/recorder').par.record = True", delayFrames=3)
+```
+
+**Why each step matters:**
+- `rec.par.record = False` first — if a previous recording is active, setting `par.file` may fail silently
+- `audio.par.cue = True` + `cuepoint = 0` — guarantees audio starts from the beginning, otherwise the spectrum may be silent for the first few seconds
+- `delayFrames=3` on the record start — setting `par.file` and `par.record = True` in the same script can race; the file path needs a frame to register before recording starts
+- `playmode = 2` (Sequential) — plays the file once. Use `playmode = 0` (Locked to Timeline) if you want TD's timeline to control position
+
+## TD Python API Patterns
+
+### 20. COMP extension setup: ext0object format is CRITICAL
+
+`ext0object` expects a CONSTANT string (NOT expression mode):
+```python
+comp.par.ext0object = "op('./myExtensionDat').module.MyClassName(me)"
+```
+NEVER set as just the DAT name. NEVER use ParMode.EXPRESSION. ALWAYS ensure the DAT has `par.language='python'`.
+
+### 21. td.Panel is NOT subscriptable — use attribute access
+
+```python
+comp.panel.select      # correct (attribute access, returns float)
+comp.panel['select']   # WRONG — 'td.Panel' object is not subscriptable
+```
+
+### 22. ALWAYS use relative paths in script callbacks
+
+In scriptTOP/CHOP/SOP/DAT callbacks, use paths relative to `scriptOp` or `me`:
+```python
+root = scriptOp.parent().parent()
+dat = root.op('pixel_data')
+```
+NEVER hardcode absolute paths like `op('/project1/myComp/child')` — they break when containers are renamed or copied.
+
+### 23. keyboardinCHOP channel names have 'k' prefix
+
+Channel names are `kup`, `kdown`, `kleft`, `kright`, `ka`, `kb`, etc. — NOT `up`, `down`, `a`, `b`. Always verify with:
+```python
+channels = [c.name for c in op('/project1/keyboard1').chans()]
+```
+
+### 24. expressCHOP cook-only properties — false positive errors
+
+`me.inputVal`, `me.chanIndex`, `me.sampleIndex` work ONLY in cook-context. Calling `par.expr0expr.eval()` from outside always raises an error — this is NOT a real operator error. Ignore these in error scans.
+
+### 25. td.Vertex attributes — use index access not named attributes
+
+In TD 2025.32, `td.Vertex` objects do NOT have `.x`, `.y`, `.z` attributes:
+```python
+# WRONG — crashes:
+vertex.x, vertex.y, vertex.z
+
+# CORRECT — index-based:
+vertex.point.P[0], vertex.point.P[1], vertex.point.P[2]
+# Or for SOP point positions:
+pt = sop.points()[i]
+pos = pt.P    # use P[0], P[1], P[2]
+```
+
+## Audio
+
+### 26. Audio Spectrum CHOP output is weak — boost it
+
+Raw output is very small (0.001-0.05). Use built-in boost: `spectrum.par.highfrequencyboost = 3.0`
+
+If still weak, add Math CHOP in Range mode: `fromrangehi=0.05, torangehi=1.0`
+
+### 27. AudioSpectrum CHOP: timeslice and sample count are the #1 gotcha
+
+AudioSpectrum at 44100Hz with `timeslice=False` outputs the ENTIRE audio file as samples (~24000+). CHOP-to-TOP then exceeds texture resolution max and warns/fails.
+
+**Fix:** Keep `timeslice = True` (default) for real-time per-frame FFT. Set `fftsize` to control bin count (it's a STRING enum: `'256'` not `256`).
+
+If the CHOP-to-TOP still gets too many samples, set `layout = 'rowscropped'` on the choptoTOP.
+
+```python
+spectrum.par.fftsize = '256'      # STRING, not int — enum values
+spectrum.par.timeslice = True     # MUST be True for real-time audio reactivity
+spectex.par.layout = 'rowscropped'  # handles oversized CHOP inputs
+```
+
+**resampleCHOP has NO `numsamples` param.** It uses `rate`, `start`, `end`, `method`. Don't guess — always `td_get_par_info('resampleCHOP')` first.
+
+### 28. CHOP To TOP has NO input connectors — use par.chop reference
+
+```python
+spec_tex = root.create(choptoTOP, 'spectrum_tex')
+spec_tex.par.chop = resample  # correct: parameter reference
+# NOT: resample.outputConnectors[0].connect(spec_tex.inputConnectors[0])  # WRONG
+```
+
+## Workflow
+
+### 29. Always verify after building — errors are silent
+
+Node errors and broken connections produce no output. Always check:
+```python
+for c in list(root.children):
+    e = c.errors()
+    w = c.warnings()
+    if e: print(c.name, 'ERR:', e)
+    if w: print(c.name, 'WARN:', w)
+```
+
+### 30. Window COMP param for display target is `winop`
+
+```python
+win = root.create(windowCOMP, 'display')
+win.par.winop = '/project1/logo_out'
+win.par.winw = 1280; win.par.winh = 720
+win.par.winopen.pulse()
+```
+
+### 31. `sample()` returns frozen pixels in rapid calls
+
+`out.sample(x, y)` returns pixels from a single cook snapshot. Compare samples with 2+ second delays, or use screencapture on the display window.
+
+### 32. Audio-reactive GLSL: dual-layer sync pipeline
+
+For audio-synced visuals, use BOTH layers for maximum effect:
+
+**Layer 1 (TD-side, real-time):** AudioFileIn → AudioSpectrum(timeslice=True, fftsize='256') → Math(gain=5) → choptoTOP(par.chop=math, layout='rowscropped') → GLSL input. The shader samples `sTD2DInputs[1]` at different x positions for bass/mid/hi. Record the TD output with MovieFileOut.
+
+**Layer 2 (Python-side, post-hoc):** scipy FFT on the SAME audio file → per-frame features (rms, bass, mid, hi, beat detection) → drive ASCII brightness, chromatic aberration, beat flashes during the render pass.
+
+Both layers locked to the same audio file = visuals genuinely sync to the beat at two independent stages.
+
+**Key gotcha:** AudioFileIn must be cued (`par.cue=True` → `par.cuepulse.pulse()`) then uncued (`par.cue=False`, `par.play=True`) before recording starts. Otherwise the spectrum is silent for the first few seconds.
+
+### 33. twozero MCP: benchmark and prefer native tools
+
+Benchmarked April 2026: twozero MCP with 36 native tools. The old curl/REST method (port 9981) had zero native tools.
+
+**Always prefer native MCP tools over td_execute_python:**
+- `td_create_operator` over `root.create()` scripts (handles viewport positioning)
+- `td_set_operator_pars` over `node.par.X = Y` scripts (validates param names)
+- `td_get_par_info` over temp-node discovery dance (instant, no cleanup)
+- `td_get_errors` over manual `c.errors()` loops
+- `td_get_focus` for context awareness (no equivalent in old method)
+
+Only fall back to `td_execute_python` for multi-step logic (wiring chains, conditional builds, loops).
+
+### 34. twozero td_execute_python response wrapping
+
+twozero wraps `td_execute_python` responses with status info: `(ok)\n\n[fps 60.0/60] [0 err/0 warn]`. Your Python `result` variable value may not appear verbatim in the response text. If you need to check results programmatically, use `print()` statements in the script — they appear in the response. Don't rely on string-matching the `result` dict.
+
+### 35. Audio-reactive chain: DO NOT use Lag CHOP or Filter CHOP for spectrum smoothing
+
+The Derivative docs and tutorials suggest using Lag CHOP (lag1=0.2, lag2=0.5) to smooth raw FFT output before passing to a shader. **This does NOT work with AudioSpectrum → CHOP to TOP → GLSL.**
+
+What happens: Lag CHOP operates in timeslice mode. A 256-sample spectrum input gets expanded to 1600-2400 samples. The Lag averaging drives all values to near-zero (~1e-06). The CHOP to TOP produces a 2400x2 texture instead of 256x2. The shader receives effectively zero audio data.
+
+**The correct chain is: Spectrum(outlength=256) → Math(gain=10) → CHOPtoTOP → GLSL.** No CHOP smoothing at all. If you need smoothing, do it in the GLSL shader via temporal lerp with a feedback texture.
+
+Verified values with audio playing:
+- Without Lag CHOP: bass bins = 5.0-5.4, mid bins = 1.0-1.7 (strong, usable)
+- With Lag CHOP: ALL bins = 0.000001-0.00004 (dead, zero audio reactivity)
+
+### 36. AudioSpectrum Output Length: set manually to avoid CHOP to TOP overflow
+
+AudioSpectrum in Visualization mode with FFT 8192 outputs 22,050 samples by default (1 per Hz, 0–22050). CHOP to TOP cannot handle this — you get "Number of samples exceeded texture resolution max".
+
+Fix: `spectrum.par.outputmenu = 'setmanually'` and `spectrum.par.outlength = 256`. This gives 256 frequency bins — plenty for visual FFT.
+
+DO NOT set `timeslice = False` as a workaround — that processes the entire audio file at once and produces even more samples.
+
+### 37. GLSL spectrum texture from CHOP to TOP is 256x2 not 256x1
+
+AudioSpectrum outputs 2 channels (stereo: chan1, chan2). CHOP to TOP with `dataformat='r'` creates a 256x2 texture — one row per channel. Sample the first channel at `y=0.25` (center of first row), NOT `y=0.5` (boundary between rows):
+
+```glsl
+float bass = texture(sTD2DInputs[1], vec2(0.05, 0.25)).r;  // correct
+float bass = texture(sTD2DInputs[1], vec2(0.05, 0.5)).r;   // WRONG — samples between rows
+```
+
+### 38. FPS=0 doesn't mean ops aren't cooking — check play state
+
+TD can show `fps:0` in `td_get_perf` while ops still cook and `TOP.save()` still produces valid screenshots. The two most common causes:
+
+**a) Project is paused (playbar stopped).** TD's playbar can be toggled with spacebar. The `root` at `/` has no `.playbar` attribute (it's on the perform COMP). The easiest fix is sending a spacebar keypress via `td_input_execute`, though this tool can sometimes error. As a workaround, `TOP.save()` always works regardless of play state — use it to verify rendering is actually happening before spending time debugging FPS.
+
+**b) Audio device CHOP blocking the main thread.** An `audiooutCHOP` with an active audio device can consume 300-400ms/s (2000%+ of frame budget), stalling the cook loop at FPS=0. Fix: keep the CHOP active but set `volume=0` to prevent the audio driver from blocking. Disabling it entirely (`active=False`) may also work but can prevent downstream audio processing CHOPs from cooking.
+
+Diagnostic sequence when FPS=0:
+1. `td_get_perf` — check if any op has extreme CPU/s
+2. `TOP.save()` on the output — if it produces a valid image, the pipeline works, just not at real-time rate
+3. Check for blocking CHOPs (audioout, audiodevin, etc.)
+4. Toggle play state (spacebar, or check if absTime.seconds is advancing)
+
+### 39. Recording while FPS=0 produces empty or near-empty files
+
+This is the #1 cause of "I recorded for 30 seconds but got a 2-frame video." If TD's cook loop is stalled (FPS=0 or very low), MovieFileOut has nothing to record. Unlike `TOP.save()` which captures the last cooked frame regardless, MovieFileOut only writes frames that actually cook.
+
+**Always verify FPS before starting a recording:**
+```python
+# Check via td_get_perf first
+# If FPS < 30, do NOT start recording — fix the performance issue first
+# If FPS=0, the playbar is likely paused — see pitfall #38
+```
+
+Common causes of recording empty video:
+- Playbar paused (FPS=0) — see pitfall #38
+- Audio device CHOP blocking the main thread — see pitfall #38b
+- Recording started before audio was cued — audio is silent, GLSL outputs black, MovieFileOut records black frames that look empty
+- `par.file` set in the same script as `par.record = True` — see pitfall #19
+
+### 40. GLSL shader produces black output — test before committing to a long render
+
+New GLSL shaders can fail silently (see pitfall #8). Before recording a long take, always:
+
+1. **Write a minimal test shader first** that just outputs a solid color or pass-through:
+```glsl
+void main() {
+    vec2 uv = vUV.st;
+    fragColor = TDOutputSwizzle(vec4(uv, 0.0, 1.0));
+}
+```
+
+2. **Verify the test renders correctly** via `td_get_screenshot` on the GLSL TOP's output.
+
+3. **Swap in the real shader** and screenshot again immediately. If black, the shader has a compile error or logic issue.
+
+4. **Only then start recording.** A 90-second ProRes recording is ~5GB. Recording black frames wastes disk and time.
+
+Common causes of black GLSL output:
+- Missing `TDOutputSwizzle()` on macOS (pitfall #9)
+- Time uniform not connected — shader uses default 0.0, fractal stays at origin
+- Spectrum texture not connected — audio values all 0.0, driving everything to black
+- Integer division where float division was expected (`1/2 = 0` not `0.5`)
+- `absTime.seconds % 1000.0` rolled over past 1000 and the modulo produces unexpected values
+
+### 41. td_write_dat uses `text` parameter, NOT `content`
+
+The MCP tool `td_write_dat` expects a `text` parameter for full replacement. Passing `content` returns an error: `"Provide either 'text' for full replace, or 'old_text'+'new_text' for patching"`.
+
+If `td_write_dat` fails, fall back to `td_execute_python`:
+```python
+op("/project1/shader_code").text = shader_string
+```
+
+### 42. td_execute_python does NOT return stdout or print() output
+
+Despite what earlier versions of pitfall #34 stated, `print()` and `debug()` output from `td_execute_python` scripts does NOT appear in the MCP response. The response is always just `(ok)` + FPS/error summary. To read values back, use dedicated inspection tools (`td_get_operator_info`, `td_read_dat`, `td_read_chop`) instead of trying to print from within a script.
+
+### 43. td_get_operator_info JSON is appended with `[fps X.X/X]` — breaks json.loads()
+
+The response text from `td_get_operator_info` has `[fps 60.0/60]` appended after the JSON object. This causes `json.loads()` to fail with "Extra data" errors. Strip it before parsing:
+```python
+clean = response_text.rsplit('[fps', 1)[0]
+data = json.loads(clean)
+```
+
+### 44. td_get_screenshot is asynchronous — returns `{"status": "pending"}`
+
+Screenshots don't complete instantly. The tool returns `{"status": "pending", "requestId": "..."}` and the actual file appears later. Wait a few seconds before checking for the file. There is no callback or completion notification — poll the filesystem.
+
+### 45. Recording duration is manual — no auto-stop at audio end
+
+MovieFileOut records until `par.record = False` is set. If audio ends before you stop recording, the file keeps growing with repeated frames. Always stop recording promptly after the audio duration. For precision: set a timer on the agent side matching the audio length, then send `par.record = False`. Trim excess with ffmpeg as a safety net:
+```bash
+ffmpeg -i raw.mov -t 25 -c copy trimmed.mov
+```
\ No newline at end of file
diff --git a/skills/creative/touchdesigner/references/python-api.md b/optional-skills/creative/touchdesigner-mcp/references/python-api.md
similarity index 100%
rename from skills/creative/touchdesigner/references/python-api.md
rename to optional-skills/creative/touchdesigner-mcp/references/python-api.md
diff --git a/skills/creative/touchdesigner/references/troubleshooting.md b/optional-skills/creative/touchdesigner-mcp/references/troubleshooting.md
similarity index 100%
rename from skills/creative/touchdesigner/references/troubleshooting.md
rename to optional-skills/creative/touchdesigner-mcp/references/troubleshooting.md
diff --git a/skills/creative/touchdesigner/scripts/setup.sh b/optional-skills/creative/touchdesigner-mcp/scripts/setup.sh
similarity index 100%
rename from skills/creative/touchdesigner/scripts/setup.sh
rename to optional-skills/creative/touchdesigner-mcp/scripts/setup.sh

From 11ee87e6057fe2916127bd6595f40398c4cdaa1b Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Sat, 18 Apr 2026 14:37:21 -0700
Subject: [PATCH 078/143] chore(attribution): add AUTHOR_MAP entry for
 kshitijk4poor@gmail.com

Covers the non-noreply email used on commit dd3e6424 (rename of the
TouchDesigner skill to touchdesigner-mcp).
---
 scripts/release.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/release.py b/scripts/release.py
index 94ebef5d345..88d01cc7348 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -48,6 +48,7 @@ AUTHOR_MAP = {
     "35742124+0xbyt4@users.noreply.github.com": "0xbyt4",
     "82637225+kshitijk4poor@users.noreply.github.com": "kshitijk4poor",
     "kshitijk4poor@users.noreply.github.com": "kshitijk4poor",
+    "kshitijk4poor@gmail.com": "kshitijk4poor",
     "16443023+stablegenius49@users.noreply.github.com": "stablegenius49",
     "185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
     "101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit",

From 6b31e20894b6e1b9b369b970d91df0ffb6ce83ac Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Sat, 18 Apr 2026 14:37:26 -0700
Subject: [PATCH 079/143] chore(skills): touchdesigner-mcp follow-ups

- Remove orphan skills/creative/touchdesigner/references/pitfalls.md
  left over from the rename commit (git add-then-edit instead of git mv
  meant the old file never got deleted).
- Honour $HERMES_HOME in setup.sh and SKILL.md setup invocation so
  profile-aware installs work correctly.
- Fix troubleshooting.md config path to use $HERMES_HOME instead of
  hardcoding ~/.hermes/.
- Add touchdesigner-mcp entries to skills-catalog.md and
  optional-skills-catalog.md for parity with blender-mcp/meme-generation.
---
 .../creative/touchdesigner-mcp/SKILL.md       |   2 +-
 .../references/troubleshooting.md             |   2 +-
 .../touchdesigner-mcp/scripts/setup.sh        |   3 +-
 .../touchdesigner/references/pitfalls.md      | 508 ------------------
 .../docs/reference/optional-skills-catalog.md |   1 +
 website/docs/reference/skills-catalog.md      |   1 +
 6 files changed, 6 insertions(+), 511 deletions(-)
 delete mode 100644 skills/creative/touchdesigner/references/pitfalls.md

diff --git a/optional-skills/creative/touchdesigner-mcp/SKILL.md b/optional-skills/creative/touchdesigner-mcp/SKILL.md
index 2df25117f50..d0bd348afc4 100644
--- a/optional-skills/creative/touchdesigner-mcp/SKILL.md
+++ b/optional-skills/creative/touchdesigner-mcp/SKILL.md
@@ -36,7 +36,7 @@ Hub health check: `GET http://localhost:40404/mcp` returns JSON with instance PI
 Run the setup script to handle everything:
 
 ```bash
-bash ~/.hermes/optional-skills/creative/touchdesigner-mcp/scripts/setup.sh
+bash "${HERMES_HOME:-$HOME/.hermes}/skills/creative/touchdesigner-mcp/scripts/setup.sh"
 ```
 
 The script will:
diff --git a/optional-skills/creative/touchdesigner-mcp/references/troubleshooting.md b/optional-skills/creative/touchdesigner-mcp/references/troubleshooting.md
index c9817ebe0f2..b8e201f5c32 100644
--- a/optional-skills/creative/touchdesigner-mcp/references/troubleshooting.md
+++ b/optional-skills/creative/touchdesigner-mcp/references/troubleshooting.md
@@ -137,7 +137,7 @@ actual = str(n.width) + 'x' + str(n.height)
 
 ### Config location
 
-~/.hermes/config.yaml
+`$HERMES_HOME/config.yaml` (defaults to `~/.hermes/config.yaml` when `HERMES_HOME` is unset)
 
 ### MCP entry format
 
diff --git a/optional-skills/creative/touchdesigner-mcp/scripts/setup.sh b/optional-skills/creative/touchdesigner-mcp/scripts/setup.sh
index f6bab2f5073..34d883c1c4a 100644
--- a/optional-skills/creative/touchdesigner-mcp/scripts/setup.sh
+++ b/optional-skills/creative/touchdesigner-mcp/scripts/setup.sh
@@ -8,7 +8,8 @@ OK="${GREEN}✔${NC}"; FAIL="${RED}✘${NC}"; WARN="${YELLOW}⚠${NC}"
 
 TWOZERO_URL="https://www.404zero.com/pisang/twozero.tox"
 TOX_PATH="$HOME/Downloads/twozero.tox"
-HERMES_CFG="$HOME/.hermes/config.yaml"
+HERMES_HOME_DIR="${HERMES_HOME:-$HOME/.hermes}"
+HERMES_CFG="${HERMES_HOME_DIR}/config.yaml"
 MCP_PORT=40404
 MCP_ENDPOINT="http://localhost:${MCP_PORT}/mcp"
 
diff --git a/skills/creative/touchdesigner/references/pitfalls.md b/skills/creative/touchdesigner/references/pitfalls.md
deleted file mode 100644
index 5883ed72c99..00000000000
--- a/skills/creative/touchdesigner/references/pitfalls.md
+++ /dev/null
@@ -1,508 +0,0 @@
-# TouchDesigner MCP — Pitfalls & Lessons Learned
-
-Hard-won knowledge from real TD sessions. Read this before building anything.
-
-## Parameter Names
-
-### 1. NEVER hardcode parameter names — always discover
-
-Parameter names change between TD versions. What works in one build may not work in another. ALWAYS use td_get_par_info to discover actual names from TD.
-
-The agent's LLM training data contains WRONG parameter names. Do not trust them.
-
-Known historical differences (may vary further — always verify):
-| What docs/training say | Actual in some versions | Notes |
-|---------------|---------------|-------|
-| `dat` | `pixeldat` | GLSL TOP pixel shader DAT |
-| `colora` | `alpha` | Constant TOP alpha |
-| `sizex` / `sizey` | `size` | Blur TOP (single value) |
-| `fontr/g/b/a` | `fontcolorr/g/b/a` | Text TOP font color (r/g/b) |
-| `fontcolora` | `fontalpha` | Text TOP font alpha (NOT `fontcolora`) |
-| `bgcolora` | `bgalpha` | Text TOP bg alpha |
-| `value1name` | `vec0name` | GLSL TOP uniform name |
-
-### 2. twozero td_execute_python response format
-
-When calling `td_execute_python` via twozero MCP, successful responses return `(ok)` followed by FPS/error summary (e.g. `[fps 60.0/60] [0 err/0 warn]`), NOT the raw Python `result` dict. If you're parsing responses programmatically, check for the `(ok)` prefix — don't pattern-match on Python variable names from the script. Use `td_get_operator_info` or separate inspection calls to read back values.
-
-### 3. When using td_set_operator_pars, param names must match exactly
-
-Use td_get_par_info to discover them. The MCP tool validates parameter names and returns clear errors explaining what went wrong, unlike raw Python which crashes the whole script with tdAttributeError and stops execution. Always discover before setting.
-
-### 3. Use `safe_par()` pattern for cross-version compatibility
-
-```python
-def safe_par(node, name, value):
-    p = getattr(node.par, name, None)
-    if p is not None:
-        p.val = value
-        return True
-    return False
-```
-
-### 4. `td.tdAttributeError` crashes the whole script — use defensive access
-
-If you do `node.par.nonexistent = value`, TD raises `tdAttributeError` and stops the entire script. Prevention is better than catching:
-- Use `op()` instead of `opex()` — `op()` returns None on failure, `opex()` raises
-- Use `hasattr(node.par, 'name')` before accessing any parameter
-- Use `getattr(node.par, 'name', None)` with a default
-- Use the `safe_par()` pattern from pitfall #3
-
-```python
-# WRONG — crashes if param doesn't exist:
-node.par.nonexistent = value
-
-# CORRECT — defensive access:
-if hasattr(node.par, 'nonexistent'):
-    node.par.nonexistent = value
-```
-
-### 5. `outputresolution` is a string menu, not an integer
-
-```
-menuNames: ['useinput','eighth','quarter','half','2x','4x','8x','fit','limit','custom','parpanel']
-```
-Always use the string form. Setting `outputresolution = 9` may silently fail.
-```python
-node.par.outputresolution = 'custom'  # correct
-node.par.resolutionw = 1280; node.par.resolutionh = 720
-```
-Discover valid values: `list(node.par.outputresolution.menuNames)`
-
-## GLSL Shaders
-
-### 6. `uTDCurrentTime` does NOT exist in GLSL TOP
-
-There is NO built-in time uniform for GLSL TOPs. GLSL MAT has `uTDGeneral.seconds` but that's NOT available in GLSL TOP context.
-
-**PRIMARY — GLSL TOP Vectors/Values page:**
-```python
-gl.par.value0name = 'uTime'
-gl.par.value0.expr = "absTime.seconds"
-# In GLSL: uniform float uTime;
-```
-
-**FALLBACK — Constant TOP texture (for complex time data):**
-
-CRITICAL: set format to `rgba32float` — default 8-bit clamps to 0-1:
-```python
-t = root.create(constantTOP, 'time_driver')
-t.par.format = 'rgba32float'
-t.par.outputresolution = 'custom'
-t.par.resolutionw = 1; t.par.resolutionh = 1
-t.par.colorr.expr = "absTime.seconds % 1000.0"
-t.outputConnectors[0].connect(glsl.inputConnectors[0])
-```
-
-### 7. GLSL compile errors are silent in the API
-
-The GLSL TOP shows a yellow warning triangle in the UI but `node.errors()` may return empty string. Check `node.warnings()` too, and create an Info DAT pointed at the GLSL TOP to read the actual compiler output.
-
-### 8. TD GLSL uses `vUV.st` not `gl_FragCoord` — and REQUIRES `TDOutputSwizzle()` on macOS
-
-Standard GLSL patterns don't work. TD provides:
-- `vUV.st` — UV coordinates (0-1)
-- `uTDOutputInfo.res.zw` — resolution
-- `sTD2DInputs[0]` — input textures
-- `layout(location = 0) out vec4 fragColor` — output
-
-CRITICAL on macOS: Always wrap output with `TDOutputSwizzle()`:
-```glsl
-fragColor = TDOutputSwizzle(color);
-```
-TD uses GLSL 4.60 (Vulkan backend). GLSL 3.30 and earlier removed.
-
-### 9. Large GLSL shaders — write to temp file
-
-GLSL code with special characters can corrupt JSON payloads. Write the shader to a temp file and load it in TD:
-```python
-# Agent side: write shader to /tmp/shader.glsl via write_file
-# TD side:
-sd = root.create(textDAT, 'shader_code')
-with open('/tmp/shader.glsl', 'r') as f:
-    sd.text = f.read()
-```
-
-## Node Management
-
-### 10. Destroying nodes while iterating `root.children` causes `tdError`
-
-The iterator is invalidated when a child is destroyed. Always snapshot first:
-```python
-kids = list(root.children)  # snapshot
-for child in kids:
-    if child.valid:  # check — earlier destroys may cascade
-        child.destroy()
-```
-
-### 10b. Split cleanup and creation into SEPARATE td_execute_python calls
-
-Creating nodes with the same names you just destroyed in the SAME script causes "Invalid OP object" errors — even with `list()` snapshot. TD's internal references can go stale within one execution context.
-
-**WRONG (single call):**
-```python
-# td_execute_python:
-for c in list(root.children):
-    if c.valid and c.name.startswith('promo_'):
-        c.destroy()
-# ... then create promo_audio, promo_shader etc. in same script → CRASHES
-```
-
-**CORRECT (two separate calls):**
-```python
-# Call 1: td_execute_python — clean only
-for c in list(root.children):
-    if c.valid and c.name.startswith('promo_'):
-        c.destroy()
-
-# Call 2: td_execute_python — build (separate MCP call)
-audio = root.create(audiofileinCHOP, 'promo_audio')
-# ... rest of build
-```
-
-### 11. Feedback TOP: use `top` parameter, NOT direct input wire
-
-The feedbackTOP's `top` parameter references which TOP to delay. Do NOT also wire that TOP directly into the feedback's input — this creates a real cook dependency loop.
-
-Correct setup:
-```python
-fb = root.create(feedbackTOP, 'fb_delay')
-fb.par.top = comp.path          # reference only — no wire to fb input
-fb.outputConnectors[0].connect(xf)  # fb output -> transform -> fade -> comp
-```
-
-The "Cook dependency loop detected" warning on the transform/fade chain is expected.
-
-### 12. GLSL TOP auto-creates companion nodes
-
-Creating a `glslTOP` also creates `name_pixel` (Text DAT), `name_info` (Info DAT), and `name_compute` (Text DAT). These are visible in the network. Don't be alarmed by "extra" nodes.
-
-### 13. The default project root is `/project1`
-
-New TD files start with `/project1` as the main container. System nodes live at `/`, `/ui`, `/sys`, `/local`, `/perform`. Don't create user nodes outside `/project1`.
-
-### 14. Non-Commercial license caps resolution at 1280x1280
-
-Setting `resolutionw=1920` silently clamps to 1280. Always check effective resolution after creation:
-```python
-n.cook(force=True)
-actual = str(n.width) + 'x' + str(n.height)
-```
-
-## Recording & Codecs
-
-### 15. MovieFileOut TOP: H.264/H.265/AV1 requires Commercial license
-
-In Non-Commercial TD, these codecs produce an error. Recommended alternatives:
-- `prores` — Apple ProRes, **best on macOS**, HW accelerated, NOT license-restricted. ~55MB/s at 1280x720 but lossless quality. **Use this as default on macOS.**
-- `cineform` — GoPro Cineform, supports alpha
-- `hap` — GPU-accelerated playback, large files
-- `notchlc` — GPU-accelerated, good quality
-- `mjpa` — Motion JPEG, legacy fallback (lossy, use only if ProRes unavailable)
-
-For image sequences: `rec.par.type = 'imagesequence'`, `rec.par.imagefiletype = 'png'`
-
-### 16. MovieFileOut `.record()` method may not exist
-
-Use the toggle parameter instead:
-```python
-rec.par.record = True   # start recording
-rec.par.record = False  # stop recording
-```
-
-When setting file path and starting recording in the same script, use delayFrames:
-```python
-rec.par.file = '/tmp/new_output.mov'
-run("op('/project1/recorder').par.record = True", delayFrames=2)
-```
-
-### 17. TOP.save() captures same frame when called rapidly
-
-Use MovieFileOut for real-time recording. Set `project.realTime = False` for frame-accurate output.
-
-### 18. AudioFileIn CHOP: cue and recording sequence matters
-
-The recording sequence must be done in exact order, or the recording will be empty, audio will start mid-file, or the file won't be written.
-
-**Proven recording sequence:**
-
-```python
-# Step 1: Stop any existing recording
-rec.par.record = False
-
-# Step 2: Reset audio to beginning
-audio.par.play = False
-audio.par.cue = True
-audio.par.cuepoint = 0      # may need cuepointunit=0 too
-# Verify: audio.par.cue.eval() should be True
-
-# Step 3: Set output file path
-rec.par.file = '/tmp/output.mov'
-
-# Step 4: Release cue + start playing + start recording (with frame delay)
-audio.par.cue = False
-audio.par.play = True
-audio.par.playmode = 2      # Sequential — plays once through
-run("op('/project1/recorder').par.record = True", delayFrames=3)
-```
-
-**Why each step matters:**
-- `rec.par.record = False` first — if a previous recording is active, setting `par.file` may fail silently
-- `audio.par.cue = True` + `cuepoint = 0` — guarantees audio starts from the beginning, otherwise the spectrum may be silent for the first few seconds
-- `delayFrames=3` on the record start — setting `par.file` and `par.record = True` in the same script can race; the file path needs a frame to register before recording starts
-- `playmode = 2` (Sequential) — plays the file once. Use `playmode = 0` (Locked to Timeline) if you want TD's timeline to control position
-
-## TD Python API Patterns
-
-### 19. COMP extension setup: ext0object format is CRITICAL
-
-`ext0object` expects a CONSTANT string (NOT expression mode):
-```python
-comp.par.ext0object = "op('./myExtensionDat').module.MyClassName(me)"
-```
-NEVER set as just the DAT name. NEVER use ParMode.EXPRESSION. ALWAYS ensure the DAT has `par.language='python'`.
-
-### 20. td.Panel is NOT subscriptable — use attribute access
-
-```python
-comp.panel.select      # correct (attribute access, returns float)
-comp.panel['select']   # WRONG — 'td.Panel' object is not subscriptable
-```
-
-### 21. ALWAYS use relative paths in script callbacks
-
-In scriptTOP/CHOP/SOP/DAT callbacks, use paths relative to `scriptOp` or `me`:
-```python
-root = scriptOp.parent().parent()
-dat = root.op('pixel_data')
-```
-NEVER hardcode absolute paths like `op('/project1/myComp/child')` — they break when containers are renamed or copied.
-
-### 22. keyboardinCHOP channel names have 'k' prefix
-
-Channel names are `kup`, `kdown`, `kleft`, `kright`, `ka`, `kb`, etc. — NOT `up`, `down`, `a`, `b`. Always verify with:
-```python
-channels = [c.name for c in op('/project1/keyboard1').chans()]
-```
-
-### 23. expressCHOP cook-only properties — false positive errors
-
-`me.inputVal`, `me.chanIndex`, `me.sampleIndex` work ONLY in cook-context. Calling `par.expr0expr.eval()` from outside always raises an error — this is NOT a real operator error. Ignore these in error scans.
-
-### 24. td.Vertex attributes — use index access not named attributes
-
-In TD 2025.32, `td.Vertex` objects do NOT have `.x`, `.y`, `.z` attributes:
-```python
-# WRONG — crashes:
-vertex.x, vertex.y, vertex.z
-
-# CORRECT — index-based:
-vertex.point.P[0], vertex.point.P[1], vertex.point.P[2]
-# Or for SOP point positions:
-pt = sop.points()[i]
-pos = pt.P    # use P[0], P[1], P[2]
-```
-
-## Audio
-
-### 25. Audio Spectrum CHOP output is weak — boost it
-
-Raw output is very small (0.001-0.05). Use built-in boost: `spectrum.par.highfrequencyboost = 3.0`
-
-If still weak, add Math CHOP in Range mode: `fromrangehi=0.05, torangehi=1.0`
-
-### 26. AudioSpectrum CHOP: timeslice and sample count are the #1 gotcha
-
-AudioSpectrum at 44100Hz with `timeslice=False` outputs the ENTIRE audio file as samples (~24000+). CHOP-to-TOP then exceeds texture resolution max and warns/fails.
-
-**Fix:** Keep `timeslice = True` (default) for real-time per-frame FFT. Set `fftsize` to control bin count (it's a STRING enum: `'256'` not `256`).
-
-If the CHOP-to-TOP still gets too many samples, set `layout = 'rowscropped'` on the choptoTOP.
-
-```python
-spectrum.par.fftsize = '256'      # STRING, not int — enum values
-spectrum.par.timeslice = True     # MUST be True for real-time audio reactivity
-spectex.par.layout = 'rowscropped'  # handles oversized CHOP inputs
-```
-
-**resampleCHOP has NO `numsamples` param.** It uses `rate`, `start`, `end`, `method`. Don't guess — always `td_get_par_info('resampleCHOP')` first.
-
-### 27. CHOP To TOP has NO input connectors — use par.chop reference
-
-```python
-spec_tex = root.create(choptoTOP, 'spectrum_tex')
-spec_tex.par.chop = resample  # correct: parameter reference
-# NOT: resample.outputConnectors[0].connect(spec_tex.inputConnectors[0])  # WRONG
-```
-
-## Workflow
-
-### 28. Always verify after building — errors are silent
-
-Node errors and broken connections produce no output. Always check:
-```python
-for c in list(root.children):
-    e = c.errors()
-    w = c.warnings()
-    if e: print(c.name, 'ERR:', e)
-    if w: print(c.name, 'WARN:', w)
-```
-
-### 29. Window COMP param for display target is `winop`
-
-```python
-win = root.create(windowCOMP, 'display')
-win.par.winop = '/project1/logo_out'
-win.par.winw = 1280; win.par.winh = 720
-win.par.winopen.pulse()
-```
-
-### 30. `sample()` returns frozen pixels in rapid calls
-
-`out.sample(x, y)` returns pixels from a single cook snapshot. Compare samples with 2+ second delays, or use screencapture on the display window.
-
-### 31. Audio-reactive GLSL: dual-layer sync pipeline
-
-For audio-synced visuals, use BOTH layers for maximum effect:
-
-**Layer 1 (TD-side, real-time):** AudioFileIn → AudioSpectrum(timeslice=True, fftsize='256') → Math(gain=5) → choptoTOP(par.chop=math, layout='rowscropped') → GLSL input. The shader samples `sTD2DInputs[1]` at different x positions for bass/mid/hi. Record the TD output with MovieFileOut.
-
-**Layer 2 (Python-side, post-hoc):** scipy FFT on the SAME audio file → per-frame features (rms, bass, mid, hi, beat detection) → drive ASCII brightness, chromatic aberration, beat flashes during the render pass.
-
-Both layers locked to the same audio file = visuals genuinely sync to the beat at two independent stages.
-
-**Key gotcha:** AudioFileIn must be cued (`par.cue=True` → `par.cuepulse.pulse()`) then uncued (`par.cue=False`, `par.play=True`) before recording starts. Otherwise the spectrum is silent for the first few seconds.
-
-### 32. twozero MCP: benchmark and prefer native tools
-
-Benchmarked April 2026: twozero MCP with 36 native tools. The old curl/REST method (port 9981) had zero native tools.
-
-**Always prefer native MCP tools over td_execute_python:**
-- `td_create_operator` over `root.create()` scripts (handles viewport positioning)
-- `td_set_operator_pars` over `node.par.X = Y` scripts (validates param names)
-- `td_get_par_info` over temp-node discovery dance (instant, no cleanup)
-- `td_get_errors` over manual `c.errors()` loops
-- `td_get_focus` for context awareness (no equivalent in old method)
-
-Only fall back to `td_execute_python` for multi-step logic (wiring chains, conditional builds, loops).
-
-### 33. twozero td_execute_python response wrapping
-
-twozero wraps `td_execute_python` responses with status info: `(ok)\n\n[fps 60.0/60] [0 err/0 warn]`. Your Python `result` variable value may not appear verbatim in the response text. If you need to check results programmatically, use `print()` statements in the script — they appear in the response. Don't rely on string-matching the `result` dict.
-
-### 34. Audio-reactive chain: DO NOT use Lag CHOP or Filter CHOP for spectrum smoothing
-
-The Derivative docs and tutorials suggest using Lag CHOP (lag1=0.2, lag2=0.5) to smooth raw FFT output before passing to a shader. **This does NOT work with AudioSpectrum → CHOP to TOP → GLSL.**
-
-What happens: Lag CHOP operates in timeslice mode. A 256-sample spectrum input gets expanded to 1600-2400 samples. The Lag averaging drives all values to near-zero (~1e-06). The CHOP to TOP produces a 2400x2 texture instead of 256x2. The shader receives effectively zero audio data.
-
-**The correct chain is: Spectrum(outlength=256) → Math(gain=10) → CHOPtoTOP → GLSL.** No CHOP smoothing at all. If you need smoothing, do it in the GLSL shader via temporal lerp with a feedback texture.
-
-Verified values with audio playing:
-- Without Lag CHOP: bass bins = 5.0-5.4, mid bins = 1.0-1.7 (strong, usable)
-- With Lag CHOP: ALL bins = 0.000001-0.00004 (dead, zero audio reactivity)
-
-### 35. AudioSpectrum Output Length: set manually to avoid CHOP to TOP overflow
-
-AudioSpectrum in Visualization mode with FFT 8192 outputs 22,050 samples by default (1 per Hz, 0–22050). CHOP to TOP cannot handle this — you get "Number of samples exceeded texture resolution max".
-
-Fix: `spectrum.par.outputmenu = 'setmanually'` and `spectrum.par.outlength = 256`. This gives 256 frequency bins — plenty for visual FFT.
-
-DO NOT set `timeslice = False` as a workaround — that processes the entire audio file at once and produces even more samples.
-
-### 36. GLSL spectrum texture from CHOP to TOP is 256x2 not 256x1
-
-AudioSpectrum outputs 2 channels (stereo: chan1, chan2). CHOP to TOP with `dataformat='r'` creates a 256x2 texture — one row per channel. Sample the first channel at `y=0.25` (center of first row), NOT `y=0.5` (boundary between rows):
-
-```glsl
-float bass = texture(sTD2DInputs[1], vec2(0.05, 0.25)).r;  // correct
-float bass = texture(sTD2DInputs[1], vec2(0.05, 0.5)).r;   // WRONG — samples between rows
-```
-
-### 37. FPS=0 doesn't mean ops aren't cooking — check play state
-
-TD can show `fps:0` in `td_get_perf` while ops still cook and `TOP.save()` still produces valid screenshots. The two most common causes:
-
-**a) Project is paused (playbar stopped).** TD's playbar can be toggled with spacebar. The `root` at `/` has no `.playbar` attribute (it's on the perform COMP). The easiest fix is sending a spacebar keypress via `td_input_execute`, though this tool can sometimes error. As a workaround, `TOP.save()` always works regardless of play state — use it to verify rendering is actually happening before spending time debugging FPS.
-
-**b) Audio device CHOP blocking the main thread.** An `audiooutCHOP` with an active audio device can consume 300-400ms/s (2000%+ of frame budget), stalling the cook loop at FPS=0. Fix: keep the CHOP active but set `volume=0` to prevent the audio driver from blocking. Disabling it entirely (`active=False`) may also work but can prevent downstream audio processing CHOPs from cooking.
-
-Diagnostic sequence when FPS=0:
-1. `td_get_perf` — check if any op has extreme CPU/s
-2. `TOP.save()` on the output — if it produces a valid image, the pipeline works, just not at real-time rate
-3. Check for blocking CHOPs (audioout, audiodevin, etc.)
-4. Toggle play state (spacebar, or check if absTime.seconds is advancing)
-
-### 38. Recording while FPS=0 produces empty or near-empty files
-
-This is the #1 cause of "I recorded for 30 seconds but got a 2-frame video." If TD's cook loop is stalled (FPS=0 or very low), MovieFileOut has nothing to record. Unlike `TOP.save()` which captures the last cooked frame regardless, MovieFileOut only writes frames that actually cook.
-
-**Always verify FPS before starting a recording:**
-```python
-# Check via td_get_perf first
-# If FPS < 30, do NOT start recording — fix the performance issue first
-# If FPS=0, the playbar is likely paused — see pitfall #37
-```
-
-Common causes of recording empty video:
-- Playbar paused (FPS=0) — see pitfall #37
-- Audio device CHOP blocking the main thread — see pitfall #37b
-- Recording started before audio was cued — audio is silent, GLSL outputs black, MovieFileOut records black frames that look empty
-- `par.file` set in the same script as `par.record = True` — see pitfall #18
-
-### 39. GLSL shader produces black output — test before committing to a long render
-
-New GLSL shaders can fail silently (see pitfall #7). Before recording a long take, always:
-
-1. **Write a minimal test shader first** that just outputs a solid color or pass-through:
-```glsl
-void main() {
-    vec2 uv = vUV.st;
-    fragColor = TDOutputSwizzle(vec4(uv, 0.0, 1.0));
-}
-```
-
-2. **Verify the test renders correctly** via `td_get_screenshot` on the GLSL TOP's output.
-
-3. **Swap in the real shader** and screenshot again immediately. If black, the shader has a compile error or logic issue.
-
-4. **Only then start recording.** A 90-second ProRes recording is ~5GB. Recording black frames wastes disk and time.
-
-Common causes of black GLSL output:
-- Missing `TDOutputSwizzle()` on macOS (pitfall #8)
-- Time uniform not connected — shader uses default 0.0, fractal stays at origin
-- Spectrum texture not connected — audio values all 0.0, driving everything to black
-- Integer division where float division was expected (`1/2 = 0` not `0.5`)
-- `absTime.seconds % 1000.0` rolled over past 1000 and the modulo produces unexpected values
-
-### 40. td_write_dat uses `text` parameter, NOT `content`
-
-The MCP tool `td_write_dat` expects a `text` parameter for full replacement. Passing `content` returns an error: `"Provide either 'text' for full replace, or 'old_text'+'new_text' for patching"`.
-
-If `td_write_dat` fails, fall back to `td_execute_python`:
-```python
-op("/project1/shader_code").text = shader_string
-```
-
-### 41. td_execute_python does NOT return stdout or print() output
-
-Despite what earlier versions of pitfall #33 stated, `print()` and `debug()` output from `td_execute_python` scripts does NOT appear in the MCP response. The response is always just `(ok)` + FPS/error summary. To read values back, use dedicated inspection tools (`td_get_operator_info`, `td_read_dat`, `td_read_chop`) instead of trying to print from within a script.
-
-### 42. td_get_operator_info JSON is appended with `[fps X.X/X]` — breaks json.loads()
-
-The response text from `td_get_operator_info` has `[fps 60.0/60]` appended after the JSON object. This causes `json.loads()` to fail with "Extra data" errors. Strip it before parsing:
-```python
-clean = response_text.rsplit('[fps', 1)[0]
-data = json.loads(clean)
-```
-
-### 43. td_get_screenshot is asynchronous — returns `{"status": "pending"}`
-
-Screenshots don't complete instantly. The tool returns `{"status": "pending", "requestId": "..."}` and the actual file appears later. Wait a few seconds before checking for the file. There is no callback or completion notification — poll the filesystem.
-
-### 44. Recording duration is manual — no auto-stop at audio end
-
-MovieFileOut records until `par.record = False` is set. If audio ends before you stop recording, the file keeps growing with repeated frames. Always stop recording promptly after the audio duration. For precision: set a timer on the agent side matching the audio length, then send `par.record = False`. Trim excess with ffmpeg as a safety net:
-```bash
-ffmpeg -i raw.mov -t 25 -c copy trimmed.mov
-```
diff --git a/website/docs/reference/optional-skills-catalog.md b/website/docs/reference/optional-skills-catalog.md
index 1501567b791..044060e9dd7 100644
--- a/website/docs/reference/optional-skills-catalog.md
+++ b/website/docs/reference/optional-skills-catalog.md
@@ -56,6 +56,7 @@ hermes skills uninstall <skill-name>
 | **blender-mcp** | Control Blender directly from Hermes via socket connection to the blender-mcp addon. Create 3D objects, materials, animations, and run arbitrary Blender Python (bpy) code. |
 | **concept-diagrams** | Generate flat, minimal light/dark-aware SVG diagrams as standalone HTML files, using a unified educational visual language (9 semantic color ramps, automatic dark mode). Best for physics setups, chemistry mechanisms, math curves, physical objects (aircraft, turbines, smartphones), floor plans, cross-sections, lifecycle/process narratives, and hub-spoke system diagrams. Ships with 15 example diagrams. |
 | **meme-generation** | Generate real meme images by picking a template and overlaying text with Pillow. Produces actual `.png` meme files. |
+| **touchdesigner-mcp** | Control a running TouchDesigner instance via the twozero MCP plugin — create operators, set parameters, wire connections, execute Python, build real-time audio-reactive visuals and GLSL networks. 36 native tools. |
 
 ## DevOps
 
diff --git a/website/docs/reference/skills-catalog.md b/website/docs/reference/skills-catalog.md
index 27fbb8c7655..16be6a6581c 100644
--- a/website/docs/reference/skills-catalog.md
+++ b/website/docs/reference/skills-catalog.md
@@ -296,6 +296,7 @@ hermes skills install official/<category>/<skill>
 |-------|-------------|------|
 | `blender-mcp` | Control Blender directly from Hermes via socket connection to the blender-mcp addon. Create 3D objects, materials, animations, and run arbitrary Blender Python (bpy) code. | `creative/blender-mcp` |
 | `meme-generation` | Generate real meme images by picking a template and overlaying text with Pillow. Produces actual .png meme files. | `creative/meme-generation` |
+| `touchdesigner-mcp` | Control a running TouchDesigner instance via the twozero MCP plugin — create operators, set parameters, wire connections, execute Python, build real-time audio-reactive visuals and GLSL networks. 36 native tools. | `creative/touchdesigner-mcp` |
 
 ## devops
 

From 139a6da67c4c13fed41cadbd53b82c0e2ad57083 Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Sat, 18 Apr 2026 14:44:40 -0700
Subject: [PATCH 080/143] =?UTF-8?q?fix(skills):=20touchdesigner-mcp=20setu?=
 =?UTF-8?q?p.sh=20=E2=80=94=20correct=20pgrep=20match=20+=20suppress=20str?=
 =?UTF-8?q?ay=20yaml=20output?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Discovered while dogfooding the skill end-to-end:

- pgrep -if "TouchDesigner" matched any process whose full command line
  contained the substring (including the setup script's own invocation
  under certain wrappers), falsely reporting TD as running on machines
  where it isn't. Switch to pgrep -x (exact process name match,
  supported on both macOS and Linux) and also check TouchDesignerFTE
  (the non-commercial variant).
- The embedded python3 yaml-writer printed 'added' / 'exists' to
  stdout as status, which leaked a stray word into the setup output
  right before the ✔ line. Drop the print()s — the bash-level ✔/✘ is
  the status indicator.
---
 .../creative/touchdesigner-mcp/scripts/setup.sh           | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/optional-skills/creative/touchdesigner-mcp/scripts/setup.sh b/optional-skills/creative/touchdesigner-mcp/scripts/setup.sh
index 34d883c1c4a..15dc662c1cd 100644
--- a/optional-skills/creative/touchdesigner-mcp/scripts/setup.sh
+++ b/optional-skills/creative/touchdesigner-mcp/scripts/setup.sh
@@ -18,7 +18,10 @@ manual_steps=()
 echo -e "\n${CYAN}═══ twozero MCP for TouchDesigner — Setup ═══${NC}\n"
 
 # ── 1. Check if TouchDesigner is running ──
-if pgrep -if "TouchDesigner" >/dev/null 2>&1; then
+# Match on process *name* (not full cmdline) to avoid self-matching shells
+# that happen to have "TouchDesigner" in their args. macOS and Linux pgrep
+# both support -x for exact name match.
+if pgrep -x TouchDesigner >/dev/null 2>&1 || pgrep -x TouchDesignerFTE >/dev/null 2>&1; then
     echo -e " ${OK} TouchDesigner is running"
     td_running=true
 else
@@ -66,9 +69,6 @@ if 'twozero_td' not in cfg['mcp_servers']:
     }
     with open(cfg_path, 'w') as f:
         yaml.dump(cfg, f, default_flow_style=False, sort_keys=False)
-    print('added')
-else:
-    print('exists')
 " 2>/dev/null && echo -e " ${OK} twozero_td MCP entry added to config" \
               || { echo -e " ${FAIL} Could not update config (is PyYAML installed?)"; \
                    manual_steps+=("Add twozero_td MCP entry to ${HERMES_CFG} manually"); }

From 1a9a2d7fe81b32fddd6ec5c1eaf167caded3f528 Mon Sep 17 00:00:00 2001
From: Nish <nish3451@users.noreply.github.com>
Date: Wed, 8 Apr 2026 12:05:24 +0530
Subject: [PATCH 081/143] fix(gateway/telegram): fall back to chat.id when
 from_user is None in DMs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When `message.from_user` is None — which can happen for forwarded messages,
anonymous admin mode in groups, or certain Telegram client edge cases —
`_build_message_event` set `source.user_id` to None. As a result:

1. `_is_user_authorized()` early-returned False (`if not user_id: return False`)
2. The access check never compared against `TELEGRAM_ALLOWED_USERS` even when
   the user actually was in the allowlist
3. The pairing flow fired and generated a code for `user_id=None`
4. The pairing approval saved an entry under the literal string key "null"
5. The user was effectively locked out because their real user_id never
   matched the "null" key on subsequent messages

For DMs (`chat_type == "dm"`), Telegram guarantees `chat.id == user.id` —
they are the same numeric ID for private chats. Falling back to `chat.id`
when `from_user` is None for DMs restores the expected access-control
behavior without weakening it (group/channel chats correctly stay None).

Also adds a parallel `user_name` fallback to `chat.full_name` so the
display name still works in the same edge case.
---
 gateway/platforms/telegram.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index 8df05268c71..f71614054c3 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -2926,8 +2926,8 @@ class TelegramAdapter(BasePlatformAdapter):
             chat_id=str(chat.id),
             chat_name=chat.title or (chat.full_name if hasattr(chat, "full_name") else None),
             chat_type=chat_type,
-            user_id=str(user.id) if user else None,
-            user_name=user.full_name if user else None,
+            user_id=str(user.id) if user else (str(chat.id) if chat_type == "dm" else None),
+            user_name=user.full_name if user else (chat.full_name if hasattr(chat, "full_name") and chat_type == "dm" else None),
             thread_id=thread_id_str,
             chat_topic=chat_topic,
         )

From aa5f89d3eaadcd05420aab5adf709221abf018a9 Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Wed, 8 Apr 2026 02:41:57 -0700
Subject: [PATCH 082/143] test: add coverage for from_user=None DM fallback

Tests the three cases:
- DM with from_user=None: user_id falls back to chat.id
- Group with from_user=None: user_id stays None (safe default)
- DM with from_user present: user_id uses from_user.id (no regression)
---
 tests/gateway/test_dm_topics.py | 51 +++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/tests/gateway/test_dm_topics.py b/tests/gateway/test_dm_topics.py
index b9a94c3438b..69e9629b23d 100644
--- a/tests/gateway/test_dm_topics.py
+++ b/tests/gateway/test_dm_topics.py
@@ -645,3 +645,54 @@ def test_group_topic_chat_id_int_string_coercion():
 
     assert event.auto_skill == "hermes-agent-dev"
     assert event.source.chat_topic == "Dev"
+
+
+# ── _build_message_event: from_user=None fallback in DMs ──
+
+
+def test_build_message_event_dm_from_user_none_falls_back_to_chat_id():
+    """When from_user is None in a DM, user_id should fall back to chat.id."""
+    from gateway.platforms.base import MessageType
+
+    adapter = _make_adapter()
+    msg = _make_mock_message(chat_id=12345, user_id=42, user_name="Alice")
+    # Simulate from_user being None (edge case on fresh restart / forwarded msg)
+    msg.from_user = None
+
+    event = adapter._build_message_event(msg, MessageType.TEXT)
+
+    # Should fall back to chat.id since chat_type is "dm"
+    assert event.source.user_id == "12345"
+    assert event.source.user_name == "Alice"  # falls back to chat.full_name
+
+
+def test_build_message_event_group_from_user_none_stays_none():
+    """When from_user is None in a group, user_id should remain None."""
+    from gateway.platforms.base import MessageType
+
+    adapter = _make_adapter()
+    msg = _make_mock_message(
+        chat_id=-1001234567890, chat_type=_ChatType.SUPERGROUP,
+        user_id=42, user_name="Alice"
+    )
+    msg.from_user = None
+
+    event = adapter._build_message_event(msg, MessageType.TEXT)
+
+    # Groups should NOT fall back — anonymous senders stay None
+    assert event.source.user_id is None
+    assert event.source.user_name is None
+
+
+def test_build_message_event_dm_from_user_present_uses_user():
+    """When from_user is present in a DM, it should be used (no fallback)."""
+    from gateway.platforms.base import MessageType
+
+    adapter = _make_adapter()
+    msg = _make_mock_message(chat_id=12345, user_id=99999, user_name="Bob")
+
+    event = adapter._build_message_event(msg, MessageType.TEXT)
+
+    # Normal case — from_user is used directly
+    assert event.source.user_id == "99999"
+    assert event.source.user_name == "Bob"

From 41560192c4e4e8d5a51141b39907f01cd977524f Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Sat, 18 Apr 2026 18:52:41 -0700
Subject: [PATCH 083/143] chore(attribution): add AUTHOR_MAP entry for nish3451

Adds the nish3451 noreply email to the AUTHOR_MAP so CI attribution checks
pass for the #6100 Telegram DM fallback fix merged in 1a9a2d7f.
---
 scripts/release.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/release.py b/scripts/release.py
index 88d01cc7348..90c2a13d0b5 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -76,6 +76,7 @@ AUTHOR_MAP = {
     "39405770+yyq4193@users.noreply.github.com": "yyq4193",
     "Asunfly@users.noreply.github.com": "Asunfly",
     "2500400+honghua@users.noreply.github.com": "honghua",
+    "nish3451@users.noreply.github.com": "nish3451",
     # contributors (manual mapping from git names)
     "ahmedsherif95@gmail.com": "asheriif",
     "liujinkun@bytedance.com": "liujinkun2025",

From 632a807a3e528169bd39baf8fc3aa1d641580e96 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 18 Apr 2026 18:53:22 -0700
Subject: [PATCH 084/143] fix(gateway): slash commands never interrupt a
 running agent (#12334)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Any recognized slash command now bypasses the Level-1 active-session
guard instead of queueing + interrupting. A mid-run /model (or
/reasoning, /voice, /insights, /title, /resume, /retry, /undo,
/compress, /usage, /provider, /reload-mcp, /sethome, /reset) used to
interrupt the agent AND get silently discarded by the slash-command
safety net — zero-char response, dropped tool calls.

Root cause:
- Discord registers 41 native slash commands via tree.command().
- Only 14 were in ACTIVE_SESSION_BYPASS_COMMANDS.
- The other ~15 user-facing ones fell through base.py:handle_message
  to the busy-session handler, which calls running_agent.interrupt()
  AND queues the text.
- After the aborted run, gateway/run.py:9912 correctly identifies the
  queued text as a slash command and discards it — but the damage
  (interrupt + zero-char response) already happened.

Fix:
- should_bypass_active_session() now returns True for any resolvable
  slash command. ACTIVE_SESSION_BYPASS_COMMANDS stays as the subset
  with dedicated Level-2 handlers (documentation + tests).
- gateway/run.py adds a catch-all after the dedicated handlers that
  returns a user-visible "agent busy — wait or /stop first" response
  for any other resolvable command.
- Unknown text / file-path-like messages are unchanged — they still
  queue.

Also:
- gateway/platforms/discord.py logs the invoker identity on every
  slash command (user id + name + channel + guild) so future
  ghost-command reports can be triaged without guessing.

Tests:
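- 15 new parametrized cases in test_command_bypass_active_session.py
  cover the previously-broken Discord slash commands (plus
  /personality); /reset is covered by the separate
  should_bypass_active_session() spot-check test.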
- Existing tests for /stop, /new, /approve, /deny, /help, /status,
  /agents, /background, /steer, /update, /queue still pass.
- test_steer.py's ACTIVE_SESSION_BYPASS_COMMANDS check still passes.

Fixes #5057. Related: #6252, #10370, #4665.
---
 gateway/platforms/discord.py                  | 18 +++++
 gateway/run.py                                | 25 ++++--
 hermes_cli/commands.py                        | 31 ++++++--
 .../test_command_bypass_active_session.py     | 76 +++++++++++++++++++
 4 files changed, 137 insertions(+), 13 deletions(-)

diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py
index 31973b9629b..b1585637ff4 100644
--- a/gateway/platforms/discord.py
+++ b/gateway/platforms/discord.py
@@ -1933,6 +1933,24 @@ class DiscordAdapter(BasePlatformAdapter):
         the "thinking..." indicator is replaced with that text; otherwise it
         is deleted so the channel isn't cluttered.
         """
+        # Log the invoker so ghost-command reports can be triaged.  Discord
+        # native slash invocations are always user-initiated (no bot can fire
+        # them), but mobile autocomplete / keyboard shortcuts / other users
+        # in the same channel are easy to miss in post-mortems.
+        try:
+            _user = interaction.user
+            _chan_id = getattr(interaction.channel, "id", None) or getattr(interaction, "channel_id", None)
+            logger.info(
+                "[Discord] slash '%s' invoked by user=%s id=%s channel=%s guild=%s",
+                command_text,
+                getattr(_user, "name", "?"),
+                getattr(_user, "id", "?"),
+                _chan_id,
+                getattr(interaction, "guild_id", None),
+            )
+        except Exception:
+            pass  # logging must never block command dispatch
+
         await interaction.response.defer(ephemeral=True)
         event = self._build_slash_event(interaction, command_text)
         await self.handle_message(event)
diff --git a/gateway/run.py b/gateway/run.py
index af3946d4afc..f9782b29900 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -2987,8 +2987,8 @@ class GatewayRunner:
 
             # Resolve the command once for all early-intercept checks below.
             from hermes_cli.commands import (
+                ACTIVE_SESSION_BYPASS_COMMANDS as _DEDICATED_HANDLERS,
                 resolve_command as _resolve_cmd_inner,
-                should_bypass_active_session as _should_bypass_active_inner,
             )
             _evt_cmd = event.get_command()
             _cmd_def_inner = _resolve_cmd_inner(_evt_cmd) if _evt_cmd else None
@@ -3123,11 +3123,9 @@ class GatewayRunner:
             if _cmd_def_inner and _cmd_def_inner.name == "background":
                 return await self._handle_background_command(event)
 
-            # Gateway-handled info/control commands must never fall through to
-            # the interrupt path. If they are queued as pending text, the
-            # slash-command safety net discards them before the user sees any
-            # response.
-            if _cmd_def_inner and _should_bypass_active_inner(_cmd_def_inner.name):
+            # Gateway-handled info/control commands with dedicated
+            # running-agent handlers.
+            if _cmd_def_inner and _cmd_def_inner.name in _DEDICATED_HANDLERS:
                 if _cmd_def_inner.name == "help":
                     return await self._handle_help_command(event)
                 if _cmd_def_inner.name == "commands":
@@ -3137,6 +3135,21 @@ class GatewayRunner:
                 if _cmd_def_inner.name == "update":
                     return await self._handle_update_command(event)
 
+            # Catch-all: any other recognized slash command reached the
+            # running-agent guard. Reject gracefully rather than falling
+            # through to interrupt + discard. Without this, commands
+            # like /model, /reasoning, /voice, /insights, /title,
+            # /resume, /retry, /undo, /compress, /usage, /provider,
+            # /reload-mcp, /sethome, /reset (all registered as Discord
+            # slash commands) would interrupt the agent AND get
+            # silently discarded by the slash-command safety net,
+            # producing a zero-char response. See #5057, #6252, #10370.
+            if _cmd_def_inner:
+                return (
+                    f"⏳ Agent is running — `/{_cmd_def_inner.name}` can't run "
+                    f"mid-turn. Wait for the current response or `/stop` first."
+                )
+
             if event.message_type == MessageType.PHOTO:
                 logger.debug("PRIORITY photo follow-up for session %s — queueing without interrupt", _quick_key[:20])
                 adapter = self.adapters.get(source.platform)
diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py
index 681e6f9b265..f753d6f3a73 100644
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -260,10 +260,10 @@ GATEWAY_KNOWN_COMMANDS: frozenset[str] = frozenset(
 )
 
 
-# Commands that must never be queued behind an active gateway session.
-# These are explicit control/info commands handled by the gateway itself;
-# if they get queued as pending text, the safety net in gateway.run will
-# discard them before they ever reach the user.
+# Commands with explicit Level-2 running-agent handlers in gateway/run.py.
+# Listed here for introspection / tests; semantically a subset of
+# "all resolvable commands" — which is the real bypass set (see
+# should_bypass_active_session below).
 ACTIVE_SESSION_BYPASS_COMMANDS: frozenset[str] = frozenset(
     {
         "agents",
@@ -285,9 +285,26 @@ ACTIVE_SESSION_BYPASS_COMMANDS: frozenset[str] = frozenset(
 
 
 def should_bypass_active_session(command_name: str | None) -> bool:
-    """Return True when a slash command must bypass active-session queuing."""
-    cmd = resolve_command(command_name) if command_name else None
-    return bool(cmd and cmd.name in ACTIVE_SESSION_BYPASS_COMMANDS)
+    """Return True for any resolvable slash command.
+
+    Rationale: every gateway-registered slash command either has a
+    specific Level-2 handler in gateway/run.py (/stop, /new, /model,
+    /approve, etc.) or reaches the running-agent catch-all that returns
+    a "busy — wait or /stop first" response. In both paths the command
+    is dispatched, not queued.
+
+    Queueing is always wrong for a recognized slash command because the
+    safety net in gateway.run discards any command text that reaches
+    the pending queue — which meant a mid-run /model (or /reasoning,
+    /voice, /insights, /title, /resume, /retry, /undo, /compress,
+    /usage, /provider, /reload-mcp, /sethome, /reset) would silently
+    interrupt the agent AND get discarded, producing a zero-char
+    response. See issue #5057 / PRs #6252, #10370, #4665.
+
+    ACTIVE_SESSION_BYPASS_COMMANDS remains the subset of commands with
+    explicit Level-2 handlers; the rest fall through to the catch-all.
+    """
+    return resolve_command(command_name) is not None if command_name else False
 
 
 def _resolve_config_gates() -> set[str]:
diff --git a/tests/gateway/test_command_bypass_active_session.py b/tests/gateway/test_command_bypass_active_session.py
index c456243945a..ea910d30ba8 100644
--- a/tests/gateway/test_command_bypass_active_session.py
+++ b/tests/gateway/test_command_bypass_active_session.py
@@ -268,6 +268,82 @@ class TestCommandBypassActiveSession:
         )
 
 
+# ---------------------------------------------------------------------------
+# Tests: non-bypass-set commands (no dedicated Level-2 handler) also bypass
+# instead of interrupting + being discarded.  Regression for the Discord
+# ghost-slash-command bug where /model, /reasoning, /voice, /insights, /title,
+# /resume, /retry, /undo, /compress, /usage, /provider, /reload-mcp,
+# /sethome, /reset silently interrupted the running agent.
+# ---------------------------------------------------------------------------
+
+
+class TestAllResolvableCommandsBypassGuard:
+    """Every recognized slash command must bypass the Level-1 active-session
+    guard. Without this, commands the user fires mid-run interrupt the agent
+    AND get silently discarded by the slash-command safety net (zero-char
+    response)."""
+
+    @pytest.mark.parametrize(
+        "command_text,canonical",
+        [
+            ("/model claude-sonnet-4", "model"),
+            ("/model", "model"),
+            ("/reasoning high", "reasoning"),
+            ("/personality default", "personality"),
+            ("/voice on", "voice"),
+            ("/insights 7", "insights"),
+            ("/title my session", "title"),
+            ("/resume yesterday", "resume"),
+            ("/retry", "retry"),
+            ("/undo", "undo"),
+            ("/compress", "compress"),
+            ("/usage", "usage"),
+            ("/provider", "provider"),
+            ("/reload-mcp", "reload-mcp"),
+            ("/sethome", "sethome"),
+        ],
+    )
+    @pytest.mark.asyncio
+    async def test_command_bypasses_guard(self, command_text, canonical):
+        """Any resolvable slash command bypasses instead of being queued."""
+        adapter = _make_adapter()
+        sk = _session_key()
+        adapter._active_sessions[sk] = asyncio.Event()
+
+        await adapter.handle_message(_make_event(command_text))
+
+        assert sk not in adapter._pending_messages, (
+            f"{command_text} was queued as pending — it should bypass the guard"
+        )
+        assert len(adapter.sent_responses) > 0, (
+            f"{command_text} produced no response — it should be dispatched, "
+            "not silently discarded"
+        )
+
+    def test_should_bypass_returns_true_for_every_registered_command(self):
+        """Spot-check: the commands previously-broken on Discord all bypass."""
+        from hermes_cli.commands import should_bypass_active_session
+
+        for cmd in (
+            "model", "reasoning", "personality", "voice", "insights", "title",
+            "resume", "retry", "undo", "compress", "usage", "provider",
+            "reload-mcp", "sethome", "reset",
+        ):
+            assert should_bypass_active_session(cmd) is True, (
+                f"/{cmd} must bypass the active-session guard"
+            )
+
+    def test_should_bypass_returns_false_for_unknown(self):
+        """Unknown words don't bypass — they get queued as user text."""
+        from hermes_cli.commands import should_bypass_active_session
+
+        assert should_bypass_active_session("foobar") is False
+        assert should_bypass_active_session(None) is False
+        assert should_bypass_active_session("") is False
+        # A file path split on whitespace: '/path/to/file.py' -> 'path/to/file.py'
+        assert should_bypass_active_session("path/to/file.py") is False
+
+
 # ---------------------------------------------------------------------------
 # Tests: non-bypass messages still get queued
 # ---------------------------------------------------------------------------

From beabbd87efcb84928b7e6387ebcfba15fbcec96a Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 18 Apr 2026 18:53:31 -0700
Subject: [PATCH 085/143] fix(gateway): close adapter resources when connect()
 fails or raises (#12339)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Gateway startup leaks aiohttp.ClientSession (and other partial-init
resources) when an adapter's connect() returns False or raises. The
adapter is never added to self.adapters, so the shutdown path at
gateway/run.py:2426 never calls disconnect() on it — Python GC later
logs 'Unclosed client session' at process exit.

Seen on 2026-04-18 18:08:16 during a double --replace takeover cycle:
one of the partial-init sessions survived past shutdown and emitted
the warning right before status=75/TEMPFAIL.

Fix:
- New GatewayRunner._safe_adapter_disconnect() helper — calls
  adapter.disconnect() and swallows any exception. Used on error paths.
- Connect loop calls it in both failure branches: success=False and
  except Exception.
- Adapter disconnect() implementations are already expected to be
  idempotent and tolerate partial-init state (they all guard on
  self._http_session / self._bridge_process before touching them).

Tests: tests/gateway/test_safe_adapter_disconnect.py — 3 cases verify
the helper forwards to disconnect, swallows exceptions, and tolerates
platform=None.
---
 gateway/run.py                                | 33 +++++++++++
 tests/gateway/test_safe_adapter_disconnect.py | 59 +++++++++++++++++++
 2 files changed, 92 insertions(+)
 create mode 100644 tests/gateway/test_safe_adapter_disconnect.py

diff --git a/gateway/run.py b/gateway/run.py
index f9782b29900..b72e95eb839 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -752,6 +752,26 @@ class GatewayRunner:
             chat_id for chat_id, mode in self._voice_mode.items() if mode == "off"
         )
 
+    async def _safe_adapter_disconnect(self, adapter, platform) -> None:
+        """Call adapter.disconnect() defensively, swallowing any error.
+
+        Used when adapter.connect() failed or raised — the adapter may
+        have allocated partial resources (aiohttp.ClientSession, poll
+        tasks, child subprocesses) that would otherwise leak and surface
+        as "Unclosed client session" warnings at process exit.
+
+        Must tolerate partial-init state and never raise, since callers
+        use it inside error-handling blocks.
+        """
+        try:
+            await adapter.disconnect()
+        except Exception as e:
+            logger.debug(
+                "Defensive %s disconnect after failed connect raised: %s",
+                platform.value if platform is not None else "adapter",
+                e,
+            )
+
     # -----------------------------------------------------------------
 
     def _flush_memories_for_session(
@@ -1913,6 +1933,15 @@ class GatewayRunner:
                     logger.info("✓ %s connected", platform.value)
                 else:
                     logger.warning("✗ %s failed to connect", platform.value)
+                    # Defensive cleanup: a failed connect() may have
+                    # allocated resources (aiohttp.ClientSession, poll
+                    # tasks, bridge subprocesses) before giving up.
+                    # Without this call, those resources are orphaned
+                    # and Python logs "Unclosed client session" at
+                    # process exit. Adapter disconnect() implementations
+                    # are expected to be idempotent and tolerate
+                    # partial-init state.
+                    await self._safe_adapter_disconnect(adapter, platform)
                     if adapter.has_fatal_error:
                         self._update_platform_runtime_status(
                             platform.value,
@@ -1953,6 +1982,10 @@ class GatewayRunner:
                         }
             except Exception as e:
                 logger.error("✗ %s error: %s", platform.value, e)
+                # Same defensive cleanup path for exceptions — an adapter
+                # that raised mid-connect may still have a live
+                # aiohttp.ClientSession or child subprocess.
+                await self._safe_adapter_disconnect(adapter, platform)
                 self._update_platform_runtime_status(
                     platform.value,
                     platform_state="retrying",
diff --git a/tests/gateway/test_safe_adapter_disconnect.py b/tests/gateway/test_safe_adapter_disconnect.py
new file mode 100644
index 00000000000..ec11f2663ad
--- /dev/null
+++ b/tests/gateway/test_safe_adapter_disconnect.py
@@ -0,0 +1,59 @@
+"""Regression tests: failed-connect path must call adapter.disconnect().
+
+When adapter.connect() returns False or raises, the adapter may have
+allocated resources (aiohttp.ClientSession, poll tasks, child
+subprocesses) before giving up. Without a defensive disconnect() call
+these leak and surface as "Unclosed client session" warnings at
+process exit (seen on the 2026-04-18 18:08:16 gateway restart).
+
+The fix: gateway/run.py wraps each adapter connect() with a safety-net
+call to _safe_adapter_disconnect() in the failure branches.
+"""
+
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from gateway.config import Platform
+from gateway.run import GatewayRunner
+
+
+@pytest.fixture
+def bare_runner():
+    """A GatewayRunner shell that only needs to support _safe_adapter_disconnect."""
+    return object.__new__(GatewayRunner)
+
+
+@pytest.mark.asyncio
+async def test_safe_disconnect_calls_adapter_disconnect(bare_runner):
+    """The helper forwards to adapter.disconnect()."""
+    adapter = MagicMock()
+    adapter.disconnect = AsyncMock(return_value=None)
+
+    await bare_runner._safe_adapter_disconnect(adapter, Platform.TELEGRAM)
+
+    adapter.disconnect.assert_awaited_once()
+
+
+@pytest.mark.asyncio
+async def test_safe_disconnect_swallows_exceptions(bare_runner):
+    """An exception in adapter.disconnect() must not propagate — the
+    caller is already on an error path."""
+    adapter = MagicMock()
+    adapter.disconnect = AsyncMock(side_effect=RuntimeError("partial init"))
+
+    # Must NOT raise
+    await bare_runner._safe_adapter_disconnect(adapter, Platform.TELEGRAM)
+
+    adapter.disconnect.assert_awaited_once()
+
+
+@pytest.mark.asyncio
+async def test_safe_disconnect_handles_none_platform(bare_runner):
+    """Logging path must tolerate platform=None."""
+    adapter = MagicMock()
+    adapter.disconnect = AsyncMock(side_effect=ValueError("nope"))
+
+    await bare_runner._safe_adapter_disconnect(adapter, None)
+
+    adapter.disconnect.assert_awaited_once()

From 2a2e5c0fed1e341c3250825ad7b5bee4190d1a71 Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Thu, 9 Apr 2026 03:06:15 -0700
Subject: [PATCH 086/143] fix: force relogin on 401/403 Codex token refresh
 failures

When the OAuth token endpoint returns 401/403 but the JSON body
doesn't contain a known error code (invalid_grant, etc.),
relogin_required stayed False. Users saw a bare error message
without guidance to re-authenticate.

Now any 401/403 from the token endpoint forces relogin_required=True,
since these status codes always indicate invalid credentials on a
refresh endpoint. 500+ errors remain as transient (no relogin).
---
 hermes_cli/auth.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index 831f81bf266..c468948e981 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -1544,6 +1544,11 @@ def refresh_codex_oauth_pure(
                 "then run `hermes auth` to re-authenticate."
             )
             relogin_required = True
+        # A 401/403 from the token endpoint always means the refresh token
+        # is invalid/expired — force relogin even if the body error code
+        # wasn't one of the known strings above.
+        if response.status_code in (401, 403) and not relogin_required:
+            relogin_required = True
         raise AuthError(
             message,
             provider="openai-codex",

From 1e5f0439d9cd037d383cf8cc786e38611d3b9bd7 Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Sat, 18 Apr 2026 18:55:21 -0700
Subject: [PATCH 087/143] docs: update Anthropic console URLs to
 platform.claude.com
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Anthropic migrated their developer console from console.anthropic.com
to platform.claude.com. Two user-facing display URLs were still pointing
to the old domain:

- hermes_cli/main.py — API key prompt in the Anthropic model flow
- run_agent.py — 401 troubleshooting output

The OAuth token refresh endpoint was already migrated in PR #3246
(with fallback).

Spotted by @LucidPaths in PR #3237.

(Salvage of #3758 — dropped the setup.py hunk since that section was
refactored away and no longer contains the stale URL.)
---
 hermes_cli/main.py | 2 +-
 run_agent.py       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index ce02c2e72c4..7e0220d9186 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -3973,7 +3973,7 @@ def _model_flow_anthropic(config, current_model=""):
 
         elif choice == "2":
             print()
-            print("  Get an API key at: https://console.anthropic.com/settings/keys")
+            print("  Get an API key at: https://platform.claude.com/settings/keys")
             print()
             try:
                 import getpass
diff --git a/run_agent.py b/run_agent.py
index 756bb62eddd..0051fce63f4 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -10151,7 +10151,7 @@ class AIAgent:
                         _dhh = _dhh_fn()
                         print(f"{self.log_prefix}     • Check ANTHROPIC_TOKEN in {_dhh}/.env for Hermes-managed OAuth/setup tokens")
                         print(f"{self.log_prefix}     • Check ANTHROPIC_API_KEY in {_dhh}/.env for API keys or legacy token values")
-                        print(f"{self.log_prefix}     • For API keys: verify at https://console.anthropic.com/settings/keys")
+                        print(f"{self.log_prefix}     • For API keys: verify at https://platform.claude.com/settings/keys")
                         print(f"{self.log_prefix}     • For Claude Code: run 'claude /login' to refresh, then retry")
                         print(f"{self.log_prefix}     • Legacy cleanup: hermes config set ANTHROPIC_TOKEN \"\"")
                         print(f"{self.log_prefix}     • Clear stale keys: hermes config set ANTHROPIC_API_KEY \"\"")

From 3fe0d503b626965bffa0c3665b1257acef3c165c Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Sun, 22 Mar 2026 18:20:25 -0700
Subject: [PATCH 088/143] fix(uninstall): properly stop and destroy gateway on
 hermes uninstall

The uninstaller's gateway cleanup was incomplete:
- Linux only (ignored macOS launchd)
- Only checked user systemd scope (missed system services)
- Didn't kill standalone gateway processes (hermes gateway run)
- Missing DBUS env setup for headless servers

Now delegates to gateway.py's existing machinery:
1. Kill any standalone gateway processes (all platforms)
2. Linux: stop + disable + remove both user AND system systemd services
3. macOS: unload + remove launchd plist
4. Warns (instead of silently failing) when system service needs sudo
---
 hermes_cli/uninstall.py | 137 ++++++++++++++++++++++++----------------
 1 file changed, 84 insertions(+), 53 deletions(-)

diff --git a/hermes_cli/uninstall.py b/hermes_cli/uninstall.py
index 8d8e3393b36..c9f2734fe47 100644
--- a/hermes_cli/uninstall.py
+++ b/hermes_cli/uninstall.py
@@ -118,57 +118,90 @@ def remove_wrapper_script():
 
 
 def uninstall_gateway_service():
-    """Stop and uninstall the gateway service if running."""
+    """Stop and uninstall the gateway service (systemd, launchd) and kill any
+    standalone gateway processes.
+
+    Delegates to the gateway module which handles:
+    - Linux: user + system systemd services (with proper DBUS env setup)
+    - macOS: launchd plists
+    - All platforms: standalone ``hermes gateway run`` processes
+    - Termux/Android: skips systemd (no systemd on Android), still kills standalone processes
+    """
     import platform
-    
-    if platform.system() != "Linux":
-        return False
+    stopped_something = False
 
-    prefix = os.getenv("PREFIX", "")
-    if os.getenv("TERMUX_VERSION") or "com.termux/files/usr" in prefix:
-        return False
-    
+    # 1. Kill any standalone gateway processes (all platforms, including Termux)
     try:
-        from hermes_cli.gateway import get_service_name
-        svc_name = get_service_name()
-    except Exception:
-        svc_name = "hermes-gateway"
-
-    service_file = Path.home() / ".config" / "systemd" / "user" / f"{svc_name}.service"
-    
-    if not service_file.exists():
-        return False
-    
-    try:
-        # Stop the service
-        subprocess.run(
-            ["systemctl", "--user", "stop", svc_name],
-            capture_output=True,
-            check=False
-        )
-        
-        # Disable the service
-        subprocess.run(
-            ["systemctl", "--user", "disable", svc_name],
-            capture_output=True,
-            check=False
-        )
-        
-        # Remove service file
-        service_file.unlink()
-        
-        # Reload systemd
-        subprocess.run(
-            ["systemctl", "--user", "daemon-reload"],
-            capture_output=True,
-            check=False
-        )
-        
-        return True
-        
+        from hermes_cli.gateway import kill_gateway_processes, find_gateway_pids
+        pids = find_gateway_pids()
+        if pids:
+            killed = kill_gateway_processes()
+            if killed:
+                log_success(f"Killed {killed} running gateway process(es)")
+                stopped_something = True
     except Exception as e:
-        log_warn(f"Could not fully remove gateway service: {e}")
-        return False
+        log_warn(f"Could not check for gateway processes: {e}")
+
+    system = platform.system()
+
+    # Termux/Android has no systemd and no launchd — nothing left to do.
+    prefix = os.getenv("PREFIX", "")
+    is_termux = bool(os.getenv("TERMUX_VERSION") or "com.termux/files/usr" in prefix)
+    if is_termux:
+        return stopped_something
+
+    # 2. Linux: uninstall systemd services (both user and system scopes)
+    if system == "Linux":
+        try:
+            from hermes_cli.gateway import (
+                get_systemd_unit_path,
+                get_service_name,
+                _systemctl_cmd,
+            )
+            svc_name = get_service_name()
+
+            for is_system in (False, True):
+                unit_path = get_systemd_unit_path(system=is_system)
+                if not unit_path.exists():
+                    continue
+
+                scope = "system" if is_system else "user"
+                try:
+                    if is_system and os.geteuid() != 0:
+                        log_warn(f"System gateway service exists at {unit_path} "
+                                 f"but needs sudo to remove")
+                        continue
+
+                    cmd = _systemctl_cmd(is_system)
+                    subprocess.run(cmd + ["stop", svc_name],
+                                   capture_output=True, check=False)
+                    subprocess.run(cmd + ["disable", svc_name],
+                                   capture_output=True, check=False)
+                    unit_path.unlink()
+                    subprocess.run(cmd + ["daemon-reload"],
+                                   capture_output=True, check=False)
+                    log_success(f"Removed {scope} gateway service ({unit_path})")
+                    stopped_something = True
+                except Exception as e:
+                    log_warn(f"Could not remove {scope} gateway service: {e}")
+        except Exception as e:
+            log_warn(f"Could not check systemd gateway services: {e}")
+
+    # 3. macOS: uninstall launchd plist
+    elif system == "Darwin":
+        try:
+            from hermes_cli.gateway import get_launchd_plist_path
+            plist_path = get_launchd_plist_path()
+            if plist_path.exists():
+                subprocess.run(["launchctl", "unload", str(plist_path)],
+                               capture_output=True, check=False)
+                plist_path.unlink()
+                log_success(f"Removed macOS gateway service ({plist_path})")
+                stopped_something = True
+        except Exception as e:
+            log_warn(f"Could not remove launchd gateway service: {e}")
+
+    return stopped_something
 
 
 def run_uninstall(args):
@@ -247,12 +280,10 @@ def run_uninstall(args):
     print(color("Uninstalling...", Colors.CYAN, Colors.BOLD))
     print()
     
-    # 1. Stop and uninstall gateway service
-    log_info("Checking for gateway service...")
-    if uninstall_gateway_service():
-        log_success("Gateway service stopped and removed")
-    else:
-        log_info("No gateway service found")
+    # 1. Stop and uninstall gateway service + kill standalone processes
+    log_info("Checking for running gateway...")
+    if not uninstall_gateway_service():
+        log_info("No gateway service or processes found")
     
     # 2. Remove PATH entries from shell configs
     log_info("Removing PATH entries from shell configs...")

From 79c5a381c59c948a7334988657b5bf19c4765a32 Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Sat, 18 Apr 2026 19:14:55 -0700
Subject: [PATCH 089/143] feat(uninstall): offer to remove named profiles when
 uninstalling from default

When `hermes uninstall` runs from the default HERMES_HOME (~/.hermes)
and other named profiles exist under ~/.hermes/profiles/, show them in
the installation overview and prompt:

    Also stop and remove these N profile(s)? [y/N]

If confirmed, for each named profile we:
  1. Shell out to `python -m hermes_cli.main -p <name> gateway stop/uninstall`
     to stop the gateway and remove its systemd unit or launchd plist
     (service names + unit paths are derived from HERMES_HOME, so we
     can't cleanly switch in-process)
  2. Remove the ~/.local/bin/<name> alias wrapper (outside HERMES_HOME)
  3. Wipe the profile's HERMES_HOME dir

Previously `hermes uninstall` was silently profile-scoped, leaving
zombie systemd units at ~/.config/systemd/user/hermes-gateway-<profile>.service
and zombie HERMES_HOMEs under ~/.hermes/profiles/ whenever a user
uninstalled from default with other profiles configured.

The prompt only appears for a full uninstall run from the default root.
Uninstalling from within a named profile stays profile-scoped as before.
---
 hermes_cli/uninstall.py | 130 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 127 insertions(+), 3 deletions(-)

diff --git a/hermes_cli/uninstall.py b/hermes_cli/uninstall.py
index c9f2734fe47..67cea418209 100644
--- a/hermes_cli/uninstall.py
+++ b/hermes_cli/uninstall.py
@@ -204,6 +204,80 @@ def uninstall_gateway_service():
     return stopped_something
 
 
+def _is_default_hermes_home(hermes_home: Path) -> bool:
+    """Return True when ``hermes_home`` points at the default (non-profile) root."""
+    try:
+        from hermes_constants import get_default_hermes_root
+        return hermes_home.resolve() == get_default_hermes_root().resolve()
+    except Exception:
+        return False
+
+
+def _discover_named_profiles():
+    """Return a list of ``ProfileInfo`` for every non-default profile, or ``[]``
+    if profile support is unavailable or nothing is installed beyond the
+    default root."""
+    try:
+        from hermes_cli.profiles import list_profiles
+    except Exception:
+        return []
+    try:
+        return [p for p in list_profiles() if not getattr(p, "is_default", False)]
+    except Exception as e:
+        log_warn(f"Could not enumerate profiles: {e}")
+        return []
+
+
+def _uninstall_profile(profile) -> None:
+    """Fully uninstall a single named profile: stop its gateway service,
+    remove its alias wrapper, and wipe its HERMES_HOME directory.
+
+    We shell out to ``hermes -p <name> gateway stop|uninstall`` because
+    service names, unit paths, and plist paths are all derived from the
+    current HERMES_HOME and can't be easily switched in-process.
+    """
+    import sys as _sys
+    name = profile.name
+    profile_home = profile.path
+
+    log_info(f"Uninstalling profile '{name}'...")
+
+    # 1. Stop and remove this profile's gateway service.
+    #    Use `python -m hermes_cli.main` so we don't depend on a `hermes`
+    #    wrapper that may be half-removed mid-uninstall.
+    hermes_invocation = [_sys.executable, "-m", "hermes_cli.main", "--profile", name]
+    for subcmd in ("stop", "uninstall"):
+        try:
+            subprocess.run(
+                hermes_invocation + ["gateway", subcmd],
+                capture_output=True,
+                text=True,
+                timeout=60,
+                check=False,
+            )
+        except subprocess.TimeoutExpired:
+            log_warn(f"  Gateway {subcmd} timed out for '{name}'")
+        except Exception as e:
+            log_warn(f"  Could not run gateway {subcmd} for '{name}': {e}")
+
+    # 2. Remove the wrapper alias script at ~/.local/bin/<name> (if any).
+    alias_path = getattr(profile, "alias_path", None)
+    if alias_path and alias_path.exists():
+        try:
+            alias_path.unlink()
+            log_success(f"  Removed alias {alias_path}")
+        except Exception as e:
+            log_warn(f"  Could not remove alias {alias_path}: {e}")
+
+    # 3. Wipe the profile's HERMES_HOME directory.
+    try:
+        if profile_home.exists():
+            shutil.rmtree(profile_home)
+            log_success(f"  Removed {profile_home}")
+    except Exception as e:
+        log_warn(f"  Could not remove {profile_home}: {e}")
+
+
 def run_uninstall(args):
     """
     Run the uninstall process.
@@ -214,7 +288,13 @@ def run_uninstall(args):
     """
     project_root = get_project_root()
     hermes_home = get_hermes_home()
-    
+
+    # Detect named profiles when uninstalling from the default root —
+    # offer to clean them up too instead of leaving zombie HERMES_HOMEs
+    # and systemd units behind.
+    is_default_profile = _is_default_hermes_home(hermes_home)
+    named_profiles = _discover_named_profiles() if is_default_profile else []
+
     print()
     print(color("┌─────────────────────────────────────────────────────────┐", Colors.MAGENTA, Colors.BOLD))
     print(color("│            ⚕ Hermes Agent Uninstaller                  │", Colors.MAGENTA, Colors.BOLD))
@@ -228,6 +308,13 @@ def run_uninstall(args):
     print(f"  Secrets: {hermes_home / '.env'}")
     print(f"  Data:    {hermes_home / 'cron/'}, {hermes_home / 'sessions/'}, {hermes_home / 'logs/'}")
     print()
+
+    if named_profiles:
+        print(color("Other profiles detected:", Colors.CYAN, Colors.BOLD))
+        for p in named_profiles:
+            running = " (gateway running)" if getattr(p, "gateway_running", False) else ""
+            print(f"  • {p.name}{running}: {p.path}")
+        print()
     
     # Ask for confirmation
     print(color("Uninstall Options:", Colors.YELLOW, Colors.BOLD))
@@ -254,12 +341,40 @@ def run_uninstall(args):
         return
     
     full_uninstall = (choice == "2")
-    
+
+    # When doing a full uninstall from the default profile, also offer to
+    # remove any named profiles — stopping their gateway services, unlinking
+    # their alias wrappers, and wiping their HERMES_HOME dirs. Otherwise
+    # those leave zombie services and data behind.
+    remove_profiles = False
+    if full_uninstall and named_profiles:
+        print()
+        print(color("Other profiles will NOT be removed by default.", Colors.YELLOW))
+        print(f"Found {len(named_profiles)} named profile(s): " +
+              ", ".join(p.name for p in named_profiles))
+        print()
+        try:
+            resp = input(color(
+                f"Also stop and remove these {len(named_profiles)} profile(s)? [y/N]: ",
+                Colors.BOLD
+            )).strip().lower()
+        except (KeyboardInterrupt, EOFError):
+            print()
+            print("Cancelled.")
+            return
+        remove_profiles = resp in ("y", "yes")
+
     # Final confirmation
     print()
     if full_uninstall:
         print(color("⚠️  WARNING: This will permanently delete ALL Hermes data!", Colors.RED, Colors.BOLD))
         print(color("   Including: configs, API keys, sessions, scheduled jobs, logs", Colors.RED))
+        if remove_profiles:
+            print(color(
+                f"   Plus {len(named_profiles)} profile(s): " +
+                ", ".join(p.name for p in named_profiles),
+                Colors.RED
+            ))
     else:
         print("This will remove the Hermes code but keep your configuration and data.")
     
@@ -322,8 +437,17 @@ def run_uninstall(args):
         log_warn(f"Could not fully remove {project_root}: {e}")
         log_info("You may need to manually remove it")
     
-    # 5. Optionally remove ~/.hermes/ data directory
+    # 5. Optionally remove ~/.hermes/ data directory (and named profiles)
     if full_uninstall:
+        # 5a. Stop and remove each named profile's gateway service and
+        #     alias wrapper. The profile HERMES_HOME dirs live under
+        #     ``<default>/profiles/<name>/`` and will be swept away by the
+        #     rmtree below, but services + alias scripts live OUTSIDE the
+        #     default root and have to be cleaned up explicitly.
+        if remove_profiles and named_profiles:
+            for prof in named_profiles:
+                _uninstall_profile(prof)
+
         log_info("Removing configuration and data...")
         try:
             if hermes_home.exists():

From 9489d1577db1b05e869b9d842ccdec3197f1954b Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Sat, 18 Apr 2026 19:17:52 -0700
Subject: [PATCH 090/143] fix(agent): strip unterminated <think> blocks from
 visible content
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Providers served via NIM (MiniMax M2.7, some Moonshot/DeepSeek proxies) sometimes drop the closing </think> tag, leaving raw reasoning in the assistant's content field.  _strip_think_blocks()'s closed-pair regexes require a matching close tag, so they only match complete blocks — the reasoning text of any orphan <think>...EOF survived the stripper and leaked to users (#8878, #9568, #10408).

Adds an unterminated-tag pass that fires when an open reasoning tag sits at a block boundary (start of text or after a newline) with no matching close.  Everything from that tag to end of string is stripped.  The block-boundary check mirrors gateway/stream_consumer.py's filter so models that mention <think> in prose are not over-stripped.

Also makes the closed-pair regexes consistently case-insensitive so <THINK>...</THINK> and <Thinking>...</Thinking> are handled uniformly — without this, a mixed-case pair whose regex was still case-sensitive (e.g. <THINK>...</THINK>) would bypass the closed-pair pass and be caught by the new unterminated-tag pass, taking trailing visible content with it.

6 new regression tests in TestStripThinkBlocks covering: unterminated <think>, unterminated <thought>, multi-line unterminated, line-start orphan with preserved prefix, prose-mention non-regression, mixed-case closed pairs.

The implementation is inspired by @luinbytes's PR #10408 report of the NIM/MiniMax symptom.  This commit does not include the 💭/🧠 emoji regexes from that PR — those glyphs are Hermes CLI display decorations, not model content markers.
---
 run_agent.py                      | 46 ++++++++++++++++++++++----
 tests/run_agent/test_run_agent.py | 54 +++++++++++++++++++++++++++++++
 2 files changed, 93 insertions(+), 7 deletions(-)

diff --git a/run_agent.py b/run_agent.py
index 0051fce63f4..33635ef2fee 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -2172,17 +2172,49 @@ class AIAgent:
         return bool(cleaned.strip())
     
     def _strip_think_blocks(self, content: str) -> str:
-        """Remove reasoning/thinking blocks from content, returning only visible text."""
+        """Remove reasoning/thinking blocks from content, returning only visible text.
+
+        Handles four cases:
+          1. Closed tag pairs (``<think>…</think>``) — the common path when
+             the provider emits complete reasoning blocks.
+          2. Unterminated open tag at a block boundary (start of text or
+             after a newline) — e.g. MiniMax M2.7 / NIM endpoints where the
+             closing tag is dropped.  Everything from the open tag to end
+             of string is stripped.  The block-boundary check mirrors
+             ``gateway/stream_consumer.py``'s filter so models that mention
+             ``<think>`` in prose aren't over-stripped.
+          3. Stray orphan open/close tags that slip through.
+          4. Tag variants: ``<think>``, ``<thinking>``, ``<reasoning>``,
+             ``<REASONING_SCRATCHPAD>``, ``<thought>`` (Gemma 4), all
+             case-insensitive.
+        """
         if not content:
             return ""
-        # Strip all reasoning tag variants: <think>, <thinking>, <THINKING>,
-        # <reasoning>, <REASONING_SCRATCHPAD>, <thought> (Gemma 4)
-        content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
+        # 1. Closed tag pairs — case-insensitive for all variants so
+        #    mixed-case tags (<THINK>, <Thinking>) don't slip through to
+        #    the unterminated-tag pass and take trailing content with them.
+        content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL | re.IGNORECASE)
         content = re.sub(r'<thinking>.*?</thinking>', '', content, flags=re.DOTALL | re.IGNORECASE)
-        content = re.sub(r'<reasoning>.*?</reasoning>', '', content, flags=re.DOTALL)
-        content = re.sub(r'<REASONING_SCRATCHPAD>.*?</REASONING_SCRATCHPAD>', '', content, flags=re.DOTALL)
+        content = re.sub(r'<reasoning>.*?</reasoning>', '', content, flags=re.DOTALL | re.IGNORECASE)
+        content = re.sub(r'<REASONING_SCRATCHPAD>.*?</REASONING_SCRATCHPAD>', '', content, flags=re.DOTALL | re.IGNORECASE)
         content = re.sub(r'<thought>.*?</thought>', '', content, flags=re.DOTALL | re.IGNORECASE)
-        content = re.sub(r'</?(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>\s*', '', content, flags=re.IGNORECASE)
+        # 2. Unterminated reasoning block — open tag at a block boundary
+        #    (start of text, or after a newline) with no matching close.
+        #    Strip from the tag to end of string.  Fixes #8878 / #9568
+        #    (MiniMax M2.7 leaking raw reasoning into assistant content).
+        content = re.sub(
+            r'(?:^|\n)[ \t]*<(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)\b[^>]*>.*$',
+            '',
+            content,
+            flags=re.DOTALL | re.IGNORECASE,
+        )
+        # 3. Stray orphan open/close tags that slipped through.
+        content = re.sub(
+            r'</?(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>\s*',
+            '',
+            content,
+            flags=re.IGNORECASE,
+        )
         return content
 
     @staticmethod
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index 86f95580f02..bde5ed5aae7 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -317,6 +317,60 @@ class TestStripThinkBlocks:
         result = agent._strip_think_blocks("<thought>orphaned reasoning without close")
         assert "<thought>" not in result
 
+    # ─── Unterminated-block coverage (#8878, #9568, #10408) ──────────────
+    # Reasoning models served via NIM / MiniMax M2.7 frequently drop the
+    # closing tag, leaking raw reasoning into assistant content. The open
+    # tag appears at a block boundary (start of text or after a newline);
+    # everything from that tag to end-of-string is stripped.
+
+    def test_unterminated_think_block_content_stripped(self, agent):
+        """Content after unterminated <think> is fully stripped."""
+        result = agent._strip_think_blocks("<think>orphaned reasoning without close")
+        assert "orphaned reasoning" not in result
+        assert result.strip() == ""
+
+    def test_unterminated_thought_block_content_stripped(self, agent):
+        """Gemma-style <thought> with no close is fully stripped."""
+        result = agent._strip_think_blocks("<thought>orphaned reasoning without close")
+        assert "orphaned reasoning" not in result
+        assert result.strip() == ""
+
+    def test_unterminated_multiline_block_stripped(self, agent):
+        """Multi-line unterminated blocks are stripped in full."""
+        result = agent._strip_think_blocks(
+            "<think>\nmulti\nline\nreasoning\nthat never closes"
+        )
+        assert "multi" not in result
+        assert "never closes" not in result
+
+    def test_unterminated_block_after_answer_preserves_prefix(self, agent):
+        """Visible answer before a line-starting unterminated tag is kept."""
+        result = agent._strip_think_blocks(
+            "Answer is 42.\n<think>actually let me reconsider"
+        )
+        assert "Answer is 42." in result
+        assert "reconsider" not in result
+
+    def test_inline_think_mention_in_prose_not_over_stripped(self, agent):
+        """Mid-line `<think>` mentioned in prose must not swallow the rest
+        of the content (the block-boundary check prevents this)."""
+        text = "Use the <think> tag like this in your prose."
+        result = agent._strip_think_blocks(text)
+        # Block-boundary check prevents unterminated-strip from firing
+        assert "prose" in result
+        assert "Use the" in result
+
+    def test_mixed_case_closed_pair_stripped(self, agent):
+        """Mixed-case variants <THINK>…</THINK>, <Thinking>…</Thinking> are
+        handled by case-insensitive closed-pair regex, so the trailing
+        content is preserved."""
+        result = agent._strip_think_blocks("<THINK>upper</THINK>final")
+        assert "upper" not in result
+        assert "final" in result
+        result = agent._strip_think_blocks("<Thinking>mixed</Thinking>final")
+        assert "mixed" not in result
+        assert "final" in result
+
 
 class TestExtractReasoning:
     def test_reasoning_field(self, agent):

From ec48ec5530871edda11e068d0f03c16985f43455 Mon Sep 17 00:00:00 2001
From: Tranquil-Flow <tranquil_flow@protonmail.com>
Date: Sat, 18 Apr 2026 19:18:03 -0700
Subject: [PATCH 091/143] fix(agent): strip <think> blocks from stored
 assistant content
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Inline reasoning tags in an assistant message's content field leak to every downstream consumer: messaging platforms (#8878, #9568), API replay of prior turns, session transcript, CLI recap, generated session titles, and context compression.  _extract_reasoning() already captures the reasoning text into msg['reasoning'] separately, so the raw tags in content are redundant.

Stripping once at the storage boundary in _build_assistant_message() cleans the content for every downstream path in one place — no per-platform or per-path stripper needed.  Measured impact on a real MiniMax M2.7-highspeed session (per @luoyejiaoe-source, #9306): 55% of assistant messages started with <think> blocks, 51/100 session titles were polluted, 16% content-size reduction.

3 new regression tests in TestBuildAssistantMessage: closed-pair strip with reasoning capture, no-think-tag passthrough, and unterminated-block strip.

Resolves #8878 and #9568.

Originally proposed as PR #9250.
---
 run_agent.py                      | 14 +++++++++++++
 tests/run_agent/test_run_agent.py | 35 +++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)

diff --git a/run_agent.py b/run_agent.py
index 33635ef2fee..c87bd351528 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -7294,6 +7294,20 @@ class AIAgent:
         if reasoning_text:
             reasoning_text = _sanitize_surrogates(reasoning_text)
 
+        # Strip inline reasoning tags (<think>…</think> etc.) from the stored
+        # assistant content.  Reasoning was already captured into
+        # ``reasoning_text`` above (either from structured fields or the
+        # inline-block fallback), so the raw tags in content are redundant.
+        # Leaving them in place caused reasoning to leak to messaging
+        # platforms (#8878, #9568), inflate context on subsequent turns
+        # (#9306 observed 16% content-size reduction on a real MiniMax
+        # session), and pollute generated session titles.  One strip at the
+        # storage boundary cleans content for every downstream consumer:
+        # API replay, session transcript, gateway delivery, CLI display,
+        # compression, title generation.
+        if isinstance(_san_content, str) and _san_content:
+            _san_content = self._strip_think_blocks(_san_content).strip()
+
         msg = {
             "role": "assistant",
             "content": _san_content,
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index bde5ed5aae7..d30445cf459 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -1142,6 +1142,41 @@ class TestBuildAssistantMessage:
         result = agent._build_assistant_message(msg, "tool_calls")
         assert "extra_content" not in result["tool_calls"][0]
 
+    def test_think_blocks_stripped_from_content(self, agent):
+        """Inline <think> blocks are stripped from stored content (#8878, #9568).
+
+        The reasoning is captured into ``msg['reasoning']`` via the inline
+        fallback in ``_extract_reasoning``; the raw tags in ``content`` are
+        redundant and leak to messaging platforms / pollute titles /
+        inflate context if left in place.
+        """
+        msg = _mock_assistant_msg(
+            content="<think>internal reasoning</think>The actual answer."
+        )
+        result = agent._build_assistant_message(msg, "stop")
+        assert "<think>" not in result["content"]
+        assert "internal reasoning" not in result["content"]
+        assert "The actual answer." in result["content"]
+        # Reasoning preserved separately via inline extraction fallback
+        assert result["reasoning"] == "internal reasoning"
+
+    def test_think_blocks_stripped_preserves_normal_content(self, agent):
+        """Content without reasoning tags passes through unchanged."""
+        msg = _mock_assistant_msg(content="No thinking here.")
+        result = agent._build_assistant_message(msg, "stop")
+        assert result["content"] == "No thinking here."
+
+    def test_unterminated_think_block_stripped(self, agent):
+        """Unterminated <think> block (MiniMax / NIM dropped close tag) is
+        fully stripped from stored content."""
+        msg = _mock_assistant_msg(
+            content="<think>reasoning that never closes on this NIM endpoint"
+        )
+        result = agent._build_assistant_message(msg, "stop")
+        assert "<think>" not in result["content"]
+        assert "reasoning that never closes" not in result["content"]
+        assert result["content"] == ""
+
 
 class TestFormatToolsForSystemMessage:
     def test_no_tools_returns_empty_array(self, agent):

From bd01ec7885f9cc05ef44d8e3e71ce043617b0dda Mon Sep 17 00:00:00 2001
From: yeyitech <yeyitech@users.noreply.github.com>
Date: Sat, 18 Apr 2026 19:18:14 -0700
Subject: [PATCH 092/143] fix(cli): strip all reasoning tag variants from
 /resume recap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

HermesCLI._display_resumed_history() calls the module-level _strip_reasoning_tags() to clean assistant content before rendering the recap panel.  The tag list was missing <thought> (Gemma 4) and there was no pass for stray orphan </tag> closes, so those variants leaked internal reasoning into the recap display (#11316).

- Add <thought> to _REASONING_TAGS.
- Add a third regex pass that strips orphan close tags (e.g. 'stuff</think>answer' → 'stuffanswer').
- Apply IGNORECASE to closed-pair and unclosed-pair passes so mixed-case variants (<THINK>, <Thinking>) are handled uniformly — previously both 'THINKING' and 'thinking' had to be listed explicitly as distinct tuple entries, which missed <Thinking>.

7 new regression tests in tests/cli/test_resume_display.py covering: <think>, <thinking>, <reasoning>, <thought>, unclosed <think>, multiple interleaved blocks, and orphan </think> close.

Resolves #11316.

Originally proposed as PR #11366.
---
 cli.py                           |  42 ++++++++++-
 tests/cli/test_resume_display.py | 121 +++++++++++++++++++++++++++++++
 2 files changed, 159 insertions(+), 4 deletions(-)

diff --git a/cli.py b/cli.py
index 02c1a4f7ef6..c9ce95e9f2e 100644
--- a/cli.py
+++ b/cli.py
@@ -83,17 +83,51 @@ load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env)
 _REASONING_TAGS = (
     "REASONING_SCRATCHPAD",
     "think",
-    "reasoning",
-    "THINKING",
     "thinking",
+    "reasoning",
+    "thought",
 )
 
 
 def _strip_reasoning_tags(text: str) -> str:
+    """Remove reasoning/thinking blocks from displayed text.
+
+    Handles every case:
+      * Closed pairs ``<tag>…</tag>`` (case-insensitive, multi-line).
+      * Unterminated open tags that run to end-of-text (e.g. truncated
+        generations on NIM/MiniMax where the close tag is dropped).
+      * Stray orphan close tags (``stuff</think>answer``) left behind by
+        partial-content dumps.
+
+    Covers the variants emitted by reasoning models today: ``<think>``,
+    ``<thinking>``, ``<reasoning>``, ``<REASONING_SCRATCHPAD>``, and
+    ``<thought>`` (Gemma 4).  Must stay in sync with
+    ``run_agent.py::_strip_think_blocks`` and the stream consumer's
+    ``_OPEN_THINK_TAGS`` / ``_CLOSE_THINK_TAGS`` tuples.
+    """
     cleaned = text
     for tag in _REASONING_TAGS:
-        cleaned = re.sub(rf"<{tag}>.*?</{tag}>\s*", "", cleaned, flags=re.DOTALL)
-        cleaned = re.sub(rf"<{tag}>.*$", "", cleaned, flags=re.DOTALL)
+        # Closed pair — case-insensitive so <THINK>…</THINK> is handled too.
+        cleaned = re.sub(
+            rf"<{tag}>.*?</{tag}>\s*",
+            "",
+            cleaned,
+            flags=re.DOTALL | re.IGNORECASE,
+        )
+        # Unterminated open tag — strip from the tag to end of text.
+        cleaned = re.sub(
+            rf"<{tag}>.*$",
+            "",
+            cleaned,
+            flags=re.DOTALL | re.IGNORECASE,
+        )
+        # Stray orphan close tag left behind by partial dumps.
+        cleaned = re.sub(
+            rf"</{tag}>\s*",
+            "",
+            cleaned,
+            flags=re.IGNORECASE,
+        )
     return cleaned.strip()
 
 
diff --git a/tests/cli/test_resume_display.py b/tests/cli/test_resume_display.py
index d183e48b2bc..bb931bb1fea 100644
--- a/tests/cli/test_resume_display.py
+++ b/tests/cli/test_resume_display.py
@@ -344,6 +344,127 @@ class TestDisplayResumedHistory:
         assert "Just thinking" not in output
         assert "Hi there!" in output
 
+    def test_think_tags_stripped(self):
+        """<think>...</think> blocks should be stripped from display (#11316)."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "Solve this"},
+            {
+                "role": "assistant",
+                "content": "<think>\nI need to reason carefully here.\n</think>\n\nThe answer is 7.",
+            },
+        ]
+        output = self._capture_display(cli)
+
+        assert "<think>" not in output
+        assert "</think>" not in output
+        assert "I need to reason carefully here" not in output
+        assert "The answer is 7" in output
+
+    def test_thinking_tags_stripped(self):
+        """<thinking>...</thinking> blocks should be stripped from display."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "What is 2+2?"},
+            {
+                "role": "assistant",
+                "content": "<thinking>\nLet me compute: 2 + 2 = 4\n</thinking>\n\nThe answer is 4.",
+            },
+        ]
+        output = self._capture_display(cli)
+
+        assert "<thinking>" not in output
+        assert "Let me compute" not in output
+        assert "The answer is 4" in output
+
+    def test_reasoning_tags_stripped(self):
+        """<reasoning>...</reasoning> blocks should be stripped from display."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "Explain gravity"},
+            {
+                "role": "assistant",
+                "content": (
+                    "<reasoning>\nGravity is a fundamental force...\n</reasoning>\n\n"
+                    "Gravity pulls objects together."
+                ),
+            },
+        ]
+        output = self._capture_display(cli)
+
+        assert "<reasoning>" not in output
+        assert "fundamental force" not in output
+        assert "Gravity pulls objects together" in output
+
+    def test_thought_tags_stripped(self):
+        """<thought>...</thought> blocks (Gemma 4) should be stripped."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "Say hello"},
+            {
+                "role": "assistant",
+                "content": "<thought>\nInternal thought here.\n</thought>\n\nHello!",
+            },
+        ]
+        output = self._capture_display(cli)
+
+        assert "<thought>" not in output
+        assert "Internal thought here" not in output
+        assert "Hello!" in output
+
+    def test_unclosed_think_tag_stripped(self):
+        """Unclosed <think> (truncated generation) should not leak reasoning."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "Truncated response"},
+            {
+                "role": "assistant",
+                "content": "Some text before.\n<think>\nUnfinished reasoning...",
+            },
+        ]
+        output = self._capture_display(cli)
+
+        assert "<think>" not in output
+        assert "Unfinished reasoning" not in output
+        assert "Some text before" in output
+
+    def test_multiple_reasoning_blocks_all_stripped(self):
+        """Multiple interleaved reasoning blocks are all stripped."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "Complex question"},
+            {
+                "role": "assistant",
+                "content": (
+                    "<think>\nFirst thought.\n</think>\n"
+                    "Partial text.\n"
+                    "<reasoning>\nSecond thought.\n</reasoning>\n"
+                    "Final answer."
+                ),
+            },
+        ]
+        output = self._capture_display(cli)
+
+        assert "First thought" not in output
+        assert "Second thought" not in output
+        assert "Partial text" in output
+        assert "Final answer" in output
+
+    def test_orphan_closing_think_tag_stripped(self):
+        """A stray </think> with no matching open should not render to user."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "Broken output"},
+            {
+                "role": "assistant",
+                "content": "some leftover reasoning</think>Visible answer.",
+            },
+        ]
+        output = self._capture_display(cli)
+
+        assert "</think>" not in output
+        assert "Visible answer" in output
+
     def test_assistant_with_text_and_tool_calls(self):
         """When an assistant message has both text content AND tool_calls."""
         cli = _make_cli()

From b02833f32d4b22d989c668a0d6cb2f1cf3b57f75 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 18 Apr 2026 19:19:46 -0700
Subject: [PATCH 093/143] fix(codex): Hermes owns its own Codex auth; stop
 touching ~/.codex/auth.json (#12360)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Codex OAuth refresh tokens are single-use and rotate on every refresh.
Sharing them with the Codex CLI / VS Code via ~/.codex/auth.json made
concurrent use of both tools a race: whoever refreshed last invalidated
the other side's refresh_token.  On top of that, the silent auto-import
path picked up placeholder / aborted-auth data from ~/.codex/auth.json
(e.g. literal {"access_token":"access-new","refresh_token":"refresh-new"})
and seeded it into the Hermes pool as an entry the selector could
eventually pick.

Hermes now owns its own Codex auth state end-to-end:

Removed
- agent/credential_pool.py: _sync_codex_entry_from_cli() method,
  its pre-refresh + retry + _available_entries call sites, and the
  post-refresh write-back to ~/.codex/auth.json.
- agent/credential_pool.py: auto-import from ~/.codex/auth.json in
  _seed_from_singletons() — users now run `hermes auth openai-codex`
  explicitly.
- hermes_cli/auth.py: silent runtime migration in
  resolve_codex_runtime_credentials() — now surfaces
  `codex_auth_missing` directly (message already points to `hermes auth`).
- hermes_cli/auth.py: post-refresh write-back in
  _refresh_codex_auth_tokens().
- hermes_cli/auth.py: dead helper _write_codex_cli_tokens() and its 4
  tests in test_auth_codex_provider.py.

Kept
- hermes_cli/auth.py: _import_codex_cli_tokens() — still used by the
  interactive `hermes auth openai-codex` setup flow for a user-gated
  one-time import (with "a separate login is recommended" messaging).

User-visible impact
- On existing installs with Hermes auth already present: no change.
- On a fresh install where the user has only logged in via Codex CLI:
  `hermes chat --provider openai-codex` now fails with "No Codex
  credentials stored. Run `hermes auth` to authenticate." The
  interactive setup flow then detects ~/.codex/auth.json and offers a
  one-time import.
- On an install where Codex CLI later refreshes its token: Hermes is
  unaffected (we no longer read from that file at runtime).

Tests
- tests/hermes_cli/test_auth_codex_provider.py: 15/15 pass.
- tests/hermes_cli/test_auth_commands.py: 20/20 pass.
- tests/agent/test_credential_pool.py: 31/31 pass.
- Live E2E on openai-codex/gpt-5.4: 1 API call, 1.7s latency,
  3 log lines, no refresh events, no auth drama.

The related 14:52 refresh-loop bug (hundreds of rotations/minute on a
single entry) is a separate issue — that requires a refresh-attempt
cap on the auth-recovery path in run_agent.py, which remains open.
---
 agent/credential_pool.py                     | 129 ++-----------------
 hermes_cli/auth.py                           |  69 +---------
 tests/hermes_cli/test_auth_codex_provider.py |  93 -------------
 3 files changed, 9 insertions(+), 282 deletions(-)

diff --git a/agent/credential_pool.py b/agent/credential_pool.py
index a67eee6c422..b02514e990c 100644
--- a/agent/credential_pool.py
+++ b/agent/credential_pool.py
@@ -22,8 +22,6 @@ from hermes_cli.auth import (
     _auth_store_lock,
     _codex_access_token_is_expiring,
     _decode_jwt_claims,
-    _import_codex_cli_tokens,
-    _write_codex_cli_tokens,
     _load_auth_store,
     _load_provider_state,
     _resolve_kimi_base_url,
@@ -457,39 +455,6 @@ class CredentialPool:
             logger.debug("Failed to sync from credentials file: %s", exc)
         return entry
 
-    def _sync_codex_entry_from_cli(self, entry: PooledCredential) -> PooledCredential:
-        """Sync an openai-codex pool entry from ~/.codex/auth.json if tokens differ.
-
-        OpenAI OAuth refresh tokens are single-use and rotate on every refresh.
-        When the Codex CLI (or another Hermes profile) refreshes its token,
-        the pool entry's refresh_token becomes stale.  This method detects that
-        by comparing against ~/.codex/auth.json and syncing the fresh pair.
-        """
-        if self.provider != "openai-codex":
-            return entry
-        try:
-            cli_tokens = _import_codex_cli_tokens()
-            if not cli_tokens:
-                return entry
-            cli_refresh = cli_tokens.get("refresh_token", "")
-            cli_access = cli_tokens.get("access_token", "")
-            if cli_refresh and cli_refresh != entry.refresh_token:
-                logger.debug("Pool entry %s: syncing tokens from ~/.codex/auth.json (refresh token changed)", entry.id)
-                updated = replace(
-                    entry,
-                    access_token=cli_access,
-                    refresh_token=cli_refresh,
-                    last_status=None,
-                    last_status_at=None,
-                    last_error_code=None,
-                )
-                self._replace_entry(entry, updated)
-                self._persist()
-                return updated
-        except Exception as exc:
-            logger.debug("Failed to sync from ~/.codex/auth.json: %s", exc)
-        return entry
-
     def _sync_device_code_entry_to_auth_store(self, entry: PooledCredential) -> None:
         """Write refreshed pool entry tokens back to auth.json providers.
 
@@ -585,13 +550,6 @@ class CredentialPool:
                     except Exception as wexc:
                         logger.debug("Failed to write refreshed token to credentials file: %s", wexc)
             elif self.provider == "openai-codex":
-                # Proactively sync from ~/.codex/auth.json before refresh.
-                # The Codex CLI (or another Hermes profile) may have already
-                # consumed our refresh_token.  Syncing first avoids a
-                # "refresh_token_reused" error when the CLI has a newer pair.
-                synced = self._sync_codex_entry_from_cli(entry)
-                if synced is not entry:
-                    entry = synced
                 refreshed = auth_mod.refresh_codex_oauth_pure(
                     entry.access_token,
                     entry.refresh_token,
@@ -677,45 +635,6 @@ class CredentialPool:
                     # Credentials file had a valid (non-expired) token — use it directly
                     logger.debug("Credentials file has valid token, using without refresh")
                     return synced
-            # For openai-codex: the refresh_token may have been consumed by
-            # the Codex CLI between our proactive sync and the refresh call.
-            # Re-sync and retry once.
-            if self.provider == "openai-codex":
-                synced = self._sync_codex_entry_from_cli(entry)
-                if synced.refresh_token != entry.refresh_token:
-                    logger.debug("Retrying Codex refresh with synced token from ~/.codex/auth.json")
-                    try:
-                        refreshed = auth_mod.refresh_codex_oauth_pure(
-                            synced.access_token,
-                            synced.refresh_token,
-                        )
-                        updated = replace(
-                            synced,
-                            access_token=refreshed["access_token"],
-                            refresh_token=refreshed["refresh_token"],
-                            last_refresh=refreshed.get("last_refresh"),
-                            last_status=STATUS_OK,
-                            last_status_at=None,
-                            last_error_code=None,
-                        )
-                        self._replace_entry(synced, updated)
-                        self._persist()
-                        self._sync_device_code_entry_to_auth_store(updated)
-                        try:
-                            _write_codex_cli_tokens(
-                                updated.access_token,
-                                updated.refresh_token,
-                                last_refresh=updated.last_refresh,
-                            )
-                        except Exception as wexc:
-                            logger.debug("Failed to write refreshed Codex tokens to CLI file (retry): %s", wexc)
-                        return updated
-                    except Exception as retry_exc:
-                        logger.debug("Codex retry refresh also failed: %s", retry_exc)
-                elif not self._entry_needs_refresh(synced):
-                    logger.debug("Codex CLI has valid token, using without refresh")
-                    self._sync_device_code_entry_to_auth_store(synced)
-                    return synced
             self._mark_exhausted(entry, None)
             return None
 
@@ -734,17 +653,6 @@ class CredentialPool:
         # _seed_from_singletons() on the next load_pool() sees fresh state
         # instead of re-seeding stale/consumed tokens.
         self._sync_device_code_entry_to_auth_store(updated)
-        # Write refreshed tokens back to ~/.codex/auth.json so Codex CLI
-        # and VS Code don't hit "refresh_token_reused" on their next refresh.
-        if self.provider == "openai-codex":
-            try:
-                _write_codex_cli_tokens(
-                    updated.access_token,
-                    updated.refresh_token,
-                    last_refresh=updated.last_refresh,
-                )
-            except Exception as wexc:
-                logger.debug("Failed to write refreshed Codex tokens to CLI file: %s", wexc)
         return updated
 
     def _entry_needs_refresh(self, entry: PooledCredential) -> bool:
@@ -790,16 +698,6 @@ class CredentialPool:
                 if synced is not entry:
                     entry = synced
                     cleared_any = True
-            # For openai-codex entries, sync from ~/.codex/auth.json before
-            # any status/refresh checks.  This picks up tokens refreshed by
-            # the Codex CLI or another Hermes profile.
-            if (self.provider == "openai-codex"
-                    and entry.last_status == STATUS_EXHAUSTED
-                    and entry.refresh_token):
-                synced = self._sync_codex_entry_from_cli(entry)
-                if synced is not entry:
-                    entry = synced
-                    cleared_any = True
             if entry.last_status == STATUS_EXHAUSTED:
                 exhausted_until = _exhausted_until(entry)
                 if exhausted_until is not None and now < exhausted_until:
@@ -1218,8 +1116,8 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
     elif provider == "openai-codex":
         # Respect user suppression — `hermes auth remove openai-codex` marks
         # the device_code source as suppressed so it won't be re-seeded from
-        # either the Hermes auth store or ~/.codex/auth.json.  Without this
-        # gate the removal is instantly undone on the next load_pool() call.
+        # the Hermes auth store.  Without this gate the removal is instantly
+        # undone on the next load_pool() call.
         codex_suppressed = False
         try:
             from hermes_cli.auth import is_source_suppressed
@@ -1231,23 +1129,12 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
 
         state = _load_provider_state(auth_store, "openai-codex")
         tokens = state.get("tokens") if isinstance(state, dict) else None
-        # Fallback: import from Codex CLI (~/.codex/auth.json) if Hermes auth
-        # store has no tokens.  This mirrors resolve_codex_runtime_credentials()
-        # so that load_pool() and list_authenticated_providers() detect tokens
-        # that only exist in the Codex CLI shared file.
-        if not (isinstance(tokens, dict) and tokens.get("access_token")):
-            try:
-                from hermes_cli.auth import _import_codex_cli_tokens, _save_codex_tokens
-                cli_tokens = _import_codex_cli_tokens()
-                if cli_tokens:
-                    logger.info("Importing Codex CLI tokens into Hermes auth store.")
-                    _save_codex_tokens(cli_tokens)
-                    # Re-read state after import
-                    auth_store = _load_auth_store()
-                    state = _load_provider_state(auth_store, "openai-codex")
-                    tokens = state.get("tokens") if isinstance(state, dict) else None
-            except Exception as exc:
-                logger.debug("Codex CLI token import failed: %s", exc)
+        # Hermes owns its own Codex auth state — we do NOT auto-import from
+        # ~/.codex/auth.json at pool-load time.  OAuth refresh tokens are
+        # single-use, so sharing them with Codex CLI / VS Code causes
+        # refresh_token_reused race failures.  Users who want to adopt
+        # existing Codex CLI credentials get a one-time, explicit prompt
+        # via `hermes auth openai-codex`.
         if isinstance(tokens, dict) and tokens.get("access_token"):
             active_sources.add("device_code")
             changed |= _upsert_entry(
diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index c468948e981..4623147a5a5 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -1434,49 +1434,6 @@ def _read_codex_tokens(*, _lock: bool = True) -> Dict[str, Any]:
     }
 
 
-def _write_codex_cli_tokens(
-    access_token: str,
-    refresh_token: str,
-    *,
-    last_refresh: Optional[str] = None,
-) -> None:
-    """Write refreshed tokens back to ~/.codex/auth.json.
-
-    OpenAI OAuth refresh tokens are single-use and rotate on every refresh.
-    When Hermes refreshes a token it consumes the old refresh_token; if we
-    don't write the new pair back, the Codex CLI (or VS Code extension) will
-    fail with ``refresh_token_reused`` on its next refresh attempt.
-
-    This mirrors the Anthropic write-back to ~/.claude/.credentials.json
-    via ``_write_claude_code_credentials()``.
-    """
-    codex_home = os.getenv("CODEX_HOME", "").strip()
-    if not codex_home:
-        codex_home = str(Path.home() / ".codex")
-    auth_path = Path(codex_home).expanduser() / "auth.json"
-    try:
-        existing: Dict[str, Any] = {}
-        if auth_path.is_file():
-            existing = json.loads(auth_path.read_text(encoding="utf-8"))
-        if not isinstance(existing, dict):
-            existing = {}
-
-        tokens_dict = existing.get("tokens")
-        if not isinstance(tokens_dict, dict):
-            tokens_dict = {}
-        tokens_dict["access_token"] = access_token
-        tokens_dict["refresh_token"] = refresh_token
-        existing["tokens"] = tokens_dict
-        if last_refresh is not None:
-            existing["last_refresh"] = last_refresh
-
-        auth_path.parent.mkdir(parents=True, exist_ok=True)
-        auth_path.write_text(json.dumps(existing, indent=2), encoding="utf-8")
-        auth_path.chmod(0o600)
-    except (OSError, IOError) as exc:
-        logger.debug("Failed to write refreshed tokens to %s: %s", auth_path, exc)
-
-
 def _save_codex_tokens(tokens: Dict[str, str], last_refresh: str = None) -> None:
     """Save Codex OAuth tokens to Hermes auth store (~/.hermes/auth.json)."""
     if last_refresh is None:
@@ -1604,12 +1561,6 @@ def _refresh_codex_auth_tokens(
     updated_tokens["refresh_token"] = refreshed["refresh_token"]
 
     _save_codex_tokens(updated_tokens)
-    # Write back to ~/.codex/auth.json so Codex CLI / VS Code stay in sync.
-    _write_codex_cli_tokens(
-        refreshed["access_token"],
-        refreshed["refresh_token"],
-        last_refresh=refreshed.get("last_refresh"),
-    )
     return updated_tokens
 
 
@@ -1654,25 +1605,7 @@ def resolve_codex_runtime_credentials(
     refresh_skew_seconds: int = CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
 ) -> Dict[str, Any]:
     """Resolve runtime credentials from Hermes's own Codex token store."""
-    try:
-        data = _read_codex_tokens()
-    except AuthError as orig_err:
-        # Only attempt migration when there are NO tokens stored at all
-        # (code == "codex_auth_missing"), not when tokens exist but are invalid.
-        if orig_err.code != "codex_auth_missing":
-            raise
-
-        # Migration: user had Codex as active provider with old storage (~/.codex/).
-        cli_tokens = _import_codex_cli_tokens()
-        if cli_tokens:
-            logger.info("Migrating Codex credentials from ~/.codex/ to Hermes auth store")
-            print("⚠️  Migrating Codex credentials to Hermes's own auth store.")
-            print("   This avoids conflicts with Codex CLI and VS Code.")
-            print("   Run `hermes auth` to create a fully independent session.\n")
-            _save_codex_tokens(cli_tokens)
-            data = _read_codex_tokens()
-        else:
-            raise
+    data = _read_codex_tokens()
     tokens = dict(data["tokens"])
     access_token = str(tokens.get("access_token", "") or "").strip()
     refresh_timeout_seconds = float(os.getenv("HERMES_CODEX_REFRESH_TIMEOUT_SECONDS", "20"))
diff --git a/tests/hermes_cli/test_auth_codex_provider.py b/tests/hermes_cli/test_auth_codex_provider.py
index f05a80b6ac1..ddcaf1721f3 100644
--- a/tests/hermes_cli/test_auth_codex_provider.py
+++ b/tests/hermes_cli/test_auth_codex_provider.py
@@ -14,7 +14,6 @@ from hermes_cli.auth import (
     PROVIDER_REGISTRY,
     _read_codex_tokens,
     _save_codex_tokens,
-    _write_codex_cli_tokens,
     _import_codex_cli_tokens,
     get_codex_auth_status,
     get_provider_auth_state,
@@ -182,98 +181,6 @@ def test_codex_tokens_not_written_to_shared_file(tmp_path, monkeypatch):
     assert data["tokens"]["access_token"] == "hermes-at"
 
 
-def test_write_codex_cli_tokens_creates_file(tmp_path, monkeypatch):
-    """_write_codex_cli_tokens creates ~/.codex/auth.json with refreshed tokens."""
-    codex_home = tmp_path / "codex-cli"
-    monkeypatch.setenv("CODEX_HOME", str(codex_home))
-
-    _write_codex_cli_tokens("new-access", "new-refresh", last_refresh="2026-04-12T00:00:00Z")
-
-    auth_path = codex_home / "auth.json"
-    assert auth_path.exists()
-    data = json.loads(auth_path.read_text())
-    assert data["tokens"]["access_token"] == "new-access"
-    assert data["tokens"]["refresh_token"] == "new-refresh"
-    assert data["last_refresh"] == "2026-04-12T00:00:00Z"
-    # Verify file permissions are restricted
-    assert (auth_path.stat().st_mode & 0o777) == 0o600
-
-
-def test_write_codex_cli_tokens_preserves_existing(tmp_path, monkeypatch):
-    """_write_codex_cli_tokens preserves extra fields in existing auth.json."""
-    codex_home = tmp_path / "codex-cli"
-    codex_home.mkdir(parents=True, exist_ok=True)
-    monkeypatch.setenv("CODEX_HOME", str(codex_home))
-
-    existing = {
-        "tokens": {
-            "access_token": "old-access",
-            "refresh_token": "old-refresh",
-            "extra_field": "preserved",
-        },
-        "last_refresh": "2026-01-01T00:00:00Z",
-        "custom_key": "keep_me",
-    }
-    (codex_home / "auth.json").write_text(json.dumps(existing))
-
-    _write_codex_cli_tokens("updated-access", "updated-refresh")
-
-    data = json.loads((codex_home / "auth.json").read_text())
-    assert data["tokens"]["access_token"] == "updated-access"
-    assert data["tokens"]["refresh_token"] == "updated-refresh"
-    assert data["tokens"]["extra_field"] == "preserved"
-    assert data["custom_key"] == "keep_me"
-    # last_refresh not updated since we didn't pass it
-    assert data["last_refresh"] == "2026-01-01T00:00:00Z"
-
-
-def test_write_codex_cli_tokens_handles_missing_dir(tmp_path, monkeypatch):
-    """_write_codex_cli_tokens creates parent directories if missing."""
-    codex_home = tmp_path / "does" / "not" / "exist"
-    monkeypatch.setenv("CODEX_HOME", str(codex_home))
-
-    _write_codex_cli_tokens("at", "rt")
-
-    assert (codex_home / "auth.json").exists()
-    data = json.loads((codex_home / "auth.json").read_text())
-    assert data["tokens"]["access_token"] == "at"
-
-
-def test_refresh_codex_auth_tokens_writes_back_to_cli(tmp_path, monkeypatch):
-    """After refreshing, _refresh_codex_auth_tokens writes back to ~/.codex/auth.json."""
-    from hermes_cli.auth import _refresh_codex_auth_tokens
-
-    hermes_home = tmp_path / "hermes"
-    codex_home = tmp_path / "codex-cli"
-    hermes_home.mkdir(parents=True, exist_ok=True)
-    codex_home.mkdir(parents=True, exist_ok=True)
-    (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}}))
-    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
-    monkeypatch.setenv("CODEX_HOME", str(codex_home))
-
-    # Write initial CLI tokens
-    (codex_home / "auth.json").write_text(json.dumps({
-        "tokens": {"access_token": "old-at", "refresh_token": "old-rt"},
-    }))
-
-    # Mock the pure refresh to return new tokens
-    monkeypatch.setattr("hermes_cli.auth.refresh_codex_oauth_pure", lambda *a, **kw: {
-        "access_token": "refreshed-at",
-        "refresh_token": "refreshed-rt",
-        "last_refresh": "2026-04-12T01:00:00Z",
-    })
-
-    _refresh_codex_auth_tokens(
-        {"access_token": "old-at", "refresh_token": "old-rt"},
-        timeout_seconds=10,
-    )
-
-    # Verify CLI file was updated
-    cli_data = json.loads((codex_home / "auth.json").read_text())
-    assert cli_data["tokens"]["access_token"] == "refreshed-at"
-    assert cli_data["tokens"]["refresh_token"] == "refreshed-rt"
-
-
 def test_resolve_returns_hermes_auth_store_source(tmp_path, monkeypatch):
     hermes_home = tmp_path / "hermes"
     _setup_hermes_auth(hermes_home)

From 762f7e97965ab9b19d6672724e90660921196569 Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Sun, 29 Mar 2026 11:37:06 -0700
Subject: [PATCH 094/143] feat: configurable approval mode for cron jobs
 (approvals.cron_mode)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add approvals.cron_mode config option that controls how cron jobs handle
dangerous commands. Previously, cron jobs silently auto-approved all
dangerous commands because there was no user present to approve them.

Now the behavior is configurable:
  - deny (default): block dangerous commands and return a message telling
    the agent to find an alternative approach. The agent loop continues —
    it just can't use that specific command.
  - approve: auto-approve all dangerous commands (previous behavior).

When a command is blocked, the agent receives the same response format as
a user denial in the CLI — exit_code=-1, status=blocked, with a message
explaining why and pointing to the config option. This keeps the agent
loop running and encourages it to adapt.

Implementation:
  - config.py: add approvals.cron_mode to DEFAULT_CONFIG
  - scheduler.py: set HERMES_CRON_SESSION=1 env var before agent runs
  - approval.py: both check_dangerous_command() and check_all_command_guards()
    now check for cron sessions and apply the configured mode
  - 21 new tests covering config parsing, deny/approve behavior, and
    interaction with other bypass mechanisms (yolo, containers)
---
 cron/scheduler.py                      |   5 +
 hermes_cli/config.py                   |   5 +
 tests/tools/test_cron_approval_mode.py | 256 +++++++++++++++++++++++++
 tools/approval.py                      |  42 ++++
 4 files changed, 308 insertions(+)
 create mode 100644 tests/tools/test_cron_approval_mode.py

diff --git a/cron/scheduler.py b/cron/scheduler.py
index db5991c6f02..8938063c7ff 100644
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -681,6 +681,11 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
     logger.info("Running job '%s' (ID: %s)", job_name, job_id)
     logger.info("Prompt: %s", prompt[:100])
 
+    # Mark this as a cron session so the approval system can apply cron_mode.
+    # This env var is process-wide and persists for the lifetime of the
+    # scheduler process — every job this process runs is a cron job.
+    os.environ["HERMES_CRON_SESSION"] = "1"
+
     try:
         # Inject origin context so the agent's send_message tool knows the chat.
         # Must be INSIDE the try block so the finally cleanup always runs.
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index dfb6b7210a4..d53899b135e 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -737,9 +737,14 @@ DEFAULT_CONFIG = {
     #   manual — always prompt the user (default)
     #   smart  — use auxiliary LLM to auto-approve low-risk commands, prompt for high-risk
     #   off    — skip all approval prompts (equivalent to --yolo)
+    #
+    # cron_mode — what to do when a cron job hits a dangerous command:
+    #   deny    — block the command and let the agent find another way (default, safe)
+    #   approve — auto-approve all dangerous commands in cron jobs
     "approvals": {
         "mode": "manual",
         "timeout": 60,
+        "cron_mode": "deny",
     },
 
     # Permanently allowed dangerous command patterns (added via "always" approval)
diff --git a/tests/tools/test_cron_approval_mode.py b/tests/tools/test_cron_approval_mode.py
new file mode 100644
index 00000000000..965d2eaa474
--- /dev/null
+++ b/tests/tools/test_cron_approval_mode.py
@@ -0,0 +1,256 @@
+"""Tests for approvals.cron_mode — configurable approval behavior for cron jobs."""
+
+import os
+import pytest
+
+import tools.approval as approval_module
+from tools.approval import (
+    _get_cron_approval_mode,
+    check_all_command_guards,
+    check_dangerous_command,
+    detect_dangerous_command,
+)
+
+
+@pytest.fixture(autouse=True)
+def _clear_approval_state():
+    approval_module._permanent_approved.clear()
+    approval_module.clear_session("default")
+    approval_module.clear_session("test-session")
+    yield
+    approval_module._permanent_approved.clear()
+    approval_module.clear_session("default")
+    approval_module.clear_session("test-session")
+
+
+# ---------------------------------------------------------------------------
+# _get_cron_approval_mode() config parsing
+# ---------------------------------------------------------------------------
+
+class TestCronApprovalModeParsing:
+    def test_default_is_deny(self):
+        """When no config is set, cron_mode defaults to 'deny'."""
+        from unittest.mock import patch as mock_patch
+        with mock_patch("hermes_cli.config.load_config", return_value={"approvals": {}}):
+            assert _get_cron_approval_mode() == "deny"
+
+    def test_explicit_deny(self):
+        from unittest.mock import patch as mock_patch
+        with mock_patch("hermes_cli.config.load_config", return_value={"approvals": {"cron_mode": "deny"}}):
+            assert _get_cron_approval_mode() == "deny"
+
+    def test_explicit_approve(self):
+        from unittest.mock import patch as mock_patch
+        with mock_patch("hermes_cli.config.load_config", return_value={"approvals": {"cron_mode": "approve"}}):
+            assert _get_cron_approval_mode() == "approve"
+
+    def test_off_maps_to_approve(self):
+        """'off' is an alias for 'approve' (matches --yolo semantics)."""
+        from unittest.mock import patch as mock_patch
+        with mock_patch("hermes_cli.config.load_config", return_value={"approvals": {"cron_mode": "off"}}):
+            assert _get_cron_approval_mode() == "approve"
+
+    def test_allow_maps_to_approve(self):
+        from unittest.mock import patch as mock_patch
+        with mock_patch("hermes_cli.config.load_config", return_value={"approvals": {"cron_mode": "allow"}}):
+            assert _get_cron_approval_mode() == "approve"
+
+    def test_yes_maps_to_approve(self):
+        from unittest.mock import patch as mock_patch
+        with mock_patch("hermes_cli.config.load_config", return_value={"approvals": {"cron_mode": "yes"}}):
+            assert _get_cron_approval_mode() == "approve"
+
+    def test_case_insensitive(self):
+        from unittest.mock import patch as mock_patch
+        with mock_patch("hermes_cli.config.load_config", return_value={"approvals": {"cron_mode": "APPROVE"}}):
+            assert _get_cron_approval_mode() == "approve"
+
+    def test_unknown_value_defaults_to_deny(self):
+        from unittest.mock import patch as mock_patch
+        with mock_patch("hermes_cli.config.load_config", return_value={"approvals": {"cron_mode": "maybe"}}):
+            assert _get_cron_approval_mode() == "deny"
+
+    def test_config_load_failure_defaults_to_deny(self):
+        """If config loading fails entirely, default to deny (safe)."""
+        from unittest.mock import patch as mock_patch
+        with mock_patch("hermes_cli.config.load_config", side_effect=RuntimeError("config broken")):
+            assert _get_cron_approval_mode() == "deny"
+
+    def test_yaml_boolean_false_maps_to_deny(self):
+        """YAML 1.1 parses bare 'off' as False. Ensure it maps to deny."""
+        from unittest.mock import patch as mock_patch
+        with mock_patch("hermes_cli.config.load_config", return_value={"approvals": {"cron_mode": False}}):
+            # str(False) = "False", which is not in the approve set, so deny
+            assert _get_cron_approval_mode() == "deny"
+
+
+# ---------------------------------------------------------------------------
+# check_dangerous_command() with cron session
+# ---------------------------------------------------------------------------
+
+class TestCronDenyMode:
+    """When HERMES_CRON_SESSION is set and cron_mode=deny, dangerous commands are blocked."""
+
+    def test_dangerous_command_blocked_in_cron_deny_mode(self, monkeypatch):
+        monkeypatch.setenv("HERMES_CRON_SESSION", "1")
+        monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
+        monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
+        monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
+
+        from unittest.mock import patch as mock_patch
+        with mock_patch("tools.approval._get_cron_approval_mode", return_value="deny"):
+            result = check_dangerous_command("rm -rf /tmp/stuff", "local")
+            assert not result["approved"]
+            assert "BLOCKED" in result["message"]
+            assert "cron_mode" in result["message"]
+
+    def test_safe_command_allowed_in_cron_deny_mode(self, monkeypatch):
+        """Non-dangerous commands still work even with cron_mode=deny."""
+        monkeypatch.setenv("HERMES_CRON_SESSION", "1")
+        monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
+        monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
+        monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
+
+        from unittest.mock import patch as mock_patch
+        with mock_patch("tools.approval._get_cron_approval_mode", return_value="deny"):
+            result = check_dangerous_command("ls -la", "local")
+            assert result["approved"]
+
+    def test_multiple_dangerous_patterns_blocked(self, monkeypatch):
+        """All dangerous patterns are blocked, not just rm."""
+        monkeypatch.setenv("HERMES_CRON_SESSION", "1")
+        monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
+        monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
+        monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
+
+        dangerous_commands = [
+            "rm -rf /",
+            "chmod 777 /etc/passwd",
+            "mkfs.ext4 /dev/sda1",
+            "dd if=/dev/zero of=/dev/sda",
+        ]
+
+        from unittest.mock import patch as mock_patch
+        with mock_patch("tools.approval._get_cron_approval_mode", return_value="deny"):
+            for cmd in dangerous_commands:
+                is_dangerous, _, _ = detect_dangerous_command(cmd)
+                if is_dangerous:
+                    result = check_dangerous_command(cmd, "local")
+                    assert not result["approved"], f"Should be blocked: {cmd}"
+                    assert "BLOCKED" in result["message"]
+
+    def test_block_message_includes_description(self, monkeypatch):
+        """The block message should mention what pattern was matched."""
+        monkeypatch.setenv("HERMES_CRON_SESSION", "1")
+        monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
+        monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
+        monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
+
+        from unittest.mock import patch as mock_patch
+        with mock_patch("tools.approval._get_cron_approval_mode", return_value="deny"):
+            result = check_dangerous_command("rm -rf /tmp/stuff", "local")
+            assert not result["approved"]
+            # Should contain the description of what was flagged
+            assert "dangerous" in result["message"].lower() or "delete" in result["message"].lower()
+
+
+class TestCronApproveMode:
+    """When HERMES_CRON_SESSION is set and cron_mode=approve, dangerous commands pass through."""
+
+    def test_dangerous_command_allowed_in_cron_approve_mode(self, monkeypatch):
+        monkeypatch.setenv("HERMES_CRON_SESSION", "1")
+        monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
+        monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
+        monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
+
+        from unittest.mock import patch as mock_patch
+        with mock_patch("tools.approval._get_cron_approval_mode", return_value="approve"):
+            result = check_dangerous_command("rm -rf /tmp/stuff", "local")
+            assert result["approved"]
+
+
+# ---------------------------------------------------------------------------
+# check_all_command_guards() with cron session
+# ---------------------------------------------------------------------------
+
+class TestCronDenyModeAllGuards:
+    """The combined guard function also respects cron_mode."""
+
+    def test_dangerous_command_blocked_in_combined_guard(self, monkeypatch):
+        monkeypatch.setenv("HERMES_CRON_SESSION", "1")
+        monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
+        monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
+        monkeypatch.delenv("HERMES_EXEC_ASK", raising=False)
+        monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
+
+        from unittest.mock import patch as mock_patch
+        with mock_patch("tools.approval._get_cron_approval_mode", return_value="deny"):
+            result = check_all_command_guards("rm -rf /tmp/stuff", "local")
+            assert not result["approved"]
+            assert "BLOCKED" in result["message"]
+
+    def test_safe_command_allowed_in_combined_guard(self, monkeypatch):
+        monkeypatch.setenv("HERMES_CRON_SESSION", "1")
+        monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
+        monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
+        monkeypatch.delenv("HERMES_EXEC_ASK", raising=False)
+        monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
+
+        from unittest.mock import patch as mock_patch
+        with mock_patch("tools.approval._get_cron_approval_mode", return_value="deny"):
+            result = check_all_command_guards("echo hello", "local")
+            assert result["approved"]
+
+    def test_combined_guard_approve_mode(self, monkeypatch):
+        monkeypatch.setenv("HERMES_CRON_SESSION", "1")
+        monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
+        monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
+        monkeypatch.delenv("HERMES_EXEC_ASK", raising=False)
+        monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
+
+        from unittest.mock import patch as mock_patch
+        with mock_patch("tools.approval._get_cron_approval_mode", return_value="approve"):
+            result = check_all_command_guards("rm -rf /tmp/stuff", "local")
+            assert result["approved"]
+
+
+# ---------------------------------------------------------------------------
+# Edge cases: cron mode interaction with other approval mechanisms
+# ---------------------------------------------------------------------------
+
+class TestCronModeInteractions:
+    """Cron mode should NOT interfere with other approval bypass mechanisms."""
+
+    def test_container_env_still_auto_approves(self, monkeypatch):
+        """Docker/sandbox environments bypass approvals regardless of cron_mode."""
+        monkeypatch.setenv("HERMES_CRON_SESSION", "1")
+        monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
+        monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
+        monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
+
+        from unittest.mock import patch as mock_patch
+        with mock_patch("tools.approval._get_cron_approval_mode", return_value="deny"):
+            result = check_dangerous_command("rm -rf /", "docker")
+            assert result["approved"]
+
+    def test_yolo_overrides_cron_deny(self, monkeypatch):
+        """--yolo still works even if cron_mode=deny."""
+        monkeypatch.setenv("HERMES_CRON_SESSION", "1")
+        monkeypatch.setenv("HERMES_YOLO_MODE", "1")
+        monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
+        monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
+
+        from unittest.mock import patch as mock_patch
+        with mock_patch("tools.approval._get_cron_approval_mode", return_value="deny"):
+            result = check_dangerous_command("rm -rf /", "local")
+            assert result["approved"]
+
+    def test_non_cron_non_interactive_still_auto_approves(self, monkeypatch):
+        """Non-cron, non-interactive sessions (e.g. scripted usage) still auto-approve."""
+        monkeypatch.delenv("HERMES_CRON_SESSION", raising=False)
+        monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
+        monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
+        monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
+
+        result = check_dangerous_command("rm -rf /tmp/stuff", "local")
+        assert result["approved"]
diff --git a/tools/approval.py b/tools/approval.py
index 7d8c5b032e8..fc344bd77b7 100644
--- a/tools/approval.py
+++ b/tools/approval.py
@@ -532,6 +532,19 @@ def _get_approval_timeout() -> int:
         return 60
 
 
+def _get_cron_approval_mode() -> str:
+    """Read the cron approval mode from config. Returns 'deny' or 'approve'."""
+    try:
+        from hermes_cli.config import load_config
+        config = load_config()
+        mode = str(config.get("approvals", {}).get("cron_mode", "deny")).lower().strip()
+        if mode in ("approve", "off", "allow", "yes"):
+            return "approve"
+        return "deny"
+    except Exception:
+        return "deny"
+
+
 def _smart_approve(command: str, description: str) -> str:
     """Use the auxiliary LLM to assess risk and decide approval.
 
@@ -614,6 +627,19 @@ def check_dangerous_command(command: str, env_type: str,
     is_gateway = os.getenv("HERMES_GATEWAY_SESSION")
 
     if not is_cli and not is_gateway:
+        # Cron sessions: respect cron_mode config
+        if os.getenv("HERMES_CRON_SESSION"):
+            if _get_cron_approval_mode() == "deny":
+                return {
+                    "approved": False,
+                    "message": (
+                        f"BLOCKED: Command flagged as dangerous ({description}) "
+                        "but cron jobs run without a user present to approve it. "
+                        "Find an alternative approach that avoids this command. "
+                        "To allow dangerous commands in cron jobs, set "
+                        "approvals.cron_mode: approve in config.yaml."
+                    ),
+                }
         return {"approved": True, "message": None}
 
     if is_gateway or os.getenv("HERMES_EXEC_ASK"):
@@ -712,6 +738,22 @@ def check_all_command_guards(command: str, env_type: str,
     # Preserve the existing non-interactive behavior: outside CLI/gateway/ask
     # flows, we do not block on approvals and we skip external guard work.
     if not is_cli and not is_gateway and not is_ask:
+        # Cron sessions: respect cron_mode config
+        if os.getenv("HERMES_CRON_SESSION"):
+            if _get_cron_approval_mode() == "deny":
+                # Run detection to get a description for the block message
+                is_dangerous, _pk, description = detect_dangerous_command(command)
+                if is_dangerous:
+                    return {
+                        "approved": False,
+                        "message": (
+                            f"BLOCKED: Command flagged as dangerous ({description}) "
+                            "but cron jobs run without a user present to approve it. "
+                            "Find an alternative approach that avoids this command. "
+                            "To allow dangerous commands in cron jobs, set "
+                            "approvals.cron_mode: approve in config.yaml."
+                        ),
+                    }
         return {"approved": True, "message": None}
 
     # --- Phase 1: Gather findings from both checks ---

From 3a6351454b92c0d4b9f54e6eef43e3ff187ad828 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 18 Apr 2026 19:32:26 -0700
Subject: [PATCH 095/143] fix(gateway): close pending-drain and late-arrival
 races in base adapter (#12371)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two related race conditions in gateway/platforms/base.py that could
produce duplicate agent runs or silently drop messages. Neither is
specific to any one platform — all adapters inherit this logic.

R5 (HIGH) — duplicate agent spawn on turn chain
  In _process_message_background, the pending-drain path deleted
  _active_sessions[session_key] before cancelling typing_task,
  awaiting it, and then recursively awaiting
  _process_message_background for the queued event. During the
  typing_task await, a fresh inbound message
  M3 could pass the Level-1 guard (entry now missing), set its own
  Event, and spawn a second _process_message_background for the same
  session_key — two agents running simultaneously, duplicate responses,
  duplicate tool calls.

  Fix: keep the _active_sessions entry populated and only clear() the
  Event. The guard stays live, so any concurrent inbound message takes
  the busy-handler path (queue + interrupt) as intended.

R6 (MED-HIGH) — message dropped during finally cleanup
  The finally block has two await points (typing_task, stop_typing)
  before it deletes _active_sessions. A message arriving in that
  window is caught by the guard (entry still live) and lands in
  _pending_messages via the busy-handler — and then the unconditional
  del removes the guard with that message still queued. Nothing
  drains it; the user never gets a reply.

  Fix: before deleting _active_sessions in finally, pop any late
  pending_messages entry and spawn a drain task for it. Only delete
  _active_sessions when no pending is waiting.

Tests: tests/gateway/test_pending_drain_race.py — three regression
cases. Validated: without the fix, two of the three fail exactly
where the races manifest (duplicate-spawn guard loses identity,
late-arrival 'LATE' message not in processed list).
---
 gateway/platforms/base.py                |  43 ++++-
 tests/gateway/test_pending_drain_race.py | 212 +++++++++++++++++++++++
 2 files changed, 252 insertions(+), 3 deletions(-)
 create mode 100644 tests/gateway/test_pending_drain_race.py

diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index f82b1fa0683..65f7226e10a 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -1926,9 +1926,18 @@ class BasePlatformAdapter(ABC):
             if session_key in self._pending_messages:
                 pending_event = self._pending_messages.pop(session_key)
                 logger.debug("[%s] Processing queued message from interrupt", self.name)
-                # Clean up current session before processing pending
-                if session_key in self._active_sessions:
-                    del self._active_sessions[session_key]
+                # Keep the _active_sessions entry live across the turn chain
+                # and only CLEAR the interrupt Event — do NOT delete the entry.
+                # If we deleted here, a concurrent inbound message arriving
+                # during the awaits below would pass the Level-1 guard, spawn
+                # its own _process_message_background, and run simultaneously
+                # with the recursive drain below.  Two agents on one
+                # session_key = duplicate responses, duplicate tool calls.
+                # Clearing the Event keeps the guard live so follow-ups take
+                # the busy-handler path (queue + interrupt) as intended.
+                _active = self._active_sessions.get(session_key)
+                if _active is not None:
+                    _active.clear()
                 typing_task.cancel()
                 try:
                     await typing_task
@@ -1986,6 +1995,34 @@ class BasePlatformAdapter(ABC):
                     await self.stop_typing(event.source.chat_id)
             except Exception:
                 pass
+            # Late-arrival drain: a message may have arrived during the
+            # cleanup awaits above (typing_task cancel, stop_typing).  Such
+            # messages passed the Level-1 guard (entry still live, Event
+            # possibly set) and landed in _pending_messages via the
+            # busy-handler path.  Without this block, we would delete the
+            # active-session entry and the queued message would be silently
+            # dropped (user never gets a reply).
+            late_pending = self._pending_messages.pop(session_key, None)
+            if late_pending is not None:
+                logger.debug(
+                    "[%s] Late-arrival pending message during cleanup — spawning drain task",
+                    self.name,
+                )
+                _active = self._active_sessions.get(session_key)
+                if _active is not None:
+                    _active.clear()
+                drain_task = asyncio.create_task(
+                    self._process_message_background(late_pending, session_key)
+                )
+                try:
+                    self._background_tasks.add(drain_task)
+                    drain_task.add_done_callback(self._background_tasks.discard)
+                except TypeError:
+                    # Tests stub create_task() with non-hashable sentinels; tolerate.
+                    pass
+                # Leave _active_sessions[session_key] populated — the drain
+                # task's own lifecycle will clean it up.
+                return
             # Clean up session tracking
             if session_key in self._active_sessions:
                 del self._active_sessions[session_key]
diff --git a/tests/gateway/test_pending_drain_race.py b/tests/gateway/test_pending_drain_race.py
new file mode 100644
index 00000000000..810d52e9e2a
--- /dev/null
+++ b/tests/gateway/test_pending_drain_race.py
@@ -0,0 +1,212 @@
+"""Regression tests: pending-drain + finally-cleanup races must not spawn
+duplicate agents OR silently drop messages that arrived during cleanup.
+
+Two related races in gateway/platforms/base.py:_process_message_background:
+
+1. Pending-drain path (previous line 1931):
+   ``del self._active_sessions[session_key]`` opened a window where a
+   concurrent inbound message could pass the Level-1 guard, spawn its
+   own _process_message_background, and run simultaneously with the
+   recursive drain.  Two agents on one session_key = duplicate responses.
+
+2. Finally-cleanup path (previous line 1990-1991):
+   Between the awaits in finally (typing_task, stop_typing) and the
+   ``del self._active_sessions[session_key]``, a new message could
+   land in _pending_messages.  The del ran anyway, and the message was
+   silently dropped — user never got a reply.
+
+Fix: keep the _active_sessions entry live across the turn chain and
+clear the Event instead of deleting; in finally, drain any
+late-arrival pending message by spawning a task instead of
+dropping it.
+"""
+
+import asyncio
+from unittest.mock import AsyncMock
+
+import pytest
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import (
+    BasePlatformAdapter,
+    MessageEvent,
+    MessageType,
+)
+from gateway.session import SessionSource, build_session_key
+
+
+class _StubAdapter(BasePlatformAdapter):
+    async def connect(self):
+        pass
+
+    async def disconnect(self):
+        pass
+
+    async def send(self, chat_id, text, **kwargs):
+        return None
+
+    async def get_chat_info(self, chat_id):
+        return {}
+
+
+def _make_adapter():
+    adapter = _StubAdapter(PlatformConfig(enabled=True, token="t"), Platform.TELEGRAM)
+    adapter._send_with_retry = AsyncMock(return_value=None)
+    return adapter
+
+
+def _make_event(text="hi", chat_id="42"):
+    return MessageEvent(
+        text=text,
+        message_type=MessageType.TEXT,
+        source=SessionSource(platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm"),
+    )
+
+
+def _sk(chat_id="42"):
+    return build_session_key(
+        SessionSource(platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm")
+    )
+
+
+@pytest.mark.asyncio
+async def test_pending_drain_keeps_active_session_guard_live():
+    """Fix for R5: during pending-drain cleanup, _active_sessions must stay
+    populated so concurrent inbound messages can't spawn a duplicate
+    _process_message_background.  We only CLEAR the Event, never delete."""
+    adapter = _make_adapter()
+    sk = _sk()
+
+    # Register a slow handler so the agent is "mid-processing" when the
+    # pending message arrives.
+    first_started = asyncio.Event()
+    release_first = asyncio.Event()
+
+    async def handler(event):
+        first_started.set()
+        await release_first.wait()
+        return "done"
+
+    adapter._message_handler = handler
+
+    # Spawn M1 through handle_message.
+    await adapter.handle_message(_make_event(text="M1"))
+
+    # Wait until M1 is actively running inside the handler.
+    await asyncio.wait_for(first_started.wait(), timeout=1.0)
+
+    # Assert: session is active.
+    assert sk in adapter._active_sessions
+    active_event = adapter._active_sessions[sk]
+
+    # Simulate pending message (M2) queued while M1 runs.
+    adapter._pending_messages[sk] = _make_event(text="M2")
+
+    # Release M1 — pending-drain block now runs.  During its cleanup
+    # awaits, _active_sessions[sk] must remain populated (same object
+    # reference) so any M3 arriving in that window hits the busy-handler.
+    release_first.set()
+
+    # Give the drain a moment to execute its .clear() + await typing_task
+    # without letting it fully finish the recursive call.
+    await asyncio.sleep(0)
+    await asyncio.sleep(0)
+
+    # Across the drain transition, the Event object must be the SAME
+    # reference (not replaced, not deleted).  If del happened, the key
+    # would be missing briefly; if a new Event was installed, the
+    # identity would differ.
+    assert sk in adapter._active_sessions, (
+        "_active_sessions[session_key] was deleted during pending-drain — "
+        "opens a window for duplicate-agent spawn"
+    )
+    assert adapter._active_sessions[sk] is active_event, (
+        "_active_sessions[session_key] was replaced during pending-drain — "
+        "the old Event may have waiters that now won't be signaled"
+    )
+
+    # Finish drain.
+    await asyncio.sleep(0.1)
+    await adapter.cancel_background_tasks()
+
+
+@pytest.mark.asyncio
+async def test_finally_cleanup_drains_late_arrival_pending():
+    """Fix for R6: if a message lands in _pending_messages during the
+    finally-block cleanup awaits, the finally must spawn a drain task
+    instead of deleting _active_sessions and dropping the message."""
+    adapter = _make_adapter()
+    sk = _sk()
+
+    processed = []
+
+    async def handler(event):
+        processed.append(event.text)
+        return "ok"
+
+    adapter._message_handler = handler
+
+    # Instrument stop_typing to inject a late-arrival pending message
+    # during the finally-block await window.  This exactly simulates the
+    # R6 race: the message arrives after the response has been sent but
+    # before _active_sessions is deleted.
+    original_stop = adapter.stop_typing if hasattr(adapter, "stop_typing") else None
+
+    injected = {"done": False}
+
+    async def stop_typing_injects_pending(*args, **kwargs):
+        # Yield so the injection happens mid-await.
+        await asyncio.sleep(0)
+        if not injected["done"]:
+            adapter._pending_messages[sk] = _make_event(text="LATE")
+            injected["done"] = True
+        if original_stop:
+            return await original_stop(*args, **kwargs)
+        return None
+
+    adapter.stop_typing = stop_typing_injects_pending
+
+    # Send M1.
+    await adapter.handle_message(_make_event(text="M1"))
+
+    # Drain: wait for M1 to finish and the late-drain task to process LATE.
+    for _ in range(50):  # up to ~0.5s
+        if "LATE" in processed:
+            break
+        await asyncio.sleep(0.01)
+
+    await adapter.cancel_background_tasks()
+
+    assert "M1" in processed, "M1 was not processed"
+    assert "LATE" in processed, (
+        "Late-arrival pending message was silently dropped — finally "
+        "cleanup should have spawned a drain task"
+    )
+
+
+@pytest.mark.asyncio
+async def test_no_pending_cleans_up_normally():
+    """Regression guard: when no pending message exists, the finally
+    block must still delete _active_sessions as before (no leak)."""
+    adapter = _make_adapter()
+    sk = _sk()
+
+    async def handler(event):
+        return "ok"
+
+    adapter._message_handler = handler
+
+    await adapter.handle_message(_make_event(text="solo"))
+
+    # Wait for background task to finish.
+    for _ in range(50):
+        if sk not in adapter._active_sessions:
+            break
+        await asyncio.sleep(0.01)
+
+    assert sk not in adapter._active_sessions, (
+        "_active_sessions was not cleaned up after a normal turn with no pending"
+    )
+    assert sk not in adapter._pending_messages
+
+    await adapter.cancel_background_tasks()

From bf5d7462ba33028b34cbbf500ca268b8684a0e9c Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 18 Apr 2026 22:30:10 -0700
Subject: [PATCH 096/143] fix(tui): reject history-mutating commands while
 session is running (#12416)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes silent data loss in the TUI when /undo, /compress, /retry, or
rollback.restore runs during an in-flight agent turn.  The version-
guard at prompt.submit:1449 would fail the version check and silently
skip writing the agent's result — UI showed the assistant reply but
DB / backend history never received it, causing UI↔backend desync
that persisted across session resume.

Changes (tui_gateway/server.py):
- session.undo, session.compress, /retry, rollback.restore (full-history
  only — file-scoped rollbacks still allowed): reject with 4009 when
  session.running is True.  Users can /interrupt first.
- prompt.submit: on history_version mismatch (defensive backstop),
  attach a 'warning' field to message.complete and log to stderr
  instead of silently dropping the agent's output.  The UI can surface
  the warning to the user; the operator can spot it in logs.

Tests (tests/test_tui_gateway_server.py): 6 new cases.
- test_session_undo_rejects_while_running
- test_session_undo_allowed_when_idle (regression guard)
- test_session_compress_rejects_while_running
- test_rollback_restore_rejects_full_history_while_running
- test_prompt_submit_history_version_mismatch_surfaces_warning
- test_prompt_submit_history_version_match_persists_normally (regression)

Validated: against unpatched server.py the three 'rejects_while_running'
tests fail and the version-mismatch test fails (no 'warning' field).
With the fix, all 6 pass, all 33 tests in the file pass, 74 TUI tests
in total pass.  Live E2E against the live Python environment confirmed
all 5 patches present and guards enforce 4009 exactly as designed.
---
 tests/test_tui_gateway_server.py | 166 +++++++++++++++++++++++++++++++
 tui_gateway/server.py            |  43 +++++++-
 2 files changed, 208 insertions(+), 1 deletion(-)

diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py
index 35bc3f449b2..8831efb8965 100644
--- a/tests/test_tui_gateway_server.py
+++ b/tests/test_tui_gateway_server.py
@@ -546,3 +546,169 @@ def test_session_info_includes_mcp_servers(monkeypatch):
 
     assert info["mcp_servers"] == fake_status
 
+
+# ---------------------------------------------------------------------------
+# History-mutating commands must reject while session.running is True.
+# Without these guards, prompt.submit's post-run history write either
+# clobbers the mutation (version matches) or silently drops the agent's
+# output (version mismatch) — both produce UI<->backend state desync.
+# ---------------------------------------------------------------------------
+
+
+def test_session_undo_rejects_while_running():
+    """Fix for TUI silent-drop #1: /undo must not mutate history
+    while the agent is mid-turn — would either clobber the undo or
+    cause prompt.submit to silently drop the agent's response."""
+    server._sessions["sid"] = _session(running=True, history=[
+        {"role": "user", "content": "hi"},
+        {"role": "assistant", "content": "hello"},
+    ])
+    try:
+        resp = server.handle_request(
+            {"id": "1", "method": "session.undo", "params": {"session_id": "sid"}}
+        )
+        assert resp.get("error"), "session.undo should reject while running"
+        assert resp["error"]["code"] == 4009
+        assert "session busy" in resp["error"]["message"]
+        # History must be unchanged
+        assert len(server._sessions["sid"]["history"]) == 2
+    finally:
+        server._sessions.pop("sid", None)
+
+
+def test_session_undo_allowed_when_idle():
+    """Regression guard: when not running, /undo still works."""
+    server._sessions["sid"] = _session(running=False, history=[
+        {"role": "user", "content": "hi"},
+        {"role": "assistant", "content": "hello"},
+    ])
+    try:
+        resp = server.handle_request(
+            {"id": "1", "method": "session.undo", "params": {"session_id": "sid"}}
+        )
+        assert resp.get("result"), f"got error: {resp.get('error')}"
+        assert resp["result"]["removed"] == 2
+        assert server._sessions["sid"]["history"] == []
+    finally:
+        server._sessions.pop("sid", None)
+
+
+def test_session_compress_rejects_while_running(monkeypatch):
+    server._sessions["sid"] = _session(running=True)
+    try:
+        resp = server.handle_request(
+            {"id": "1", "method": "session.compress", "params": {"session_id": "sid"}}
+        )
+        assert resp.get("error")
+        assert resp["error"]["code"] == 4009
+    finally:
+        server._sessions.pop("sid", None)
+
+
+def test_rollback_restore_rejects_full_history_while_running(monkeypatch):
+    """Full-history rollback must reject; file-scoped rollback still allowed."""
+    server._sessions["sid"] = _session(running=True)
+    try:
+        resp = server.handle_request(
+            {"id": "1", "method": "rollback.restore", "params": {"session_id": "sid", "hash": "abc"}}
+        )
+        assert resp.get("error"), "full-history rollback should reject while running"
+        assert resp["error"]["code"] == 4009
+    finally:
+        server._sessions.pop("sid", None)
+
+
+def test_prompt_submit_history_version_mismatch_surfaces_warning(monkeypatch):
+    """Fix for TUI silent-drop #2: the defensive backstop at prompt.submit
+    must attach a 'warning' to message.complete when history was
+    mutated externally during the turn (instead of silently dropping
+    the agent's output)."""
+    # Agent bumps history_version itself mid-run to simulate an external
+    # mutation slipping past the guards.
+    session_ref = {"s": None}
+
+    class _RacyAgent:
+        def run_conversation(self, prompt, conversation_history=None, stream_callback=None):
+            # Simulate: something external bumped history_version
+            # while we were running.
+            with session_ref["s"]["history_lock"]:
+                session_ref["s"]["history_version"] += 1
+            return {"final_response": "agent reply", "messages": [{"role": "assistant", "content": "agent reply"}]}
+
+    class _ImmediateThread:
+        def __init__(self, target=None, daemon=None):
+            self._target = target
+
+        def start(self):
+            self._target()
+
+    server._sessions["sid"] = _session(agent=_RacyAgent())
+    session_ref["s"] = server._sessions["sid"]
+    emits: list[tuple] = []
+    try:
+        monkeypatch.setattr(server.threading, "Thread", _ImmediateThread)
+        monkeypatch.setattr(server, "_get_usage", lambda _a: {})
+        monkeypatch.setattr(server, "render_message", lambda _t, _c: "")
+        monkeypatch.setattr(server, "_emit", lambda *a: emits.append(a))
+
+        resp = server.handle_request(
+            {"id": "1", "method": "prompt.submit", "params": {"session_id": "sid", "text": "hi"}}
+        )
+        assert resp.get("result"), f"got error: {resp.get('error')}"
+
+        # History should NOT contain the agent's output (version mismatch)
+        assert server._sessions["sid"]["history"] == []
+
+        # message.complete must carry a 'warning' so the UI / operator
+        # knows the output was not persisted.
+        complete_calls = [a for a in emits if a[0] == "message.complete"]
+        assert len(complete_calls) == 1
+        _, _, payload = complete_calls[0]
+        assert "warning" in payload, (
+            "message.complete must include a 'warning' field on "
+            "history_version mismatch — otherwise the UI silently "
+            "shows output that was never persisted"
+        )
+        assert "not saved" in payload["warning"].lower() or "changed" in payload["warning"].lower()
+    finally:
+        server._sessions.pop("sid", None)
+
+
+def test_prompt_submit_history_version_match_persists_normally(monkeypatch):
+    """Regression guard: the backstop does not affect the happy path."""
+    class _Agent:
+        def run_conversation(self, prompt, conversation_history=None, stream_callback=None):
+            return {"final_response": "reply", "messages": [{"role": "assistant", "content": "reply"}]}
+
+    class _ImmediateThread:
+        def __init__(self, target=None, daemon=None):
+            self._target = target
+
+        def start(self):
+            self._target()
+
+    server._sessions["sid"] = _session(agent=_Agent())
+    emits: list[tuple] = []
+    try:
+        monkeypatch.setattr(server.threading, "Thread", _ImmediateThread)
+        monkeypatch.setattr(server, "_get_usage", lambda _a: {})
+        monkeypatch.setattr(server, "render_message", lambda _t, _c: "")
+        monkeypatch.setattr(server, "_emit", lambda *a: emits.append(a))
+
+        resp = server.handle_request(
+            {"id": "1", "method": "prompt.submit", "params": {"session_id": "sid", "text": "hi"}}
+        )
+        assert resp.get("result")
+
+        # History was written
+        assert server._sessions["sid"]["history"] == [{"role": "assistant", "content": "reply"}]
+        assert server._sessions["sid"]["history_version"] == 1
+
+        # No warning should be attached
+        complete_calls = [a for a in emits if a[0] == "message.complete"]
+        assert len(complete_calls) == 1
+        _, _, payload = complete_calls[0]
+        assert "warning" not in payload
+    finally:
+        server._sessions.pop("sid", None)
+
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index d86db000663..c58c65763ed 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -1224,6 +1224,13 @@ def _(rid, params: dict) -> dict:
     session, err = _sess(params, rid)
     if err:
         return err
+    # Reject during an in-flight turn.  If we mutated history while
+    # the agent thread is running, prompt.submit's post-run history
+    # write would either clobber the undo (version matches) or
+    # silently drop the agent's output (version mismatch, see below).
+    # Neither is what the user wants — make them /interrupt first.
+    if session.get("running"):
+        return _err(rid, 4009, "session busy — /interrupt the current turn before /undo")
     removed = 0
     with session["history_lock"]:
         history = session.get("history", [])
@@ -1243,6 +1250,8 @@ def _(rid, params: dict) -> dict:
     session, err = _sess(params, rid)
     if err:
         return err
+    if session.get("running"):
+        return _err(rid, 4009, "session busy — /interrupt the current turn before /compress")
     try:
         with session["history_lock"]:
             removed, usage = _compress_session_history(session, str(params.get("focus_topic", "") or "").strip())
@@ -1443,12 +1452,33 @@ def _(rid, params: dict) -> dict:
             )
 
             last_reasoning = None
+            status_note = None
             if isinstance(result, dict):
                 if isinstance(result.get("messages"), list):
                     with session["history_lock"]:
-                        if int(session.get("history_version", 0)) == history_version:
+                        current_version = int(session.get("history_version", 0))
+                        if current_version == history_version:
                             session["history"] = result["messages"]
                             session["history_version"] = history_version + 1
+                        else:
+                            # History mutated externally during the turn
+                            # (undo/compress/retry/rollback now guard on
+                            # session.running, but this is the defensive
+                            # backstop for any path that slips past).
+                            # Surface the desync rather than silently
+                            # dropping the agent's output — the UI can
+                            # show the response and warn that it was
+                            # not persisted.
+                            print(
+                                f"[tui_gateway] prompt.submit: history_version mismatch "
+                                f"(expected={history_version} current={current_version}) — "
+                                f"agent output NOT written to session history",
+                                file=sys.stderr,
+                            )
+                            status_note = (
+                                "History changed during this turn — the response above is visible "
+                                "but was not saved to session history."
+                            )
                 raw = result.get("final_response", "")
                 status = "interrupted" if result.get("interrupted") else "error" if result.get("error") else "complete"
                 lr = result.get("last_reasoning")
@@ -1461,6 +1491,8 @@ def _(rid, params: dict) -> dict:
             payload = {"text": raw, "usage": _get_usage(agent), "status": status}
             if last_reasoning:
                 payload["reasoning"] = last_reasoning
+            if status_note:
+                payload["warning"] = status_note
             rendered = render_message(raw, cols)
             if rendered:
                 payload["rendered"] = rendered
@@ -2168,6 +2200,8 @@ def _(rid, params: dict) -> dict:
     if name == "retry":
         if not session:
             return _err(rid, 4001, "no active session to retry")
+        if session.get("running"):
+            return _err(rid, 4009, "session busy — /interrupt the current turn before /retry")
         history = session.get("history", [])
         if not history:
             return _err(rid, 4018, "no previous user message to retry")
@@ -2578,6 +2612,13 @@ def _(rid, params: dict) -> dict:
     file_path = params.get("file_path", "")
     if not target:
         return _err(rid, 4014, "hash required")
+    # Full-history rollback mutates session history.  Rejecting during
+    # an in-flight turn prevents prompt.submit from silently dropping
+    # the agent's output (version mismatch path) or clobbering the
+    # rollback (version-matches path).  A file-scoped rollback only
+    # touches disk, so we allow it.
+    if not file_path and session.get("running"):
+        return _err(rid, 4009, "session busy — /interrupt the current turn before full rollback.restore")
     try:
         def go(mgr, cwd):
             resolved = _resolve_checkpoint_hash(mgr, cwd, target)

From 78586ce036baab8c294e55a1ef0a279c47a447ed Mon Sep 17 00:00:00 2001
From: Erosika <eri@plasticlabs.ai>
Date: Sat, 18 Apr 2026 09:35:42 -0400
Subject: [PATCH 097/143] =?UTF-8?q?fix(honcho):=20dialectic=20lifecycle=20?=
 =?UTF-8?q?=E2=80=94=20defaults,=20retry,=20prewarm=20consumption?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Several correctness and cost-safety fixes to the Honcho dialectic path
after a multi-turn investigation surfaced a chain of silent failures:

- dialecticCadence default flipped 3 → 1. PR #10619 changed this from 1 to
  3 for cost, but existing installs with no explicit config silently went
  from per-turn dialectic to every-3-turns on upgrade. Restores pre-#10619
  behavior; 3+ remains available for cost-conscious setups. Docs + wizard
  + status output updated to match.

- Session-start prewarm now consumed. Previously fired a .chat() on init
  whose result landed in HonchoSessionManager._dialectic_cache and was
  never read — pop_dialectic_result had zero call sites. Turn 1 paid for
  a duplicate synchronous dialectic. Prewarm now writes directly to the
  plugin's _prefetch_result via _prefetch_lock so turn 1 consumes it with
  no extra call.

- Prewarm is now dialecticDepth-aware. A single-pass prewarm can return
  weak output on cold peers; the multi-pass audit/reconcile cycle is
  exactly the case dialecticDepth was built for. Prewarm now runs the
  full configured depth in the background.

- Silent dialectic failure no longer burns the cadence window.
  _last_dialectic_turn now advances only when the result is non-empty.
  Empty result → next eligible turn retries immediately instead of
  waiting the full cadence gap.

- Thread pile-up guard. queue_prefetch skips when a prior dialectic
  thread is still in-flight, preventing stacked races on _prefetch_result.

- First-turn sync timeout is recoverable. Previously on timeout the
  background thread's result was stored in a dead local list. Now the
  thread writes into _prefetch_result under lock so the next turn
  picks it up.

- Cadence gate applies uniformly. At cadence=1 the old "cadence > 1"
  guard let first-turn sync + same-turn queue_prefetch both fire.
  Gate now always applies.

- Restored query-length reasoning-level scaling, dropped in 9a0ab34c.
  Scales dialecticReasoningLevel up on longer queries (+1 at ≥120 chars,
  +2 at ≥400), clamped at reasoningLevelCap. Two new config keys:
  `reasoningHeuristic` (bool, default true) and `reasoningLevelCap`
  (string, default "high"; previously parsed but never enforced).
  Respects dialecticDepthLevels and proportional lighter-early passes.

- Restored short-prompt skip, dropped in ef7f3156. One-word
  acknowledgements ("ok", "y", "thanks") and slash commands bypass
  both injection and dialectic fire.

- Purged dead code in session.py: prefetch_dialectic, _dialectic_cache,
  set_dialectic_result, pop_dialectic_result — all unused after prewarm
  refactor.

Tests: 542 passed across honcho_plugin/, agent/test_memory_provider.py,
and run_agent/test_run_agent.py. New coverage:
- TestTrivialPromptHeuristic (classifier + prefetch/queue skip)
- TestDialecticCadenceAdvancesOnSuccess (empty-result retry, pile-up guard)
- TestSessionStartDialecticPrewarm (prewarm consumed, sync fallback)
- TestReasoningHeuristic (length bumps, cap clamp, interaction with depth)
- TestDialecticLifecycleSmoke (end-to-end 8-turn session walk)
---
 .../autonomous-ai-agents/honcho/SKILL.md      |   6 +-
 plugins/memory/honcho/__init__.py             | 199 ++++++--
 plugins/memory/honcho/cli.py                  |   8 +-
 plugins/memory/honcho/client.py               |  18 +
 plugins/memory/honcho/session.py              |  46 +-
 tests/agent/test_memory_provider.py           |   2 -
 tests/honcho_plugin/test_async_memory.py      |   7 -
 tests/honcho_plugin/test_session.py           | 478 +++++++++++++++++-
 website/docs/user-guide/features/honcho.md    |   4 +-
 .../user-guide/features/memory-providers.md   |   4 +-
 10 files changed, 665 insertions(+), 107 deletions(-)

diff --git a/optional-skills/autonomous-ai-agents/honcho/SKILL.md b/optional-skills/autonomous-ai-agents/honcho/SKILL.md
index c60d2c63561..5d03a549858 100644
--- a/optional-skills/autonomous-ai-agents/honcho/SKILL.md
+++ b/optional-skills/autonomous-ai-agents/honcho/SKILL.md
@@ -145,10 +145,10 @@ Controls **how often** dialectic and context calls happen.
 | Key | Default | Description |
 |-----|---------|-------------|
 | `contextCadence` | `1` | Min turns between context API calls |
-| `dialecticCadence` | `3` | Min turns between dialectic API calls |
+| `dialecticCadence` | `1` | Min turns between dialectic API calls |
 | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` for base context injection |
 
-Higher cadence values reduce API calls and cost. `dialecticCadence: 3` (default) means the dialectic engine fires at most every 3rd turn.
+Higher cadence values reduce API calls and cost. `dialecticCadence: 1` (default) fires every turn; set to `3` or higher to throttle for cost.
 
 ### Depth (how many)
 
@@ -368,7 +368,7 @@ Config file: `$HERMES_HOME/honcho.json` (profile-local) or `~/.honcho/config.jso
 | `contextTokens` | uncapped | Max tokens for the combined base context injection (summary + representation + card). Opt-in cap — omit to leave uncapped, set to an integer to bound injection size. |
 | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` |
 | `contextCadence` | `1` | Min turns between context API calls |
-| `dialecticCadence` | `3` | Min turns between dialectic LLM calls |
+| `dialecticCadence` | `1` | Min turns between dialectic LLM calls |
 
 The `contextTokens` budget is enforced at injection time. If the session summary + representation + card exceed the budget, Honcho trims the summary first, then the representation, preserving the card. This prevents context blowup in long sessions.
 
diff --git a/plugins/memory/honcho/__init__.py b/plugins/memory/honcho/__init__.py
index ca44ce60193..ac0f60279a6 100644
--- a/plugins/memory/honcho/__init__.py
+++ b/plugins/memory/honcho/__init__.py
@@ -206,10 +206,11 @@ class HonchoMemoryProvider(MemoryProvider):
         self._turn_count = 0
         self._injection_frequency = "every-turn"  # or "first-turn"
         self._context_cadence = 1   # minimum turns between context API calls
-        self._dialectic_cadence = 3  # minimum turns between dialectic API calls
+        self._dialectic_cadence = 1  # minimum turns between dialectic API calls
         self._dialectic_depth = 1   # how many .chat() calls per dialectic cycle (1-3)
         self._dialectic_depth_levels: list[str] | None = None  # per-pass reasoning levels
-        self._reasoning_level_cap: Optional[str] = None  # "minimal", "low", "medium", "high"
+        self._reasoning_heuristic: bool = True  # scale base level by query length
+        self._reasoning_level_cap: str = "high"  # ceiling for auto-selected level
         self._last_context_turn = -999
         self._last_dialectic_turn = -999
 
@@ -305,12 +306,12 @@ class HonchoMemoryProvider(MemoryProvider):
                 raw = cfg.raw or {}
                 self._injection_frequency = raw.get("injectionFrequency", "every-turn")
                 self._context_cadence = int(raw.get("contextCadence", 1))
-                self._dialectic_cadence = int(raw.get("dialecticCadence", 3))
+                self._dialectic_cadence = int(raw.get("dialecticCadence", 1))
                 self._dialectic_depth = max(1, min(cfg.dialectic_depth, 3))
                 self._dialectic_depth_levels = cfg.dialectic_depth_levels
-                cap = raw.get("reasoningLevelCap")
-                if cap and cap in ("minimal", "low", "medium", "high"):
-                    self._reasoning_level_cap = cap
+                self._reasoning_heuristic = cfg.reasoning_heuristic
+                if cfg.reasoning_level_cap in self._LEVEL_ORDER:
+                    self._reasoning_level_cap = cfg.reasoning_level_cap
             except Exception as e:
                 logger.debug("Honcho cost-awareness config parse error: %s", e)
 
@@ -391,14 +392,42 @@ class HonchoMemoryProvider(MemoryProvider):
         except Exception as e:
             logger.debug("Honcho memory file migration skipped: %s", e)
 
-        # ----- B7: Pre-warming context at init -----
+        # ----- B7: Pre-warming at init -----
+        # Context prewarm: warms peer.context() cache (base layer), consumed
+        # via pop_context_result() in prefetch().
+        # Dialectic prewarm: fires a depth-aware cycle against the plugin's
+        # own _prefetch_result so turn 1 can consume it directly. Without this
+        # the first-turn sync path pays for a duplicate .chat() — and at
+        # depth>1 a single-pass session-start dialectic often returns weak
+        # output that multi-pass audit/reconciliation is meant to catch.
         if self._recall_mode in ("context", "hybrid"):
             try:
                 self._manager.prefetch_context(self._session_key)
-                self._manager.prefetch_dialectic(self._session_key, "What should I know about this user?")
-                logger.debug("Honcho pre-warm threads started for session: %s", self._session_key)
             except Exception as e:
-                logger.debug("Honcho pre-warm failed: %s", e)
+                logger.debug("Honcho context prewarm failed: %s", e)
+
+            _prewarm_query = (
+                "Summarize what you know about this user. "
+                "Focus on preferences, current projects, and working style."
+            )
+
+            def _prewarm_dialectic() -> None:
+                try:
+                    r = self._run_dialectic_depth(_prewarm_query)
+                except Exception as exc:
+                    logger.debug("Honcho dialectic prewarm failed: %s", exc)
+                    return
+                if r and r.strip():
+                    with self._prefetch_lock:
+                        self._prefetch_result = r
+                    # Treat prewarm as turn 0 so cadence gating starts clean.
+                    self._last_dialectic_turn = 0
+
+            self._prefetch_thread = threading.Thread(
+                target=_prewarm_dialectic, daemon=True, name="honcho-prewarm-dialectic"
+            )
+            self._prefetch_thread.start()
+            logger.debug("Honcho pre-warm started for session: %s", self._session_key)
 
     def _ensure_session(self) -> bool:
         """Lazily initialize the Honcho session (for tools-only mode).
@@ -526,6 +555,11 @@ class HonchoMemoryProvider(MemoryProvider):
         if self._injection_frequency == "first-turn" and self._turn_count > 1:
             return ""
 
+        # Skip trivial prompts — "ok", "yes", slash commands carry no semantic signal,
+        # so injecting user context there just burns tokens and can derail the reply.
+        if self._is_trivial_prompt(query):
+            return ""
+
         parts = []
 
         # ----- Layer 1: Base context (representation + card) -----
@@ -560,37 +594,46 @@ class HonchoMemoryProvider(MemoryProvider):
         # On the very first turn, no queue_prefetch() has run yet so the
         # dialectic result is empty.  Run with a bounded timeout so a slow
         # Honcho connection doesn't block the first response indefinitely.
-        # On timeout the result is skipped and queue_prefetch() will pick it
-        # up at the next cadence-allowed turn.
+        # On timeout we let the thread keep running and write its result into
+        # _prefetch_result under the lock, so the next turn picks it up.
+        #
+        # Skip if the session-start prewarm already filled _prefetch_result —
+        # firing another .chat() would be duplicate work.
+        with self._prefetch_lock:
+            _prewarm_landed = bool(self._prefetch_result)
+        if _prewarm_landed and self._last_dialectic_turn == -999:
+            self._last_dialectic_turn = self._turn_count
+
         if self._last_dialectic_turn == -999 and query:
             _first_turn_timeout = (
                 self._config.timeout if self._config and self._config.timeout else 8.0
             )
-            _result_holder: list[str] = []
+            _fired_at = self._turn_count
 
             def _run_first_turn() -> None:
                 try:
-                    _result_holder.append(self._run_dialectic_depth(query))
+                    r = self._run_dialectic_depth(query)
                 except Exception as exc:
                     logger.debug("Honcho first-turn dialectic failed: %s", exc)
-
-            _t = threading.Thread(target=_run_first_turn, daemon=True)
-            _t.start()
-            _t.join(timeout=_first_turn_timeout)
-            if not _t.is_alive():
-                first_turn_dialectic = _result_holder[0] if _result_holder else ""
-                if first_turn_dialectic and first_turn_dialectic.strip():
+                    return
+                if r and r.strip():
                     with self._prefetch_lock:
-                        self._prefetch_result = first_turn_dialectic
-                self._last_dialectic_turn = self._turn_count
-            else:
+                        self._prefetch_result = r
+                    # Only advance cadence on a non-empty result so failures
+                    # don't burn the full cadence window on nothing.
+                    self._last_dialectic_turn = _fired_at
+
+            self._prefetch_thread = threading.Thread(
+                target=_run_first_turn, daemon=True, name="honcho-prefetch-first"
+            )
+            self._prefetch_thread.start()
+            self._prefetch_thread.join(timeout=_first_turn_timeout)
+            if self._prefetch_thread.is_alive():
                 logger.debug(
-                    "Honcho first-turn dialectic timed out (%.1fs) — "
-                    "will inject at next cadence-allowed turn",
+                    "Honcho first-turn dialectic still running after %.1fs — "
+                    "will surface on next turn",
                     _first_turn_timeout,
                 )
-                # Don't update _last_dialectic_turn: queue_prefetch() will
-                # retry at the next cadence-allowed turn via the async path.
 
         if self._prefetch_thread and self._prefetch_thread.is_alive():
             self._prefetch_thread.join(timeout=3.0)
@@ -641,6 +684,10 @@ class HonchoMemoryProvider(MemoryProvider):
         if self._recall_mode == "tools":
             return
 
+        # Trivial prompts don't warrant either a context refresh or a dialectic call.
+        if self._is_trivial_prompt(query):
+            return
+
         # ----- Context refresh (base layer) — independent cadence -----
         if self._context_cadence <= 1 or (self._turn_count - self._last_context_turn) >= self._context_cadence:
             self._last_context_turn = self._turn_count
@@ -650,23 +697,35 @@ class HonchoMemoryProvider(MemoryProvider):
                 logger.debug("Honcho context prefetch failed: %s", e)
 
         # ----- Dialectic prefetch (supplement layer) -----
-        # B5: cadence check — skip if too soon since last dialectic call
-        if self._dialectic_cadence > 1:
-            if (self._turn_count - self._last_dialectic_turn) < self._dialectic_cadence:
-                logger.debug("Honcho dialectic prefetch skipped: cadence %d, turns since last: %d",
-                             self._dialectic_cadence, self._turn_count - self._last_dialectic_turn)
-                return
+        # Guard against thread pile-up: if a prior dialectic is still in flight,
+        # let it finish instead of stacking races on _prefetch_result.
+        if self._prefetch_thread and self._prefetch_thread.is_alive():
+            logger.debug("Honcho dialectic prefetch skipped: prior thread still running")
+            return
 
-        self._last_dialectic_turn = self._turn_count
+        # B5: cadence check — skip if too soon since last *successful* dialectic call.
+        # The gate applies uniformly (including cadence=1): "every turn" means once
+        # per turn, not twice on the same turn when first-turn sync already fired.
+        if (self._turn_count - self._last_dialectic_turn) < self._dialectic_cadence:
+            logger.debug("Honcho dialectic prefetch skipped: cadence %d, turns since last: %d",
+                         self._dialectic_cadence, self._turn_count - self._last_dialectic_turn)
+            return
+
+        # Advance cadence only on a non-empty result — otherwise a silent failure
+        # (empty dialectic, transient API error) would burn the full cadence window
+        # before the next retry, making it look like dialectic "never fires again".
+        _fired_at = self._turn_count
 
         def _run():
             try:
                 result = self._run_dialectic_depth(query)
-                if result and result.strip():
-                    with self._prefetch_lock:
-                        self._prefetch_result = result
             except Exception as e:
                 logger.debug("Honcho prefetch failed: %s", e)
+                return
+            if result and result.strip():
+                with self._prefetch_lock:
+                    self._prefetch_result = result
+                self._last_dialectic_turn = _fired_at
 
         self._prefetch_thread = threading.Thread(
             target=_run, daemon=True, name="honcho-prefetch"
@@ -692,11 +751,42 @@ class HonchoMemoryProvider(MemoryProvider):
 
     _LEVEL_ORDER = ("minimal", "low", "medium", "high", "max")
 
-    def _resolve_pass_level(self, pass_idx: int) -> str:
+    # Reasoning-level heuristic thresholds (restored from pre-9a0ab34c behavior).
+    # Promoted to class constants so tests can override without widening the
+    # config surface. Bump to config fields only if real use shows they're needed.
+    _HEURISTIC_LENGTH_MEDIUM = 120
+    _HEURISTIC_LENGTH_HIGH = 400
+
+    def _apply_reasoning_heuristic(self, base: str, query: str) -> str:
+        """Scale `base` up by query length, clamped at reasoning_level_cap.
+
+        Char-count heuristic: +1 at >=120 chars, +2 at >=400. Ceiling is
+        reasoning_level_cap (default 'high' — 'max' is reserved for
+        explicit tool-path selection).
+        """
+        if not self._reasoning_heuristic or not query:
+            return base
+        if base not in self._LEVEL_ORDER:
+            return base
+        n = len(query)
+        if n < self._HEURISTIC_LENGTH_MEDIUM:
+            bump = 0
+        elif n < self._HEURISTIC_LENGTH_HIGH:
+            bump = 1
+        else:
+            bump = 2
+        base_idx = self._LEVEL_ORDER.index(base)
+        cap_idx = self._LEVEL_ORDER.index(self._reasoning_level_cap)
+        return self._LEVEL_ORDER[min(base_idx + bump, cap_idx)]
+
+    def _resolve_pass_level(self, pass_idx: int, query: str = "") -> str:
         """Resolve reasoning level for a given pass index.
 
-        Uses dialecticDepthLevels if configured, otherwise proportional
-        defaults relative to dialecticReasoningLevel.
+        Precedence:
+          1. dialecticDepthLevels (explicit per-pass) — wins absolutely
+          2. _PROPORTIONAL_LEVELS table (depth>1 lighter-early passes)
+          3. Base level = dialecticReasoningLevel, optionally scaled by the
+             reasoning heuristic when the mapping falls through to 'base'
         """
         if self._dialectic_depth_levels and pass_idx < len(self._dialectic_depth_levels):
             return self._dialectic_depth_levels[pass_idx]
@@ -704,7 +794,7 @@ class HonchoMemoryProvider(MemoryProvider):
         base = (self._config.dialectic_reasoning_level if self._config else "low")
         mapping = self._PROPORTIONAL_LEVELS.get((self._dialectic_depth, pass_idx))
         if mapping is None or mapping == "base":
-            return base
+            return self._apply_reasoning_heuristic(base, query)
         return mapping
 
     def _build_dialectic_prompt(self, pass_idx: int, prior_results: list[str], is_cold: bool) -> str:
@@ -791,7 +881,7 @@ class HonchoMemoryProvider(MemoryProvider):
                     break
                 prompt = self._build_dialectic_prompt(i, results, is_cold)
 
-            level = self._resolve_pass_level(i)
+            level = self._resolve_pass_level(i, query=query)
             logger.debug("Honcho dialectic depth %d: pass %d, level=%s, cold=%s",
                          self._dialectic_depth, i, level, is_cold)
 
@@ -808,6 +898,29 @@ class HonchoMemoryProvider(MemoryProvider):
                 return r
         return ""
 
+    # Prompts that carry no semantic signal — trivial acknowledgements, slash
+    # commands, empty input. Skipping injection here saves tokens and prevents
+    # stale user-model context from derailing one-word replies.
+    _TRIVIAL_PROMPT_RE = re.compile(
+        r'^(yes|no|ok|okay|sure|thanks|thank you|y|n|yep|nope|yeah|nah|'
+        r'continue|go ahead|do it|proceed|got it|cool|nice|great|done|next|lgtm|k)$',
+        re.IGNORECASE,
+    )
+
+    @classmethod
+    def _is_trivial_prompt(cls, text: str) -> bool:
+        """Return True if the prompt is too trivial to warrant context injection."""
+        if not text:
+            return True
+        stripped = text.strip()
+        if not stripped:
+            return True
+        if stripped.startswith("/"):
+            return True
+        if cls._TRIVIAL_PROMPT_RE.match(stripped):
+            return True
+        return False
+
     def on_turn_start(self, turn_number: int, message: str, **kwargs) -> None:
         """Track turn count for cadence and injection_frequency logic."""
         self._turn_count = turn_number
diff --git a/plugins/memory/honcho/cli.py b/plugins/memory/honcho/cli.py
index 536d34002de..478bf39d8a6 100644
--- a/plugins/memory/honcho/cli.py
+++ b/plugins/memory/honcho/cli.py
@@ -460,17 +460,17 @@ def cmd_setup(args) -> None:
             pass  # keep current
 
     # --- 7b. Dialectic cadence ---
-    current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "3")
+    current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "1")
     print("\n  Dialectic cadence:")
     print("    How often Honcho rebuilds its user model (LLM call on Honcho backend).")
-    print("    1 = every turn (aggressive), 3 = every 3 turns (recommended), 5+ = sparse.")
+    print("    1 = every turn (default), 3+ = sparse (cost-saving).")
     new_dialectic = _prompt("Dialectic cadence", default=current_dialectic)
     try:
         val = int(new_dialectic)
         if val >= 1:
             hermes_host["dialecticCadence"] = val
     except (ValueError, TypeError):
-        hermes_host["dialecticCadence"] = 3
+        hermes_host["dialecticCadence"] = 1
 
     # --- 8. Session strategy ---
     current_strat = hermes_host.get("sessionStrategy") or cfg.get("sessionStrategy", "per-session")
@@ -636,7 +636,7 @@ def cmd_status(args) -> None:
     print(f"  Recall mode:    {hcfg.recall_mode}")
     print(f"  Context budget: {hcfg.context_tokens or '(uncapped)'} tokens")
     raw = getattr(hcfg, "raw", None) or {}
-    dialectic_cadence = raw.get("dialecticCadence") or 3
+    dialectic_cadence = raw.get("dialecticCadence") or 1
     print(f"  Dialectic cad:  every {dialectic_cadence} turn{'s' if dialectic_cadence != 1 else ''}")
     print(f"  Observation:    user(me={hcfg.user_observe_me},others={hcfg.user_observe_others}) ai(me={hcfg.ai_observe_me},others={hcfg.ai_observe_others})")
     print(f"  Write freq:     {hcfg.write_frequency}")
diff --git a/plugins/memory/honcho/client.py b/plugins/memory/honcho/client.py
index 2474d3a2b65..136b1e60dc6 100644
--- a/plugins/memory/honcho/client.py
+++ b/plugins/memory/honcho/client.py
@@ -251,6 +251,14 @@ class HonchoClientConfig:
     # matching dialectic_depth length. When None, uses proportional defaults
     # derived from dialectic_reasoning_level.
     dialectic_depth_levels: list[str] | None = None
+    # Reasoning-level heuristic for auto-injected dialectic calls. When true,
+    # scales the base level up on longer queries (restored from pre-9a0ab34c
+    # behavior; see plugins/memory/honcho/__init__.py for thresholds).
+    # Never auto-selects a level above reasoning_level_cap.
+    reasoning_heuristic: bool = True
+    # Ceiling for heuristic-selected reasoning level. "max" is reserved for
+    # explicit tool-path selection; default "high" matches the old behavior.
+    reasoning_level_cap: str = "high"
     # Honcho API limits — configurable for self-hosted instances
     # Max chars per message sent via add_messages() (Honcho cloud: 25000)
     message_max_chars: int = 25000
@@ -446,6 +454,16 @@ class HonchoClientConfig:
                 raw.get("dialecticDepthLevels"),
                 depth=_parse_dialectic_depth(host_block.get("dialecticDepth"), raw.get("dialecticDepth")),
             ),
+            reasoning_heuristic=_resolve_bool(
+                host_block.get("reasoningHeuristic"),
+                raw.get("reasoningHeuristic"),
+                default=True,
+            ),
+            reasoning_level_cap=(
+                host_block.get("reasoningLevelCap")
+                or raw.get("reasoningLevelCap")
+                or "high"
+            ),
             message_max_chars=int(
                 host_block.get("messageMaxChars")
                 or raw.get("messageMaxChars")
diff --git a/plugins/memory/honcho/session.py b/plugins/memory/honcho/session.py
index fd91ee3b3b9..7344b517e40 100644
--- a/plugins/memory/honcho/session.py
+++ b/plugins/memory/honcho/session.py
@@ -100,9 +100,11 @@ class HonchoSessionManager:
         self._write_frequency = write_frequency
         self._turn_counter: int = 0
 
-        # Prefetch caches: session_key → last result (consumed once per turn)
+        # Prefetch cache: session_key → last context result (consumed once per turn).
+        # Dialectic results are cached on the plugin side (HonchoMemoryProvider
+        # ._prefetch_result) so session-start prewarm and turn-driven fires share
+        # one source of truth; see __init__.py _do_session_init for the prewarm.
         self._context_cache: dict[str, dict] = {}
-        self._dialectic_cache: dict[str, str] = {}
         self._prefetch_cache_lock = threading.Lock()
         self._dialectic_reasoning_level: str = (
             config.dialectic_reasoning_level if config else "low"
@@ -499,8 +501,8 @@ class HonchoSessionManager:
         Query Honcho's dialectic endpoint about a peer.
 
         Runs an LLM on Honcho's backend against the target peer's full
-        representation. Higher latency than context() — call async via
-        prefetch_dialectic() to avoid blocking the response.
+        representation. Higher latency than context() — callers run this in
+        a background thread (see HonchoMemoryProvider) to avoid blocking.
 
         Args:
             session_key: The session key to query against.
@@ -555,42 +557,6 @@ class HonchoSessionManager:
             logger.warning("Honcho dialectic query failed: %s", e)
             return ""
 
-    def prefetch_dialectic(self, session_key: str, query: str) -> None:
-        """
-        Fire a dialectic_query in a background thread, caching the result.
-
-        Non-blocking. The result is available via pop_dialectic_result()
-        on the next call (typically the following turn). Reasoning level
-        is selected dynamically based on query complexity.
-
-        Args:
-            session_key: The session key to query against.
-            query: The user's current message, used as the query.
-        """
-        def _run():
-            result = self.dialectic_query(session_key, query)
-            if result:
-                self.set_dialectic_result(session_key, result)
-
-        t = threading.Thread(target=_run, name="honcho-dialectic-prefetch", daemon=True)
-        t.start()
-
-    def set_dialectic_result(self, session_key: str, result: str) -> None:
-        """Store a prefetched dialectic result in a thread-safe way."""
-        if not result:
-            return
-        with self._prefetch_cache_lock:
-            self._dialectic_cache[session_key] = result
-
-    def pop_dialectic_result(self, session_key: str) -> str:
-        """
-        Return and clear the cached dialectic result for this session.
-
-        Returns empty string if no result is ready yet.
-        """
-        with self._prefetch_cache_lock:
-            return self._dialectic_cache.pop(session_key, "")
-
     def prefetch_context(self, session_key: str, user_message: str | None = None) -> None:
         """
         Fire get_prefetch_context in a background thread, caching the result.
diff --git a/tests/agent/test_memory_provider.py b/tests/agent/test_memory_provider.py
index 9301960b717..5cd0d8ab413 100644
--- a/tests/agent/test_memory_provider.py
+++ b/tests/agent/test_memory_provider.py
@@ -971,8 +971,6 @@ class TestHonchoCadenceTracking:
         class FakeManager:
             def prefetch_context(self, key, query=None):
                 pass
-            def prefetch_dialectic(self, key, query):
-                pass
 
         p._manager = FakeManager()
 
diff --git a/tests/honcho_plugin/test_async_memory.py b/tests/honcho_plugin/test_async_memory.py
index 936f478846f..5df8d274540 100644
--- a/tests/honcho_plugin/test_async_memory.py
+++ b/tests/honcho_plugin/test_async_memory.py
@@ -460,10 +460,3 @@ class TestPrefetchCacheAccessors:
         assert mgr.pop_context_result("cli:test") == payload
         assert mgr.pop_context_result("cli:test") == {}
 
-    def test_set_and_pop_dialectic_result(self):
-        mgr = _make_manager(write_frequency="turn")
-
-        mgr.set_dialectic_result("cli:test", "Resume with toolset cleanup")
-
-        assert mgr.pop_dialectic_result("cli:test") == "Resume with toolset cleanup"
-        assert mgr.pop_dialectic_result("cli:test") == ""
diff --git a/tests/honcho_plugin/test_session.py b/tests/honcho_plugin/test_session.py
index 9784959d37d..b0282b1969c 100644
--- a/tests/honcho_plugin/test_session.py
+++ b/tests/honcho_plugin/test_session.py
@@ -815,6 +815,24 @@ class TestDialecticInputGuard:
 # ---------------------------------------------------------------------------
 
 
+def _settle_prewarm(provider):
+    """Wait for the session-start prewarm dialectic thread, then return the
+    provider to a clean 'nothing fired yet' state so cadence/first-turn/
+    trivial-prompt tests can assert from a known baseline."""
+    if provider._prefetch_thread:
+        provider._prefetch_thread.join(timeout=3.0)
+    with provider._prefetch_lock:
+        provider._prefetch_result = ""
+    provider._prefetch_thread = None
+    provider._last_dialectic_turn = -999
+    if getattr(provider, "_manager", None) is not None:
+        try:
+            provider._manager.dialectic_query.reset_mock()
+            provider._manager.prefetch_context.reset_mock()
+        except AttributeError:
+            pass
+
+
 class TestDialecticCadenceDefaults:
     """Regression tests for dialectic_cadence default value."""
 
@@ -840,12 +858,15 @@ class TestDialecticCadenceDefaults:
              patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
             provider.initialize(session_id="test-session-001")
 
+        _settle_prewarm(provider)
         return provider
 
-    def test_default_is_3(self):
-        """Default dialectic_cadence should be 3 to avoid per-turn LLM calls."""
+    def test_default_is_1(self):
+        """Default dialectic_cadence should be 1 (every turn) — restored from
+        pre-#10619 behavior to avoid a silent regression on upgrade for users
+        who never set dialecticCadence explicitly."""
         provider = self._make_provider()
-        assert provider._dialectic_cadence == 3
+        assert provider._dialectic_cadence == 1
 
     def test_config_override(self):
         """dialecticCadence from config overrides the default."""
@@ -908,6 +929,7 @@ class TestDialecticDepth:
              patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
             provider.initialize(session_id="test-session-001")
 
+        _settle_prewarm(provider)
         return provider
 
     def test_default_depth_is_1(self):
@@ -1062,7 +1084,8 @@ class TestDialecticDepth:
         provider.prefetch("hello")
         assert provider._manager.dialectic_query.call_count == 1
 
-        # Now queue_prefetch on same turn should skip (cadence: 0 - 0 < 3)
+        # Now queue_prefetch on same turn should skip — _last_dialectic_turn
+        # was just set to _turn_count by the sync path, so (0 - 0 = 0) < cadence.
         provider._manager.dialectic_query.reset_mock()
         provider.queue_prefetch("hello")
         assert provider._manager.dialectic_query.call_count == 0
@@ -1083,6 +1106,453 @@ class TestDialecticDepth:
         assert provider._manager.dialectic_query.call_count == 1
 
 
+# ---------------------------------------------------------------------------
+# Trivial-prompt heuristic + dialectic cadence silent-failure guards
+# ---------------------------------------------------------------------------
+
+
+class TestTrivialPromptHeuristic:
+    """Trivial prompts ('ok', 'y', slash commands) must short-circuit injection.
+
+    Restored after accidental removal during the two-layer prefetch refactor.
+    """
+
+    @staticmethod
+    def _make_provider():
+        from unittest.mock import patch, MagicMock
+        from plugins.memory.honcho.client import HonchoClientConfig
+
+        cfg = HonchoClientConfig(api_key="test-key", enabled=True, recall_mode="hybrid")
+        provider = HonchoMemoryProvider()
+        mock_manager = MagicMock()
+        mock_session = MagicMock()
+        mock_session.messages = []
+        mock_manager.get_or_create.return_value = mock_session
+
+        with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \
+             patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \
+             patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \
+             patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
+            provider.initialize(session_id="test-session-trivial")
+        _settle_prewarm(provider)
+        return provider
+
+    def test_classifier_catches_common_trivial_forms(self):
+        for t in ("ok", "OK", " ok ", "y", "yes", "sure", "thanks", "lgtm", "/help", "", "   "):
+            assert HonchoMemoryProvider._is_trivial_prompt(t), f"expected trivial: {t!r}"
+
+    def test_classifier_lets_substantive_prompts_through(self):
+        for t in ("hello world", "what's my name", "explain this", "ok so what's next"):
+            assert not HonchoMemoryProvider._is_trivial_prompt(t), f"expected non-trivial: {t!r}"
+
+    def test_prefetch_skips_on_trivial_prompt(self):
+        provider = self._make_provider()
+        provider._session_key = "test"
+        provider._base_context_cache = "cached base"
+        provider._last_dialectic_turn = 0
+        provider._turn_count = 5
+
+        assert provider.prefetch("ok") == ""
+        assert provider.prefetch("/help") == ""
+        # Dialectic should not have fired
+        assert provider._manager.dialectic_query.call_count == 0
+
+    def test_queue_prefetch_skips_on_trivial_prompt(self):
+        provider = self._make_provider()
+        provider._session_key = "test"
+        provider._turn_count = 10
+        provider._last_dialectic_turn = -999  # would otherwise fire
+        # initialize() pre-warms; clear call counts before the assertion.
+        provider._manager.prefetch_context.reset_mock()
+        provider._manager.dialectic_query.reset_mock()
+
+        provider.queue_prefetch("y")
+        # Trivial prompts short-circuit both context refresh and dialectic fire.
+        assert provider._manager.prefetch_context.call_count == 0
+        assert provider._manager.dialectic_query.call_count == 0
+
+
+class TestDialecticCadenceAdvancesOnSuccess:
+    """Cadence tracker must only advance when the dialectic call actually returned.
+
+    A silent failure (empty result, API blip) used to burn the full cadence window
+    before retrying — making it look like dialectic 'never fires again'.
+    """
+
+    @staticmethod
+    def _make_provider():
+        from unittest.mock import patch, MagicMock
+        from plugins.memory.honcho.client import HonchoClientConfig
+
+        cfg = HonchoClientConfig(
+            api_key="test-key", enabled=True, recall_mode="hybrid", dialectic_depth=1,
+        )
+        provider = HonchoMemoryProvider()
+        mock_manager = MagicMock()
+        mock_session = MagicMock()
+        mock_session.messages = []
+        mock_manager.get_or_create.return_value = mock_session
+
+        with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \
+             patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \
+             patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \
+             patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
+            provider.initialize(session_id="test-session-retry")
+        _settle_prewarm(provider)
+        return provider
+
+    def test_empty_dialectic_result_does_not_advance_cadence(self):
+        import time as _time
+        provider = self._make_provider()
+        provider._session_key = "test"
+        provider._manager.dialectic_query.return_value = ""  # silent failure
+        provider._turn_count = 5
+        provider._last_dialectic_turn = 0  # would fire (5 - 0 = 5 ≥ cadence)
+
+        provider.queue_prefetch("hello")
+        # wait for the background thread to settle
+        if provider._prefetch_thread:
+            provider._prefetch_thread.join(timeout=2.0)
+
+        # Dialectic call was attempted
+        assert provider._manager.dialectic_query.call_count == 1
+        # But cadence tracker did NOT advance — next turn should retry
+        assert provider._last_dialectic_turn == 0
+
+    def test_non_empty_dialectic_result_advances_cadence(self):
+        provider = self._make_provider()
+        provider._session_key = "test"
+        provider._manager.dialectic_query.return_value = "real synthesis output"
+        provider._turn_count = 5
+        provider._last_dialectic_turn = 0
+
+        provider.queue_prefetch("hello")
+        if provider._prefetch_thread:
+            provider._prefetch_thread.join(timeout=2.0)
+
+        assert provider._last_dialectic_turn == 5
+
+    def test_in_flight_thread_is_not_stacked(self):
+        import threading as _threading
+        provider = self._make_provider()
+        provider._session_key = "test"
+        provider._turn_count = 10
+        provider._last_dialectic_turn = 0
+
+        # Simulate a prior thread still running
+        hold = _threading.Event()
+
+        def _block():
+            hold.wait(timeout=5.0)
+
+        stale = _threading.Thread(target=_block, daemon=True)
+        stale.start()
+        provider._prefetch_thread = stale
+
+        provider.queue_prefetch("hello")
+        # Should have short-circuited — no new dialectic call
+        assert provider._manager.dialectic_query.call_count == 0
+        hold.set()
+        stale.join(timeout=2.0)
+
+
+class TestSessionStartDialecticPrewarm:
+    """Session-start prewarm fires a depth-aware dialectic whose result is
+    consumed by turn 1 — no duplicate .chat() and no dead-cache orphaning."""
+
+    @staticmethod
+    def _make_provider(cfg_extra=None, dialectic_result="prewarm synthesis"):
+        from unittest.mock import patch, MagicMock
+        from plugins.memory.honcho.client import HonchoClientConfig
+
+        defaults = dict(api_key="test-key", enabled=True, recall_mode="hybrid")
+        if cfg_extra:
+            defaults.update(cfg_extra)
+        cfg = HonchoClientConfig(**defaults)
+        provider = HonchoMemoryProvider()
+        mock_manager = MagicMock()
+        mock_manager.get_or_create.return_value = MagicMock(messages=[])
+        mock_manager.get_prefetch_context.return_value = None
+        mock_manager.pop_context_result.return_value = None
+        mock_manager.dialectic_query.return_value = dialectic_result
+
+        with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \
+             patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \
+             patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \
+             patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
+            provider.initialize(session_id="test-prewarm")
+        return provider
+
+    def test_prewarm_populates_prefetch_result(self):
+        p = self._make_provider()
+        # Wait for prewarm thread to land
+        if p._prefetch_thread:
+            p._prefetch_thread.join(timeout=3.0)
+        with p._prefetch_lock:
+            assert p._prefetch_result == "prewarm synthesis"
+        assert p._last_dialectic_turn == 0
+
+    def test_turn1_consumes_prewarm_without_duplicate_dialectic(self):
+        """With prewarm result already in _prefetch_result, turn 1 prefetch
+        should NOT fire another dialectic."""
+        p = self._make_provider()
+        if p._prefetch_thread:
+            p._prefetch_thread.join(timeout=3.0)
+        p._manager.dialectic_query.reset_mock()
+        p._session_key = "test-prewarm"
+        p._base_context_cache = ""
+        p._turn_count = 1
+
+        result = p.prefetch("hello world")
+        assert "prewarm synthesis" in result
+        # The sync first-turn path must NOT have fired another .chat()
+        assert p._manager.dialectic_query.call_count == 0
+
+    def test_turn1_falls_back_to_sync_when_prewarm_missing(self):
+        """If the prewarm produced nothing (empty graph, API blip), turn 1
+        still fires its own sync dialectic."""
+        p = self._make_provider(dialectic_result="")  # prewarm returns empty
+        if p._prefetch_thread:
+            p._prefetch_thread.join(timeout=3.0)
+        with p._prefetch_lock:
+            assert p._prefetch_result == ""  # prewarm landed nothing
+        # Switch dialectic_query to return something on the sync first-turn call
+        p._manager.dialectic_query.return_value = "sync recovery"
+        p._manager.dialectic_query.reset_mock()
+        p._session_key = "test-prewarm"
+        p._base_context_cache = ""
+        p._turn_count = 1
+
+        result = p.prefetch("hello world")
+        assert "sync recovery" in result
+        assert p._manager.dialectic_query.call_count == 1
+
+
+class TestDialecticLifecycleSmoke:
+    """End-to-end smoke: walks a realistic multi-turn session through every
+    behavior we care about — prewarm → turn 1 consume → trivial skip → cadence
+    fire → silent-failure retry → heuristic bump → session-end flush.
+
+    This is the 'velvet circuit' test: one provider, one flow, one set of
+    assertions. If the suite above lies about intent, this one catches it.
+    """
+
+    @staticmethod
+    def _make_provider(cfg_extra=None):
+        from unittest.mock import patch, MagicMock
+        from plugins.memory.honcho.client import HonchoClientConfig
+
+        defaults = dict(
+            api_key="test-key", enabled=True, recall_mode="hybrid",
+            dialectic_reasoning_level="low", reasoning_heuristic=True,
+            reasoning_level_cap="high", dialectic_depth=1,
+        )
+        if cfg_extra:
+            defaults.update(cfg_extra)
+        cfg = HonchoClientConfig(**defaults)
+        provider = HonchoMemoryProvider()
+        mock_manager = MagicMock()
+        mock_session = MagicMock()
+        mock_session.messages = []
+        mock_manager.get_or_create.return_value = mock_session
+        mock_manager.get_prefetch_context.return_value = None
+        mock_manager.pop_context_result.return_value = None
+
+        with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \
+             patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \
+             patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \
+             patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
+            return provider, mock_manager, cfg
+
+    def _await_thread(self, provider):
+        if provider._prefetch_thread:
+            provider._prefetch_thread.join(timeout=3.0)
+
+    def test_full_multi_turn_session(self):
+        """Walks init → turns 1..8 → session end. Asserts at every step that
+        the plugin did exactly what it should and nothing more.
+
+        Uses dialecticCadence=3 so we can exercise skip-turns between fires
+        and the silent-failure retry path without their gates tripping each
+        other. Trivial + slash skips apply independent of cadence.
+        """
+        from unittest.mock import patch, MagicMock
+        provider, mgr, cfg = self._make_provider(
+            cfg_extra={"raw": {"dialecticCadence": 3}}
+        )
+
+        # Program the dialectic responses in the exact order they'll be requested.
+        # An extra or missing call fails the test — strong smoke signal.
+        responses = iter([
+            "prewarm: user is eri, works on hermes",      # session-start prewarm
+            "cadence fire: long query synthesis",         # turn 4 queue_prefetch
+            "",                                           # turn 7 fire: silent failure
+            "retry success: fresh synthesis",             # turn 8 queue_prefetch retry
+        ])
+        mgr.dialectic_query.side_effect = lambda *a, **kw: next(responses)
+
+        # ---- init: prewarm fires ----
+        with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \
+             patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \
+             patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mgr), \
+             patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
+            provider.initialize(session_id="smoke-test")
+
+        self._await_thread(provider)
+        with provider._prefetch_lock:
+            assert provider._prefetch_result.startswith("prewarm"), \
+                "session-start prewarm must land in _prefetch_result"
+        assert provider._last_dialectic_turn == 0, "prewarm marks turn 0"
+        assert mgr.dialectic_query.call_count == 1
+
+        # ---- turn 1: consume prewarm, no duplicate dialectic ----
+        provider.on_turn_start(1, "hey")
+        inject1 = provider.prefetch("hey")
+        assert "prewarm" in inject1, "turn 1 must surface prewarm"
+        provider.sync_turn("hey", "hi there")
+        provider.queue_prefetch("hey")  # cadence gate: (1-0)<3 → skip
+        self._await_thread(provider)
+        assert mgr.dialectic_query.call_count == 1, \
+            "turn 1 must not fire — prewarm covered it and cadence skips"
+
+        # ---- turn 2: trivial 'ok' → skip everything ----
+        mgr.prefetch_context.reset_mock()
+        provider.on_turn_start(2, "ok")
+        assert provider.prefetch("ok") == "", "trivial prompt must short-circuit injection"
+        provider.sync_turn("ok", "cool")
+        provider.queue_prefetch("ok")
+        self._await_thread(provider)
+        assert mgr.dialectic_query.call_count == 1, "trivial must not fire dialectic"
+        assert mgr.prefetch_context.call_count == 0, "trivial must not fire context refresh"
+
+        # ---- turn 3: slash '/help' → also skip ----
+        provider.on_turn_start(3, "/help")
+        assert provider.prefetch("/help") == ""
+        provider.queue_prefetch("/help")
+        assert mgr.dialectic_query.call_count == 1
+
+        # ---- turn 4: long query → cadence fires + heuristic bumps ----
+        long_q = "walk me through " + ("x " * 100)  # ~200 chars → heuristic +1
+        provider.on_turn_start(4, long_q)
+        provider.prefetch(long_q)
+        provider.sync_turn(long_q, "sure")
+        provider.queue_prefetch(long_q)  # (4-0)≥3 → fires
+        self._await_thread(provider)
+        assert mgr.dialectic_query.call_count == 2, "turn 4 cadence fire"
+        _, kwargs = mgr.dialectic_query.call_args
+        assert kwargs.get("reasoning_level") in ("medium", "high"), \
+            f"long query must bump reasoning level above 'low'; got {kwargs.get('reasoning_level')}"
+        assert provider._last_dialectic_turn == 4, "cadence tracker advances on success"
+
+        # ---- turns 5–6: cadence cooldown, no fires ----
+        for t in (5, 6):
+            provider.on_turn_start(t, "tell me more")
+            provider.queue_prefetch("tell me more")
+            self._await_thread(provider)
+        assert mgr.dialectic_query.call_count == 2, "turns 5–6 blocked by cadence window"
+
+        # ---- turn 7: fires but silent failure (empty dialectic) ----
+        provider.on_turn_start(7, "and then what")
+        provider.queue_prefetch("and then what")  # (7-4)≥3 → fires
+        self._await_thread(provider)
+        assert mgr.dialectic_query.call_count == 3, "turn 7 fires"
+        assert provider._last_dialectic_turn == 4, \
+            "silent failure must NOT burn the cadence window"
+
+        # ---- turn 8: retries because cadence didn't advance ----
+        provider.on_turn_start(8, "try again")
+        provider.queue_prefetch("try again")  # (8-4)≥3 → fires again
+        self._await_thread(provider)
+        assert mgr.dialectic_query.call_count == 4, \
+            "turn 8 retries because turn 7's empty result didn't advance cadence"
+        assert provider._last_dialectic_turn == 8, "retry success advances"
+
+        # ---- session end: flush messages ----
+        provider.on_session_end([])
+        mgr.flush_all.assert_called()
+
+
+class TestReasoningHeuristic:
+    """Restored char-count heuristic for auto-injected dialectic reasoning level.
+
+    Pre-9a0ab34c behavior: scale base up by query length, capped at
+    reasoning_level_cap. 'max' is reserved for explicit tool-path selection.
+    """
+
+    @staticmethod
+    def _make_provider(cfg_extra=None):
+        from unittest.mock import patch, MagicMock
+        from plugins.memory.honcho.client import HonchoClientConfig
+
+        defaults = dict(
+            api_key="test-key", enabled=True, recall_mode="hybrid",
+            dialectic_reasoning_level="low", reasoning_heuristic=True,
+            reasoning_level_cap="high",
+        )
+        if cfg_extra:
+            defaults.update(cfg_extra)
+        cfg = HonchoClientConfig(**defaults)
+        provider = HonchoMemoryProvider()
+        mock_manager = MagicMock()
+        mock_manager.get_or_create.return_value = MagicMock(messages=[])
+        with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \
+             patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \
+             patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \
+             patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
+            provider.initialize(session_id="test-heuristic")
+        _settle_prewarm(provider)
+        return provider
+
+    def test_short_query_stays_at_base(self):
+        p = self._make_provider()
+        assert p._apply_reasoning_heuristic("low", "hey") == "low"
+
+    def test_medium_query_bumps_one_level(self):
+        p = self._make_provider()
+        q = "x" * 150
+        assert p._apply_reasoning_heuristic("low", q) == "medium"
+
+    def test_long_query_bumps_two_levels(self):
+        p = self._make_provider()
+        q = "x" * 500
+        assert p._apply_reasoning_heuristic("low", q) == "high"
+
+    def test_bump_respects_cap(self):
+        p = self._make_provider(cfg_extra={"reasoning_level_cap": "medium"})
+        q = "x" * 500  # would hit 'high' without the cap
+        assert p._apply_reasoning_heuristic("low", q) == "medium"
+
+    def test_max_never_auto_selected_with_default_cap(self):
+        p = self._make_provider(cfg_extra={"dialectic_reasoning_level": "high"})
+        q = "x" * 500  # base=high, bump would push to 'max'
+        assert p._apply_reasoning_heuristic("high", q) == "high"
+
+    def test_heuristic_disabled_returns_base(self):
+        p = self._make_provider(cfg_extra={"reasoning_heuristic": False})
+        q = "x" * 500
+        assert p._apply_reasoning_heuristic("low", q) == "low"
+
+    def test_resolve_pass_level_applies_heuristic_at_base_mapping(self):
+        """Depth=1, pass 0 maps to 'base' → heuristic applies."""
+        p = self._make_provider()
+        q = "x" * 150
+        assert p._resolve_pass_level(0, query=q) == "medium"
+
+    def test_resolve_pass_level_does_not_touch_explicit_per_pass(self):
+        """dialecticDepthLevels wins absolutely — no heuristic scaling."""
+        p = self._make_provider(cfg_extra={"dialectic_depth_levels": ["minimal"]})
+        q = "x" * 500  # heuristic would otherwise bump to 'high'
+        assert p._resolve_pass_level(0, query=q) == "minimal"
+
+    def test_resolve_pass_level_does_not_touch_lighter_passes(self):
+        """Depth 3 pass 0 is hardcoded 'minimal' — heuristic must not bump it."""
+        p = self._make_provider(cfg_extra={"dialectic_depth": 3})
+        q = "x" * 500
+        assert p._resolve_pass_level(0, query=q) == "minimal"
+        # But the 'base' pass (idx 1 for depth 3) does get heuristic
+        assert p._resolve_pass_level(1, query=q) == "high"
+
+
 # ---------------------------------------------------------------------------
 # set_peer_card None guard
 # ---------------------------------------------------------------------------
diff --git a/website/docs/user-guide/features/honcho.md b/website/docs/user-guide/features/honcho.md
index 2040949d258..906a7c030eb 100644
--- a/website/docs/user-guide/features/honcho.md
+++ b/website/docs/user-guide/features/honcho.md
@@ -77,7 +77,7 @@ Cost and depth are controlled by three independent knobs:
 | Knob | Controls | Default |
 |------|----------|---------|
 | `contextCadence` | Turns between `context()` API calls (base layer refresh) | `1` |
-| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `3` |
+| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `1` |
 | `dialecticDepth` | Number of `.chat()` passes per dialectic invocation (1–3) | `1` |
 
 These are orthogonal — you can have frequent context refreshes with infrequent dialectic, or deep multi-pass dialectic at low frequency. Example: `contextCadence: 1, dialecticCadence: 5, dialecticDepth: 2` refreshes base context every turn, runs dialectic every 5 turns, and each dialectic run makes 2 passes.
@@ -104,7 +104,7 @@ Honcho is configured in `~/.honcho/config.json` (global) or `$HERMES_HOME/honcho
 |-----|---------|-------------|
 | `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Set to an integer (e.g. 1200) to cap. Truncates at word boundaries |
 | `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) |
-| `dialecticCadence` | `3` | Minimum turns between `peer.chat()` LLM calls (dialectic layer). In `tools` mode, irrelevant — model calls explicitly |
+| `dialecticCadence` | `1` | Minimum turns between `peer.chat()` LLM calls (dialectic layer). In `tools` mode, irrelevant — model calls explicitly |
 | `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped to 1–3 |
 | `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. Overrides proportional defaults |
 | `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` |
diff --git a/website/docs/user-guide/features/memory-providers.md b/website/docs/user-guide/features/memory-providers.md
index f571c7d48f1..181f30f7fa2 100644
--- a/website/docs/user-guide/features/memory-providers.md
+++ b/website/docs/user-guide/features/memory-providers.md
@@ -82,7 +82,7 @@ hermes memory setup        # select "honcho"
 | `workspace` | host key | Shared workspace ID |
 | `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Truncates at word boundaries |
 | `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) |
-| `dialecticCadence` | `3` | Minimum turns between `peer.chat()` LLM calls. Only applies to `hybrid`/`context` modes |
+| `dialecticCadence` | `1` | Minimum turns between `peer.chat()` LLM calls. Only applies to `hybrid`/`context` modes |
 | `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped 1–3. Pass 0: cold/warm prompt, pass 1: self-audit, pass 2: reconciliation |
 | `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. Overrides proportional defaults |
 | `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` |
@@ -181,7 +181,7 @@ This inherits settings from the default `hermes` host block and creates new AI p
       },
       "dialecticReasoningLevel": "low",
       "dialecticDynamic": true,
-      "dialecticCadence": 3,
+      "dialecticCadence": 1,
       "dialecticDepth": 1,
       "dialecticMaxChars": 600,
       "contextCadence": 1,

From 5f9907c11616f30a03356900b8831b1fc98e7d31 Mon Sep 17 00:00:00 2001
From: Erosika <eri@plasticlabs.ai>
Date: Sat, 18 Apr 2026 11:01:45 -0400
Subject: [PATCH 098/143] chore(honcho): drop docs from PR scope, scrub
 commentary

- Revert website/docs and SKILL.md changes; docs unification handled separately
- Scrub commit/PR refs and process narration from code comments and test
  docstrings (no behavior change)
---
 .../autonomous-ai-agents/honcho/SKILL.md      |  6 ++--
 plugins/memory/honcho/__init__.py             | 27 ++++++--------
 plugins/memory/honcho/cli.py                  |  2 +-
 plugins/memory/honcho/client.py               | 10 +++---
 tests/honcho_plugin/test_session.py           | 35 ++++++-------------
 website/docs/user-guide/features/honcho.md    |  4 +--
 .../user-guide/features/memory-providers.md   |  4 +--
 7 files changed, 33 insertions(+), 55 deletions(-)

diff --git a/optional-skills/autonomous-ai-agents/honcho/SKILL.md b/optional-skills/autonomous-ai-agents/honcho/SKILL.md
index 5d03a549858..c60d2c63561 100644
--- a/optional-skills/autonomous-ai-agents/honcho/SKILL.md
+++ b/optional-skills/autonomous-ai-agents/honcho/SKILL.md
@@ -145,10 +145,10 @@ Controls **how often** dialectic and context calls happen.
 | Key | Default | Description |
 |-----|---------|-------------|
 | `contextCadence` | `1` | Min turns between context API calls |
-| `dialecticCadence` | `1` | Min turns between dialectic API calls |
+| `dialecticCadence` | `3` | Min turns between dialectic API calls |
 | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` for base context injection |
 
-Higher cadence values reduce API calls and cost. `dialecticCadence: 1` (default) fires every turn; set to `3` or higher to throttle for cost.
+Higher cadence values reduce API calls and cost. `dialecticCadence: 3` (default) means the dialectic engine fires at most every 3rd turn.
 
 ### Depth (how many)
 
@@ -368,7 +368,7 @@ Config file: `$HERMES_HOME/honcho.json` (profile-local) or `~/.honcho/config.jso
 | `contextTokens` | uncapped | Max tokens for the combined base context injection (summary + representation + card). Opt-in cap — omit to leave uncapped, set to an integer to bound injection size. |
 | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` |
 | `contextCadence` | `1` | Min turns between context API calls |
-| `dialecticCadence` | `1` | Min turns between dialectic LLM calls |
+| `dialecticCadence` | `3` | Min turns between dialectic LLM calls |
 
 The `contextTokens` budget is enforced at injection time. If the session summary + representation + card exceed the budget, Honcho trims the summary first, then the representation, preserving the card. This prevents context blowup in long sessions.
 
diff --git a/plugins/memory/honcho/__init__.py b/plugins/memory/honcho/__init__.py
index ac0f60279a6..51345b8e921 100644
--- a/plugins/memory/honcho/__init__.py
+++ b/plugins/memory/honcho/__init__.py
@@ -393,13 +393,10 @@ class HonchoMemoryProvider(MemoryProvider):
             logger.debug("Honcho memory file migration skipped: %s", e)
 
         # ----- B7: Pre-warming at init -----
-        # Context prewarm: warms peer.context() cache (base layer), consumed
-        # via pop_context_result() in prefetch().
-        # Dialectic prewarm: fires a depth-aware cycle against the plugin's
-        # own _prefetch_result so turn 1 can consume it directly. Without this
-        # the first-turn sync path pays for a duplicate .chat() — and at
-        # depth>1 a single-pass session-start dialectic often returns weak
-        # output that multi-pass audit/reconciliation is meant to catch.
+        # Context prewarm warms peer.context() (base layer), consumed via
+        # pop_context_result() in prefetch(). Dialectic prewarm runs the
+        # full configured depth and writes into _prefetch_result so turn 1
+        # consumes the result directly.
         if self._recall_mode in ("context", "hybrid"):
             try:
                 self._manager.prefetch_context(self._session_key)
@@ -555,8 +552,7 @@ class HonchoMemoryProvider(MemoryProvider):
         if self._injection_frequency == "first-turn" and self._turn_count > 1:
             return ""
 
-        # Skip trivial prompts — "ok", "yes", slash commands carry no semantic signal,
-        # so injecting user context there just burns tokens and can derail the reply.
+        # Trivial prompts ("ok", "yes", slash commands) carry no semantic signal.
         if self._is_trivial_prompt(query):
             return ""
 
@@ -619,8 +615,8 @@ class HonchoMemoryProvider(MemoryProvider):
                 if r and r.strip():
                     with self._prefetch_lock:
                         self._prefetch_result = r
-                    # Only advance cadence on a non-empty result so failures
-                    # don't burn a 3-turn cooldown on nothing.
+                    # Advance cadence only on a non-empty result so the next
+                    # turn retries when the call returned nothing.
                     self._last_dialectic_turn = _fired_at
 
             self._prefetch_thread = threading.Thread(
@@ -711,9 +707,8 @@ class HonchoMemoryProvider(MemoryProvider):
                          self._dialectic_cadence, self._turn_count - self._last_dialectic_turn)
             return
 
-        # Advance cadence only on a non-empty result — otherwise a silent failure
-        # (empty dialectic, transient API error) would burn the full cadence window
-        # before the next retry, making it look like dialectic "never fires again".
+        # Cadence advances only on a non-empty result so empty returns
+        # (transient API error, sparse representation) retry next turn.
         _fired_at = self._turn_count
 
         def _run():
@@ -751,9 +746,7 @@ class HonchoMemoryProvider(MemoryProvider):
 
     _LEVEL_ORDER = ("minimal", "low", "medium", "high", "max")
 
-    # Reasoning-level heuristic thresholds (restored from pre-9a0ab34c behavior).
-    # Promoted to class constants so tests can override without widening the
-    # config surface. Bump to config fields only if real use shows they're needed.
+    # Char-count thresholds for the query-length reasoning heuristic.
     _HEURISTIC_LENGTH_MEDIUM = 120
     _HEURISTIC_LENGTH_HIGH = 400
 
diff --git a/plugins/memory/honcho/cli.py b/plugins/memory/honcho/cli.py
index 478bf39d8a6..5cd25bfbab3 100644
--- a/plugins/memory/honcho/cli.py
+++ b/plugins/memory/honcho/cli.py
@@ -463,7 +463,7 @@ def cmd_setup(args) -> None:
     current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "1")
     print("\n  Dialectic cadence:")
     print("    How often Honcho rebuilds its user model (LLM call on Honcho backend).")
-    print("    1 = every turn (default), 3+ = sparse (cost-saving).")
+    print("    1 = every turn (default), 3+ = sparse.")
     new_dialectic = _prompt("Dialectic cadence", default=current_dialectic)
     try:
         val = int(new_dialectic)
diff --git a/plugins/memory/honcho/client.py b/plugins/memory/honcho/client.py
index 136b1e60dc6..346c2b76e68 100644
--- a/plugins/memory/honcho/client.py
+++ b/plugins/memory/honcho/client.py
@@ -251,13 +251,11 @@ class HonchoClientConfig:
     # matching dialectic_depth length. When None, uses proportional defaults
     # derived from dialectic_reasoning_level.
     dialectic_depth_levels: list[str] | None = None
-    # Reasoning-level heuristic for auto-injected dialectic calls. When true,
-    # scales the base level up on longer queries (restored from pre-#10619
-    # behavior; see plugins/memory/honcho/__init__.py for thresholds).
-    # Never auto-selects a level above reasoning_level_cap.
+    # When true, the auto-injected dialectic scales reasoning level up on
+    # longer queries. See HonchoMemoryProvider for thresholds.
     reasoning_heuristic: bool = True
-    # Ceiling for heuristic-selected reasoning level. "max" is reserved for
-    # explicit tool-path selection; default "high" matches the old behavior.
+    # Ceiling for the heuristic-selected reasoning level. "max" is reserved
+    # for explicit tool-path selection.
     reasoning_level_cap: str = "high"
     # Honcho API limits — configurable for self-hosted instances
     # Max chars per message sent via add_messages() (Honcho cloud: 25000)
diff --git a/tests/honcho_plugin/test_session.py b/tests/honcho_plugin/test_session.py
index b0282b1969c..83db3f24dc9 100644
--- a/tests/honcho_plugin/test_session.py
+++ b/tests/honcho_plugin/test_session.py
@@ -862,9 +862,7 @@ class TestDialecticCadenceDefaults:
         return provider
 
     def test_default_is_1(self):
-        """Default dialectic_cadence should be 1 (every turn) — restored from
-        pre-#10619 behavior to avoid a silent regression on upgrade for users
-        who never set dialecticCadence explicitly."""
+        """Default dialectic_cadence is 1 — fires every turn unless overridden."""
         provider = self._make_provider()
         assert provider._dialectic_cadence == 1
 
@@ -1112,10 +1110,7 @@ class TestDialecticDepth:
 
 
 class TestTrivialPromptHeuristic:
-    """Trivial prompts ('ok', 'y', slash commands) must short-circuit injection.
-
-    Restored after accidental removal during the two-layer prefetch refactor.
-    """
+    """Trivial prompts ('ok', 'y', slash commands) must short-circuit injection."""
 
     @staticmethod
     def _make_provider():
@@ -1173,11 +1168,9 @@ class TestTrivialPromptHeuristic:
 
 
 class TestDialecticCadenceAdvancesOnSuccess:
-    """Cadence tracker must only advance when the dialectic call actually returned.
-
-    A silent failure (empty result, API blip) used to burn the full cadence window
-    before retrying — making it look like dialectic 'never fires again'.
-    """
+    """Cadence tracker advances only when the dialectic call returns a
+    non-empty result. Empty results (transient API error, sparse representation)
+    must retry on the next eligible turn instead of waiting the full cadence."""
 
     @staticmethod
     def _make_provider():
@@ -1329,13 +1322,9 @@ class TestSessionStartDialecticPrewarm:
 
 
 class TestDialecticLifecycleSmoke:
-    """End-to-end smoke: walks a realistic multi-turn session through every
-    behavior we care about — prewarm → turn 1 consume → trivial skip → cadence
-    fire → silent-failure retry → heuristic bump → session-end flush.
-
-    This is the 'velvet circuit' test: one provider, one flow, one set of
-    assertions. If the suite above lies about intent, this one catches it.
-    """
+    """End-to-end smoke walking a multi-turn session through prewarm,
+    turn 1 consume, trivial skip, cadence fire, empty-result retry,
+    heuristic bump, and session-end flush."""
 
     @staticmethod
     def _make_provider(cfg_extra=None):
@@ -1473,11 +1462,9 @@ class TestDialecticLifecycleSmoke:
 
 
 class TestReasoningHeuristic:
-    """Restored char-count heuristic for auto-injected dialectic reasoning level.
-
-    Pre-9a0ab34c behavior: scale base up by query length, capped at
-    reasoning_level_cap. 'max' is reserved for explicit tool-path selection.
-    """
+    """Char-count heuristic that scales the auto-injected reasoning level by
+    query length, clamped at reasoning_level_cap. 'max' is reserved for
+    explicit tool-path selection."""
 
     @staticmethod
     def _make_provider(cfg_extra=None):
diff --git a/website/docs/user-guide/features/honcho.md b/website/docs/user-guide/features/honcho.md
index 906a7c030eb..2040949d258 100644
--- a/website/docs/user-guide/features/honcho.md
+++ b/website/docs/user-guide/features/honcho.md
@@ -77,7 +77,7 @@ Cost and depth are controlled by three independent knobs:
 | Knob | Controls | Default |
 |------|----------|---------|
 | `contextCadence` | Turns between `context()` API calls (base layer refresh) | `1` |
-| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `1` |
+| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `3` |
 | `dialecticDepth` | Number of `.chat()` passes per dialectic invocation (1–3) | `1` |
 
 These are orthogonal — you can have frequent context refreshes with infrequent dialectic, or deep multi-pass dialectic at low frequency. Example: `contextCadence: 1, dialecticCadence: 5, dialecticDepth: 2` refreshes base context every turn, runs dialectic every 5 turns, and each dialectic run makes 2 passes.
@@ -104,7 +104,7 @@ Honcho is configured in `~/.honcho/config.json` (global) or `$HERMES_HOME/honcho
 |-----|---------|-------------|
 | `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Set to an integer (e.g. 1200) to cap. Truncates at word boundaries |
 | `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) |
-| `dialecticCadence` | `1` | Minimum turns between `peer.chat()` LLM calls (dialectic layer). In `tools` mode, irrelevant — model calls explicitly |
+| `dialecticCadence` | `3` | Minimum turns between `peer.chat()` LLM calls (dialectic layer). In `tools` mode, irrelevant — model calls explicitly |
 | `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped to 1–3 |
 | `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. Overrides proportional defaults |
 | `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` |
diff --git a/website/docs/user-guide/features/memory-providers.md b/website/docs/user-guide/features/memory-providers.md
index 181f30f7fa2..f571c7d48f1 100644
--- a/website/docs/user-guide/features/memory-providers.md
+++ b/website/docs/user-guide/features/memory-providers.md
@@ -82,7 +82,7 @@ hermes memory setup        # select "honcho"
 | `workspace` | host key | Shared workspace ID |
 | `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Truncates at word boundaries |
 | `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) |
-| `dialecticCadence` | `1` | Minimum turns between `peer.chat()` LLM calls. Only applies to `hybrid`/`context` modes |
+| `dialecticCadence` | `3` | Minimum turns between `peer.chat()` LLM calls. Only applies to `hybrid`/`context` modes |
 | `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped 1–3. Pass 0: cold/warm prompt, pass 1: self-audit, pass 2: reconciliation |
 | `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. Overrides proportional defaults |
 | `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` |
@@ -181,7 +181,7 @@ This inherits settings from the default `hermes` host block and creates new AI p
       },
       "dialecticReasoningLevel": "low",
       "dialecticDynamic": true,
-      "dialecticCadence": 1,
+      "dialecticCadence": 3,
       "dialecticDepth": 1,
       "dialecticMaxChars": 600,
       "contextCadence": 1,

From 098efde848a1253033fedf04e8184ef843115e11 Mon Sep 17 00:00:00 2001
From: Erosika <eri@plasticlabs.ai>
Date: Sat, 18 Apr 2026 12:45:04 -0400
Subject: [PATCH 099/143] docs(honcho): wizard cadence default 2, prewarm/depth
 + observation + multi-peer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- cli: setup wizard pre-fills dialecticCadence=2 (code default stays 1
  so unset → every turn)
- honcho.md: fix stale dialecticCadence default in tables, add
  Session-Start Prewarm subsection (depth runs at init), add
  Query-Adaptive Reasoning Level subsection, expand Observation
  section with directional vs unified semantics and per-peer patterns
- memory-providers.md: fix stale default, rename Multi-agent/Profiles
  to Multi-peer setup, add concrete walkthrough for new profiles and
  sync, document observation toggles + presets, link to honcho.md
- SKILL.md: fix stale defaults, add Depth at session start callout
---
 .../autonomous-ai-agents/honcho/SKILL.md      |  8 ++-
 plugins/memory/honcho/cli.py                  |  6 +-
 website/docs/user-guide/features/honcho.md    | 47 ++++++++++++++-
 .../user-guide/features/memory-providers.md   | 59 ++++++++++++++++---
 4 files changed, 103 insertions(+), 17 deletions(-)

diff --git a/optional-skills/autonomous-ai-agents/honcho/SKILL.md b/optional-skills/autonomous-ai-agents/honcho/SKILL.md
index c60d2c63561..e79875aa073 100644
--- a/optional-skills/autonomous-ai-agents/honcho/SKILL.md
+++ b/optional-skills/autonomous-ai-agents/honcho/SKILL.md
@@ -145,10 +145,10 @@ Controls **how often** dialectic and context calls happen.
 | Key | Default | Description |
 |-----|---------|-------------|
 | `contextCadence` | `1` | Min turns between context API calls |
-| `dialecticCadence` | `3` | Min turns between dialectic API calls |
+| `dialecticCadence` | `1` (wizard: `2`) | Min turns between dialectic API calls. Unset → every turn; wizard pre-fills `2` |
 | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` for base context injection |
 
-Higher cadence values reduce API calls and cost. `dialecticCadence: 3` (default) means the dialectic engine fires at most every 3rd turn.
+Higher cadence values fire the dialectic LLM less often. `dialecticCadence: 2` means the engine fires every other turn. Setting it to `1` fires every turn.
 
 ### Depth (how many)
 
@@ -180,6 +180,8 @@ If `dialecticDepthLevels` is omitted, rounds use **proportional levels** derived
 
 This keeps earlier passes cheap while using full depth on the final synthesis.
 
+**Depth at session start.** The session-start prewarm runs the full configured `dialecticDepth` in the background before turn 1. A single-pass prewarm on a cold peer often returns thin output — multi-pass depth runs the audit/reconcile cycle before the user ever speaks. Turn 1 consumes the prewarm result directly; if prewarm hasn't landed in time, turn 1 falls back to a synchronous call with a bounded timeout.
+
 ### Level (how hard)
 
 Controls the **intensity** of each dialectic reasoning round.
@@ -368,7 +370,7 @@ Config file: `$HERMES_HOME/honcho.json` (profile-local) or `~/.honcho/config.jso
 | `contextTokens` | uncapped | Max tokens for the combined base context injection (summary + representation + card). Opt-in cap — omit to leave uncapped, set to an integer to bound injection size. |
 | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` |
 | `contextCadence` | `1` | Min turns between context API calls |
-| `dialecticCadence` | `3` | Min turns between dialectic LLM calls |
+| `dialecticCadence` | `1` (wizard: `2`) | Min turns between dialectic LLM calls |
 
 The `contextTokens` budget is enforced at injection time. If the session summary + representation + card exceed the budget, Honcho trims the summary first, then the representation, preserving the card. This prevents context blowup in long sessions.
 
diff --git a/plugins/memory/honcho/cli.py b/plugins/memory/honcho/cli.py
index 5cd25bfbab3..c73dd66f39e 100644
--- a/plugins/memory/honcho/cli.py
+++ b/plugins/memory/honcho/cli.py
@@ -460,17 +460,17 @@ def cmd_setup(args) -> None:
             pass  # keep current
 
     # --- 7b. Dialectic cadence ---
-    current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "1")
+    current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "2")
     print("\n  Dialectic cadence:")
     print("    How often Honcho rebuilds its user model (LLM call on Honcho backend).")
-    print("    1 = every turn (default), 3+ = sparse.")
+    print("    1 = every turn, 2 = every other turn (wizard default), 3+ = sparse.")
     new_dialectic = _prompt("Dialectic cadence", default=current_dialectic)
     try:
         val = int(new_dialectic)
         if val >= 1:
             hermes_host["dialecticCadence"] = val
     except (ValueError, TypeError):
-        hermes_host["dialecticCadence"] = 1
+        hermes_host["dialecticCadence"] = 2
 
     # --- 8. Session strategy ---
     current_strat = hermes_host.get("sessionStrategy") or cfg.get("sessionStrategy", "per-session")
diff --git a/website/docs/user-guide/features/honcho.md b/website/docs/user-guide/features/honcho.md
index 2040949d258..bf4b5c6bc37 100644
--- a/website/docs/user-guide/features/honcho.md
+++ b/website/docs/user-guide/features/honcho.md
@@ -77,7 +77,7 @@ Cost and depth are controlled by three independent knobs:
 | Knob | Controls | Default |
 |------|----------|---------|
 | `contextCadence` | Turns between `context()` API calls (base layer refresh) | `1` |
-| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `3` |
+| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `1` (code default) / `2` (setup wizard default) |
 | `dialecticDepth` | Number of `.chat()` passes per dialectic invocation (1–3) | `1` |
 
 These are orthogonal — you can have frequent context refreshes with infrequent dialectic, or deep multi-pass dialectic at low frequency. Example: `contextCadence: 1, dialecticCadence: 5, dialecticDepth: 2` refreshes base context every turn, runs dialectic every 5 turns, and each dialectic run makes 2 passes.
@@ -94,6 +94,14 @@ Each pass uses a proportional reasoning level (lighter early passes, base level
 
 Passes bail out early if the prior pass returned strong signal (long, structured output), so depth 3 doesn't always mean 3 LLM calls.
 
+### Session-Start Prewarm
+
+On session init, Honcho fires a dialectic call in the background at the full configured `dialecticDepth` and hands the result directly to turn 1's context assembly. A single-pass prewarm on a cold peer often returns thin output — multi-pass depth runs the audit/reconcile cycle before the user ever speaks. If prewarm hasn't landed by turn 1, turn 1 falls back to a synchronous call with a bounded timeout.
+
+### Query-Adaptive Reasoning Level
+
+The auto-injected dialectic scales `dialecticReasoningLevel` by query length: +1 level at ≥120 chars, +2 at ≥400, clamped at `reasoningLevelCap` (default `"high"`). Disable with `reasoningHeuristic: false` to pin every auto call to `dialecticReasoningLevel`. `"max"` is reserved for explicit tool-path selection via `honcho_reasoning`.
+
 ## Configuration Options
 
 Honcho is configured in `~/.honcho/config.json` (global) or `$HERMES_HOME/honcho.json` (profile-local). The setup wizard handles this for you.
@@ -104,7 +112,7 @@ Honcho is configured in `~/.honcho/config.json` (global) or `$HERMES_HOME/honcho
 |-----|---------|-------------|
 | `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Set to an integer (e.g. 1200) to cap. Truncates at word boundaries |
 | `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) |
-| `dialecticCadence` | `3` | Minimum turns between `peer.chat()` LLM calls (dialectic layer). In `tools` mode, irrelevant — model calls explicitly |
+| `dialecticCadence` | `1` (wizard sets `2`) | Minimum turns between `peer.chat()` LLM calls (dialectic layer). Code default fires every turn when the key is unset; the setup wizard pre-fills `2`. In `tools` mode, irrelevant — model calls explicitly |
 | `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped to 1–3 |
 | `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. Overrides proportional defaults |
 | `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` |
@@ -142,6 +150,41 @@ Honcho is configured in `~/.honcho/config.json` (global) or `$HERMES_HOME/honcho
 
 In `tools` mode, the model is fully in control — it calls `honcho_reasoning` when it wants, at whatever `reasoning_level` it picks. Cadence and budget settings only apply to modes with auto-injection (`hybrid` and `context`).
 
+## Observation (Directional vs. Unified)
+
+Honcho models a conversation as peers exchanging messages. Each peer has two observation toggles that map 1:1 to Honcho's `SessionPeerConfig`:
+
+| Toggle | Effect |
+|--------|--------|
+| `observeMe` | Honcho builds a representation of this peer from its own messages |
+| `observeOthers` | This peer observes the other peer's messages (feeds cross-peer reasoning) |
+
+Two peers × two toggles = four flags. `observationMode` is a shorthand preset:
+
+| Preset | User flags | AI flags | Semantics |
+|--------|-----------|----------|-----------|
+| `"directional"` (default) | me: on, others: on | me: on, others: on | Full mutual observation. Enables cross-peer dialectic — "what does the AI know about the user, based on what the user said and the AI replied." |
+| `"unified"` | me: on, others: off | me: off, others: on | Shared-pool semantics: the AI observes only the user's messages, and the user peer only self-models. Single-observer pool. |
+
+Override the preset with an explicit `observation` block for per-peer control:
+
+```json
+"observation": {
+  "user": { "observeMe": true,  "observeOthers": true },
+  "ai":   { "observeMe": true,  "observeOthers": false }
+}
+```
+
+Common patterns:
+
+| Intent | Config |
+|--------|--------|
+| Full observation (most users) | `"observationMode": "directional"` |
+| AI peer shouldn't observe the user's messages (no cross-peer reasoning on the AI side) | `"ai": {"observeMe": true, "observeOthers": false}` |
+| Strong persona the AI peer shouldn't update from self-observation | `"ai": {"observeMe": false, "observeOthers": true}` |
+
+Server-side toggles set via the Honcho dashboard win over local defaults — Hermes syncs them back at session init.
+
 ## Tools
 
 When Honcho is active as the memory provider, five tools become available:
diff --git a/website/docs/user-guide/features/memory-providers.md b/website/docs/user-guide/features/memory-providers.md
index f571c7d48f1..b2469a13ee3 100644
--- a/website/docs/user-guide/features/memory-providers.md
+++ b/website/docs/user-guide/features/memory-providers.md
@@ -82,7 +82,7 @@ hermes memory setup        # select "honcho"
 | `workspace` | host key | Shared workspace ID |
 | `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Truncates at word boundaries |
 | `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) |
-| `dialecticCadence` | `3` | Minimum turns between `peer.chat()` LLM calls. Only applies to `hybrid`/`context` modes |
+| `dialecticCadence` | `1` (wizard sets `2`) | Minimum turns between `peer.chat()` LLM calls. Unset → every turn; wizard pre-fills `2`. Only applies to `hybrid`/`context` modes |
 | `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped 1–3. Pass 0: cold/warm prompt, pass 1: self-audit, pass 2: reconciliation |
 | `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. Overrides proportional defaults |
 | `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` |
@@ -140,23 +140,64 @@ hermes memory setup        # select "honcho"
 If you previously used `hermes honcho setup`, your config and all server-side data are intact. Just re-enable through the setup wizard again or manually set `memory.provider: honcho` to reactivate via the new system.
 :::
 
-**Multi-agent / Profiles:**
+**Multi-peer setup:**
 
-Each Hermes profile gets its own Honcho AI peer while sharing the same workspace -- all profiles see the same user representation, but each agent builds its own identity and observations.
+Honcho models conversations as peers exchanging messages — one user peer plus one AI peer per Hermes profile, all sharing a workspace. The workspace is the shared environment: the user peer is global across profiles, each AI peer is its own identity. Every AI peer builds an independent representation / card from its own observations, so a `coder` profile stays code-oriented while a `writer` profile stays editorial against the same user.
+
+The mapping:
+
+| Concept | What it is |
+|---------|-----------|
+| **Workspace** | Shared environment. All Hermes profiles under one workspace see the same user identity. |
+| **User peer** (`peerName`) | The human. Shared across profiles in the workspace. |
+| **AI peer** (`aiPeer`) | One per Hermes profile. Host key `hermes` → default; `hermes.<profile>` for others. |
+| **Observation** | Per-peer toggles controlling what Honcho models from whose messages. `directional` (default, all four on) or `unified` (single-observer pool). |
+
+### New profile, fresh Honcho peer
 
 ```bash
-hermes profile create coder --clone   # creates honcho peer "coder", inherits config from default
+hermes profile create coder --clone
 ```
 
-What `--clone` does: creates a `hermes.coder` host block in `honcho.json` with `aiPeer: "coder"`, shared `workspace`, inherited `peerName`, `recallMode`, `writeFrequency`, `observation`, etc. The peer is eagerly created in Honcho so it exists before first message.
+`--clone` creates a `hermes.coder` host block in `honcho.json` with `aiPeer: "coder"`, shared `workspace`, inherited `peerName`, `recallMode`, `writeFrequency`, `observation`, etc. The AI peer is eagerly created in Honcho so it exists before the first message.
 
-For profiles created before Honcho was set up:
+### Existing profiles, backfill Honcho peers
 
 ```bash
-hermes honcho sync   # scans all profiles, creates host blocks for any missing ones
+hermes honcho sync
 ```
 
-This inherits settings from the default `hermes` host block and creates new AI peers for each profile. Idempotent -- skips profiles that already have a host block.
+Scans every Hermes profile, creates host blocks for any profile without one, inherits settings from the default `hermes` block, and creates the new AI peers eagerly. Idempotent — skips profiles that already have a host block.
+
+### Per-profile observation
+
+Each host block can override the observation config independently. Example: a code-focused profile where the AI peer observes the user but doesn't self-model:
+
+```json
+"hermes.coder": {
+  "aiPeer": "coder",
+  "observation": {
+    "user": { "observeMe": true, "observeOthers": true },
+    "ai":   { "observeMe": false, "observeOthers": true }
+  }
+}
+```
+
+**Observation toggles (one set per peer):**
+
+| Toggle | Effect |
+|--------|--------|
+| `observeMe` | Honcho builds a representation of this peer from its own messages |
+| `observeOthers` | This peer observes the other peer's messages (feeds cross-peer reasoning) |
+
+Presets via `observationMode`:
+
+- **`"directional"`** (default) — all four flags on. Full mutual observation; enables cross-peer dialectic.
+- **`"unified"`** — user `observeMe: true`, AI `observeOthers: true`, rest false. Single-observer pool: the AI models the user but not itself, and the user peer only self-models.
+
+Server-side toggles set via the [Honcho dashboard](https://app.honcho.dev) win over local defaults — synced back at session init.
+
+See the [Honcho page](./honcho.md#observation-directional-vs-unified) for the full observation reference.
 
 <details>
 <summary>Full honcho.json example (multi-profile)</summary>
@@ -181,7 +222,7 @@ This inherits settings from the default `hermes` host block and creates new AI p
       },
       "dialecticReasoningLevel": "low",
       "dialecticDynamic": true,
-      "dialecticCadence": 3,
+      "dialecticCadence": 2,
       "dialecticDepth": 1,
       "dialecticMaxChars": 600,
       "contextCadence": 1,

From c630dfcdac4a64a3d55aa8724c7ca3bdd7e64b85 Mon Sep 17 00:00:00 2001
From: Erosika <eri@plasticlabs.ai>
Date: Sat, 18 Apr 2026 13:07:09 -0400
Subject: [PATCH 100/143] =?UTF-8?q?feat(honcho):=20dialectic=20liveness=20?=
 =?UTF-8?q?=E2=80=94=20stale-thread=20watchdog,=20stale-result=20discard,?=
 =?UTF-8?q?=20empty-streak=20backoff?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Hardens the dialectic lifecycle against three failure modes that could
leave the prefetch pipeline stuck or injecting stale content:

- Stale-thread watchdog: _thread_is_live() treats any prefetch thread
  older than timeout × 2.0 as dead. A hung Honcho call can no longer
  block subsequent fires indefinitely.

- Stale-result discard: pending _prefetch_result is tagged with its
  fire turn. prefetch() discards the result if more than cadence × 2
  turns passed before a consumer read it (e.g. a run of trivial-prompt
  turns between fire and read).

- Empty-streak backoff: consecutive empty dialectic returns widen the
  effective cadence (dialectic_cadence + streak, capped at cadence × 8).
  A healthy fire resets the streak. Prevents the plugin from hammering
  the backend every turn when the peer graph is cold.

- liveness_snapshot() on the provider exposes current turn, last fire,
  pending fire-at, empty streak, effective cadence, and thread status
  for in-process diagnostics.

- system_prompt_block: nudge the model that honcho_reasoning accepts
  reasoning_level minimal/low/medium/high/max per call.

- hermes honcho status: surface base reasoning level, cap, and heuristic
  toggle so config drift is visible at a glance.

Tests: 550 passed.
- TestDialecticLiveness (8 tests): stale-thread recovery, stale-result
  discard, fresh-result retention, backoff widening, backoff ceiling,
  streak reset on success, streak increment on empty, snapshot shape.
- Existing TestDialecticCadenceAdvancesOnSuccess::test_in_flight_thread_is_not_stacked
  updated to set _prefetch_thread_started_at so it tests the
  fresh-thread-blocks branch (stale path covered separately).
- test_cli TestCmdStatus fake updated with the new config attrs surfaced
  in the status block.
---
 plugins/memory/honcho/__init__.py   | 120 +++++++++++++++++++--
 plugins/memory/honcho/cli.py        |   3 +
 tests/honcho_plugin/test_cli.py     |   3 +
 tests/honcho_plugin/test_session.py | 156 +++++++++++++++++++++++++++-
 4 files changed, 266 insertions(+), 16 deletions(-)

diff --git a/plugins/memory/honcho/__init__.py b/plugins/memory/honcho/__init__.py
index 51345b8e921..68fa868855c 100644
--- a/plugins/memory/honcho/__init__.py
+++ b/plugins/memory/honcho/__init__.py
@@ -19,6 +19,7 @@ import json
 import logging
 import re
 import threading
+import time
 from typing import Any, Dict, List, Optional
 
 from agent.memory_provider import MemoryProvider
@@ -214,6 +215,11 @@ class HonchoMemoryProvider(MemoryProvider):
         self._last_context_turn = -999
         self._last_dialectic_turn = -999
 
+        # Liveness + observability state
+        self._prefetch_thread_started_at: float = 0.0   # monotonic ts of current thread
+        self._prefetch_result_fired_at: int = -999      # turn the pending result was fired at
+        self._dialectic_empty_streak: int = 0           # consecutive empty returns
+
         # Port #1957: lazy session init for tools-only mode
         self._session_initialized = False
         self._lazy_init_kwargs: Optional[dict] = None
@@ -413,13 +419,19 @@ class HonchoMemoryProvider(MemoryProvider):
                     r = self._run_dialectic_depth(_prewarm_query)
                 except Exception as exc:
                     logger.debug("Honcho dialectic prewarm failed: %s", exc)
+                    self._dialectic_empty_streak += 1
                     return
                 if r and r.strip():
                     with self._prefetch_lock:
                         self._prefetch_result = r
+                        self._prefetch_result_fired_at = 0
                     # Treat prewarm as turn 0 so cadence gating starts clean.
                     self._last_dialectic_turn = 0
+                    self._dialectic_empty_streak = 0
+                else:
+                    self._dialectic_empty_streak += 1
 
+            self._prefetch_thread_started_at = time.monotonic()
             self._prefetch_thread = threading.Thread(
                 target=_prewarm_dialectic, daemon=True, name="honcho-prewarm-dialectic"
             )
@@ -513,7 +525,8 @@ class HonchoMemoryProvider(MemoryProvider):
                 "# Honcho Memory\n"
                 "Active (tools-only mode). Use honcho_profile for a quick factual snapshot, "
                 "honcho_search for raw excerpts, honcho_context for raw peer context, "
-                "honcho_reasoning for synthesized answers, "
+                "honcho_reasoning for synthesized answers (pass reasoning_level "
+                "minimal/low/medium/high/max — you pick the depth per call), "
                 "honcho_conclude to save facts about the user. "
                 "No automatic context injection — you must use tools to access memory."
             )
@@ -523,7 +536,8 @@ class HonchoMemoryProvider(MemoryProvider):
                 "Active (hybrid mode). Relevant context is auto-injected AND memory tools are available. "
                 "Use honcho_profile for a quick factual snapshot, "
                 "honcho_search for raw excerpts, honcho_context for raw peer context, "
-                "honcho_reasoning for synthesized answers, "
+                "honcho_reasoning for synthesized answers (pass reasoning_level "
+                "minimal/low/medium/high/max — you pick the depth per call), "
                 "honcho_conclude to save facts about the user."
             )
 
@@ -611,14 +625,20 @@ class HonchoMemoryProvider(MemoryProvider):
                     r = self._run_dialectic_depth(query)
                 except Exception as exc:
                     logger.debug("Honcho first-turn dialectic failed: %s", exc)
+                    self._dialectic_empty_streak += 1
                     return
                 if r and r.strip():
                     with self._prefetch_lock:
                         self._prefetch_result = r
+                        self._prefetch_result_fired_at = _fired_at
                     # Advance cadence only on a non-empty result so the next
                     # turn retries when the call returned nothing.
                     self._last_dialectic_turn = _fired_at
+                    self._dialectic_empty_streak = 0
+                else:
+                    self._dialectic_empty_streak += 1
 
+            self._prefetch_thread_started_at = time.monotonic()
             self._prefetch_thread = threading.Thread(
                 target=_run_first_turn, daemon=True, name="honcho-prefetch-first"
             )
@@ -635,7 +655,21 @@ class HonchoMemoryProvider(MemoryProvider):
             self._prefetch_thread.join(timeout=3.0)
         with self._prefetch_lock:
             dialectic_result = self._prefetch_result
+            fired_at = self._prefetch_result_fired_at
             self._prefetch_result = ""
+            self._prefetch_result_fired_at = -999
+
+        # Discard stale pending results: if the fire happened more than
+        # cadence × multiplier turns ago (e.g. a run of trivial-prompt turns
+        # passed without consumption), the content likely no longer tracks
+        # the current conversational pivot.
+        stale_limit = self._dialectic_cadence * self._STALE_RESULT_MULTIPLIER
+        if dialectic_result and fired_at >= 0 and (self._turn_count - fired_at) > stale_limit:
+            logger.debug(
+                "Honcho pending dialectic discarded as stale: fired_at=%d, "
+                "turn=%d, limit=%d", fired_at, self._turn_count, stale_limit,
+            )
+            dialectic_result = ""
 
         if dialectic_result and dialectic_result.strip():
             parts.append(dialectic_result)
@@ -693,18 +727,23 @@ class HonchoMemoryProvider(MemoryProvider):
                 logger.debug("Honcho context prefetch failed: %s", e)
 
         # ----- Dialectic prefetch (supplement layer) -----
-        # Guard against thread pile-up: if a prior dialectic is still in flight,
-        # let it finish instead of stacking races on _prefetch_result.
-        if self._prefetch_thread and self._prefetch_thread.is_alive():
+        # Thread-alive guard with stale-thread recovery: a hung Honcho call
+        # older than timeout × multiplier is treated as dead so it can't
+        # block subsequent fires.
+        if self._thread_is_live():
             logger.debug("Honcho dialectic prefetch skipped: prior thread still running")
             return
 
-        # B5: cadence check — skip if too soon since last *successful* dialectic call.
-        # The gate applies uniformly (including cadence=1): "every turn" means once
-        # per turn, not twice on the same turn when first-turn sync already fired.
-        if (self._turn_count - self._last_dialectic_turn) < self._dialectic_cadence:
-            logger.debug("Honcho dialectic prefetch skipped: cadence %d, turns since last: %d",
-                         self._dialectic_cadence, self._turn_count - self._last_dialectic_turn)
+        # Cadence gate, widened by the empty-streak backoff so a persistently
+        # silent backend doesn't retry every turn forever.
+        effective = self._effective_cadence()
+        if (self._turn_count - self._last_dialectic_turn) < effective:
+            logger.debug(
+                "Honcho dialectic prefetch skipped: effective cadence %d "
+                "(base %d, empty streak %d), turns since last: %d",
+                effective, self._dialectic_cadence, self._dialectic_empty_streak,
+                self._turn_count - self._last_dialectic_turn,
+            )
             return
 
         # Cadence advances only on a non-empty result so empty returns
@@ -716,12 +755,18 @@ class HonchoMemoryProvider(MemoryProvider):
                 result = self._run_dialectic_depth(query)
             except Exception as e:
                 logger.debug("Honcho prefetch failed: %s", e)
+                self._dialectic_empty_streak += 1
                 return
             if result and result.strip():
                 with self._prefetch_lock:
                     self._prefetch_result = result
+                    self._prefetch_result_fired_at = _fired_at
                 self._last_dialectic_turn = _fired_at
+                self._dialectic_empty_streak = 0
+            else:
+                self._dialectic_empty_streak += 1
 
+        self._prefetch_thread_started_at = time.monotonic()
         self._prefetch_thread = threading.Thread(
             target=_run, daemon=True, name="honcho-prefetch"
         )
@@ -750,6 +795,59 @@ class HonchoMemoryProvider(MemoryProvider):
     _HEURISTIC_LENGTH_MEDIUM = 120
     _HEURISTIC_LENGTH_HIGH = 400
 
+    # Liveness constants. A thread older than timeout × multiplier is treated
+    # as dead so a hung Honcho call can't block future retries indefinitely.
+    _STALE_THREAD_MULTIPLIER = 2.0
+    # Pending result whose fire-turn is older than cadence × multiplier is
+    # discarded on read so we don't inject context for a stale conversational
+    # pivot after a gap of trivial-prompt turns.
+    _STALE_RESULT_MULTIPLIER = 2
+    # Ceiling multiplier for the empty-streak backoff: the effective cadence
+    # never exceeds base cadence × _BACKOFF_MAX, so it cannot widen unboundedly.
+    _BACKOFF_MAX = 8
+
+    def _thread_is_live(self) -> bool:
+        """Thread-alive guard that treats threads older than the stale
+        threshold as dead, so a hung Honcho request can't block new fires."""
+        if not self._prefetch_thread or not self._prefetch_thread.is_alive():
+            return False
+        timeout = (self._config.timeout if self._config and self._config.timeout else 8.0)
+        age = time.monotonic() - self._prefetch_thread_started_at
+        if age > timeout * self._STALE_THREAD_MULTIPLIER:
+            logger.debug(
+                "Honcho prefetch thread age %.1fs exceeds stale threshold "
+                "%.1fs — treating as dead", age, timeout * self._STALE_THREAD_MULTIPLIER,
+            )
+            return False
+        return True
+
+    def _effective_cadence(self) -> int:
+        """Cadence plus empty-streak backoff, capped at _BACKOFF_MAX × base."""
+        if self._dialectic_empty_streak <= 0:
+            return self._dialectic_cadence
+        widened = self._dialectic_cadence + self._dialectic_empty_streak
+        ceiling = self._dialectic_cadence * self._BACKOFF_MAX
+        return min(widened, ceiling)
+
+    def liveness_snapshot(self) -> dict:
+        """In-process snapshot of dialectic liveness state for diagnostics.
+
+        Returns current turn, last successful dialectic turn, pending-result
+        fire turn, empty streak, effective cadence, and thread status.
+        """
+        thread_age = None
+        if self._prefetch_thread and self._prefetch_thread.is_alive():
+            thread_age = time.monotonic() - self._prefetch_thread_started_at
+        return {
+            "turn_count": self._turn_count,
+            "last_dialectic_turn": self._last_dialectic_turn,
+            "pending_result_fired_at": self._prefetch_result_fired_at,
+            "empty_streak": self._dialectic_empty_streak,
+            "effective_cadence": self._effective_cadence(),
+            "thread_alive": thread_age is not None,
+            "thread_age_seconds": thread_age,
+        }
+
     def _apply_reasoning_heuristic(self, base: str, query: str) -> str:
         """Scale `base` up by query length, clamped at reasoning_level_cap.
 
diff --git a/plugins/memory/honcho/cli.py b/plugins/memory/honcho/cli.py
index c73dd66f39e..eb21c48eaa8 100644
--- a/plugins/memory/honcho/cli.py
+++ b/plugins/memory/honcho/cli.py
@@ -638,6 +638,9 @@ def cmd_status(args) -> None:
     raw = getattr(hcfg, "raw", None) or {}
     dialectic_cadence = raw.get("dialecticCadence") or 1
     print(f"  Dialectic cad:  every {dialectic_cadence} turn{'s' if dialectic_cadence != 1 else ''}")
+    reasoning_cap = raw.get("reasoningLevelCap") or hcfg.reasoning_level_cap
+    heuristic_on = "on" if hcfg.reasoning_heuristic else "off"
+    print(f"  Reasoning:      base={hcfg.dialectic_reasoning_level}, cap={reasoning_cap}, heuristic={heuristic_on}")
     print(f"  Observation:    user(me={hcfg.user_observe_me},others={hcfg.user_observe_others}) ai(me={hcfg.ai_observe_me},others={hcfg.ai_observe_others})")
     print(f"  Write freq:     {hcfg.write_frequency}")
 
diff --git a/tests/honcho_plugin/test_cli.py b/tests/honcho_plugin/test_cli.py
index 006d687dc1d..a6fc39ea7c0 100644
--- a/tests/honcho_plugin/test_cli.py
+++ b/tests/honcho_plugin/test_cli.py
@@ -26,6 +26,9 @@ class TestCmdStatus:
             write_frequency = "async"
             session_strategy = "per-session"
             context_tokens = 800
+            dialectic_reasoning_level = "low"
+            reasoning_level_cap = "high"
+            reasoning_heuristic = True
 
             def resolve_session_name(self):
                 return "hermes"
diff --git a/tests/honcho_plugin/test_session.py b/tests/honcho_plugin/test_session.py
index 83db3f24dc9..37f54b54103 100644
--- a/tests/honcho_plugin/test_session.py
+++ b/tests/honcho_plugin/test_session.py
@@ -823,8 +823,11 @@ def _settle_prewarm(provider):
         provider._prefetch_thread.join(timeout=3.0)
     with provider._prefetch_lock:
         provider._prefetch_result = ""
+        provider._prefetch_result_fired_at = -999
     provider._prefetch_thread = None
+    provider._prefetch_thread_started_at = 0.0
     provider._last_dialectic_turn = -999
+    provider._dialectic_empty_streak = 0
     if getattr(provider, "_manager", None) is not None:
         try:
             provider._manager.dialectic_query.reset_mock()
@@ -1227,26 +1230,28 @@ class TestDialecticCadenceAdvancesOnSuccess:
 
     def test_in_flight_thread_is_not_stacked(self):
         import threading as _threading
+        import time as _time
         provider = self._make_provider()
         provider._session_key = "test"
         provider._turn_count = 10
         provider._last_dialectic_turn = 0
 
-        # Simulate a prior thread still running
+        # Simulate a prior thread still running (fresh, not stale)
         hold = _threading.Event()
 
         def _block():
             hold.wait(timeout=5.0)
 
-        stale = _threading.Thread(target=_block, daemon=True)
-        stale.start()
-        provider._prefetch_thread = stale
+        fresh = _threading.Thread(target=_block, daemon=True)
+        fresh.start()
+        provider._prefetch_thread = fresh
+        provider._prefetch_thread_started_at = _time.monotonic()  # fresh start
 
         provider.queue_prefetch("hello")
         # Should have short-circuited — no new dialectic call
         assert provider._manager.dialectic_query.call_count == 0
         hold.set()
-        stale.join(timeout=2.0)
+        fresh.join(timeout=2.0)
 
 
 class TestSessionStartDialecticPrewarm:
@@ -1321,6 +1326,147 @@ class TestSessionStartDialecticPrewarm:
         assert p._manager.dialectic_query.call_count == 1
 
 
+class TestDialecticLiveness:
+    """Liveness + observability: stale-thread recovery, stale-result discard,
+    empty-streak backoff, and the snapshot method used for diagnostics."""
+
+    @staticmethod
+    def _make_provider(cfg_extra=None):
+        from unittest.mock import patch, MagicMock
+        from plugins.memory.honcho.client import HonchoClientConfig
+
+        defaults = dict(api_key="test-key", enabled=True, recall_mode="hybrid", timeout=2.0)
+        if cfg_extra:
+            defaults.update(cfg_extra)
+        cfg = HonchoClientConfig(**defaults)
+        provider = HonchoMemoryProvider()
+        mock_manager = MagicMock()
+        mock_manager.get_or_create.return_value = MagicMock(messages=[])
+        mock_manager.get_prefetch_context.return_value = None
+        mock_manager.pop_context_result.return_value = None
+        mock_manager.dialectic_query.return_value = ""  # default: silent
+
+        with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \
+             patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \
+             patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \
+             patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
+            provider.initialize(session_id="test-liveness")
+        _settle_prewarm(provider)
+        return provider
+
+    def test_stale_thread_is_treated_as_dead(self):
+        """A thread older than timeout × multiplier no longer blocks new fires."""
+        import threading as _threading
+        p = self._make_provider()
+        p._session_key = "test"
+        p._turn_count = 10
+        p._last_dialectic_turn = 0
+        p._manager.dialectic_query.return_value = "fresh synthesis"
+
+        # Plant an alive thread with an old timestamp (stale)
+        hold = _threading.Event()
+        stuck = _threading.Thread(target=lambda: hold.wait(timeout=10.0), daemon=True)
+        stuck.start()
+        p._prefetch_thread = stuck
+        # timeout=2.0, multiplier=2.0, so anything older than 4s is stale
+        p._prefetch_thread_started_at = 0.0  # assumed far in the past; time.monotonic() has an arbitrary origin, not 1970
+
+        p.queue_prefetch("hello")
+        # New thread should have been spawned since stuck one is stale
+        assert p._prefetch_thread is not stuck, "stale thread must be recycled"
+        if p._prefetch_thread:
+            p._prefetch_thread.join(timeout=2.0)
+        assert p._manager.dialectic_query.call_count == 1
+        hold.set()
+        stuck.join(timeout=2.0)
+
+    def test_stale_pending_result_is_discarded_on_read(self):
+        """A pending dialectic result from many turns ago is discarded
+        instead of injected against a fresh conversational pivot."""
+        p = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 2}})
+        p._session_key = "test"
+        p._base_context_cache = "base ctx"
+        with p._prefetch_lock:
+            p._prefetch_result = "ancient synthesis"
+            p._prefetch_result_fired_at = 1
+        # cadence=2, multiplier=2 → stale once more than 4 turns have passed since fire
+        p._turn_count = 10
+        p._last_dialectic_turn = 1  # prevents sync first-turn path
+
+        result = p.prefetch("what's new")
+        assert "ancient synthesis" not in result, "stale pending must be discarded"
+        # Cache slot cleared
+        with p._prefetch_lock:
+            assert p._prefetch_result == ""
+            assert p._prefetch_result_fired_at == -999
+
+    def test_fresh_pending_result_is_kept(self):
+        """A pending result within the staleness window is injected normally."""
+        p = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 3}})
+        p._session_key = "test"
+        p._base_context_cache = ""
+        with p._prefetch_lock:
+            p._prefetch_result = "recent synthesis"
+            p._prefetch_result_fired_at = 8
+        p._turn_count = 9  # 1 turn since fire, well within cadence × 2 = 6
+        p._last_dialectic_turn = 8
+
+        result = p.prefetch("what's new")
+        assert "recent synthesis" in result
+
+    def test_empty_streak_widens_effective_cadence(self):
+        """After N empty returns, the gate waits cadence + N turns."""
+        p = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 1}})
+        p._dialectic_empty_streak = 3
+        # cadence=1, streak=3 → effective = 4
+        assert p._effective_cadence() == 4
+
+    def test_backoff_is_capped(self):
+        """Effective cadence is capped at cadence × _BACKOFF_MAX."""
+        p = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 2}})
+        p._dialectic_empty_streak = 100
+        # cadence=2, ceiling = 2 × 8 = 16
+        assert p._effective_cadence() == 16
+
+    def test_success_resets_empty_streak(self):
+        """A non-empty result zeroes the streak so healthy operation restores
+        the base cadence immediately."""
+        p = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 1}})
+        p._session_key = "test"
+        p._dialectic_empty_streak = 5
+        p._turn_count = 10
+        p._last_dialectic_turn = 0
+        p._manager.dialectic_query.return_value = "real output"
+
+        p.queue_prefetch("hello")
+        if p._prefetch_thread:
+            p._prefetch_thread.join(timeout=2.0)
+        assert p._dialectic_empty_streak == 0
+        assert p._last_dialectic_turn == 10
+
+    def test_empty_result_increments_streak(self):
+        p = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 1}})
+        p._session_key = "test"
+        p._turn_count = 5
+        p._last_dialectic_turn = 0
+        p._manager.dialectic_query.return_value = ""  # empty
+
+        p.queue_prefetch("hello")
+        if p._prefetch_thread:
+            p._prefetch_thread.join(timeout=2.0)
+        assert p._dialectic_empty_streak == 1
+        assert p._last_dialectic_turn == 0  # cadence not advanced
+
+    def test_liveness_snapshot_shape(self):
+        p = self._make_provider()
+        snap = p.liveness_snapshot()
+        for key in (
+            "turn_count", "last_dialectic_turn", "pending_result_fired_at",
+            "empty_streak", "effective_cadence", "thread_alive", "thread_age_seconds",
+        ):
+            assert key in snap
+
+
 class TestDialecticLifecycleSmoke:
     """End-to-end smoke walking a multi-turn session through prewarm,
     turn 1 consume, trivial skip, cadence fire, empty-result retry,

From ba7da73ca931bcdaf64de294c8c9551e0b3615b1 Mon Sep 17 00:00:00 2001
From: Erosika <eri@plasticlabs.ai>
Date: Sat, 18 Apr 2026 13:17:44 -0400
Subject: [PATCH 101/143] test(honcho): drop two first-turn tests subsumed by
 prewarm + smoke coverage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- TestDialecticDepth::test_first_turn_runs_dialectic_synchronously:
  covered by TestSessionStartDialecticPrewarm::test_turn1_falls_back_to_sync_when_prewarm_missing
  (more realistic — exercises the empty-prewarm → sync-fallback path)
- TestDialecticDepth::test_first_turn_dialectic_does_not_double_fire:
  covered by TestDialecticLifecycleSmoke (turn 1 flow) and
  TestDialecticCadenceAdvancesOnSuccess::test_empty_dialectic_result_does_not_advance_cadence

Both predate the prewarm refactor and exercise paths that are now
fallback behaviors, which are already covered elsewhere.
---
 tests/honcho_plugin/test_session.py | 41 -----------------------------
 1 file changed, 41 deletions(-)

diff --git a/tests/honcho_plugin/test_session.py b/tests/honcho_plugin/test_session.py
index 37f54b54103..7b5ac7e3d0e 100644
--- a/tests/honcho_plugin/test_session.py
+++ b/tests/honcho_plugin/test_session.py
@@ -1050,47 +1050,6 @@ class TestDialecticDepth:
         assert provider._manager.dialectic_query.call_count == 2
         assert "Synthesis" in result
 
-    def test_first_turn_runs_dialectic_synchronously(self):
-        """First turn should fire the dialectic synchronously (cold start)."""
-        from unittest.mock import MagicMock, patch
-        provider = self._make_provider(cfg_extra={"dialectic_depth": 1})
-        provider._manager = MagicMock()
-        provider._manager.dialectic_query.return_value = "cold start synthesis"
-        provider._manager.get_prefetch_context.return_value = None
-        provider._manager.pop_context_result.return_value = None
-        provider._session_key = "test"
-        provider._base_context_cache = ""  # cold start
-        provider._last_dialectic_turn = -999  # never fired
-
-        result = provider.prefetch("hello world")
-        assert "cold start synthesis" in result
-        assert provider._manager.dialectic_query.call_count == 1
-        # After first-turn sync, _last_dialectic_turn should be updated
-        assert provider._last_dialectic_turn != -999
-
-    def test_first_turn_dialectic_does_not_double_fire(self):
-        """After first-turn sync dialectic, queue_prefetch should skip (cadence)."""
-        from unittest.mock import MagicMock
-        provider = self._make_provider(cfg_extra={"dialectic_depth": 1})
-        provider._manager = MagicMock()
-        provider._manager.dialectic_query.return_value = "cold start synthesis"
-        provider._manager.get_prefetch_context.return_value = None
-        provider._manager.pop_context_result.return_value = None
-        provider._session_key = "test"
-        provider._base_context_cache = ""
-        provider._last_dialectic_turn = -999
-        provider._turn_count = 0
-
-        # First turn fires sync dialectic
-        provider.prefetch("hello")
-        assert provider._manager.dialectic_query.call_count == 1
-
-        # Now queue_prefetch on same turn should skip — _last_dialectic_turn
-        # was just set to _turn_count by the sync path, so (0 - 0 = 0) < cadence.
-        provider._manager.dialectic_query.reset_mock()
-        provider.queue_prefetch("hello")
-        assert provider._manager.dialectic_query.call_count == 0
-
     def test_run_dialectic_depth_bails_early_on_strong_signal(self):
         """Depth 2 skips pass 1 when pass 0 returns strong signal."""
         from unittest.mock import MagicMock

From 5b6792f04d973f996fcb981ae570e674472c3d4d Mon Sep 17 00:00:00 2001
From: LeonSGP43 <154585401+LeonSGP43@users.noreply.github.com>
Date: Fri, 17 Apr 2026 13:49:31 +0800
Subject: [PATCH 102/143] fix(honcho): scope gateway sessions by runtime user
 id

---
 plugins/memory/honcho/__init__.py   |  9 +---
 plugins/memory/honcho/session.py    |  9 +++-
 tests/agent/test_memory_user_id.py  | 65 +++++++++++++++++++++++++----
 tests/honcho_plugin/test_session.py | 21 +++++-----
 4 files changed, 75 insertions(+), 29 deletions(-)

diff --git a/plugins/memory/honcho/__init__.py b/plugins/memory/honcho/__init__.py
index 68fa868855c..d104deb5d52 100644
--- a/plugins/memory/honcho/__init__.py
+++ b/plugins/memory/honcho/__init__.py
@@ -293,14 +293,6 @@ class HonchoMemoryProvider(MemoryProvider):
                 logger.debug("Honcho not configured — plugin inactive")
                 return
 
-            # Override peer_name with gateway user_id for per-user memory scoping.
-            # Only when no explicit peerName was configured — an explicit peerName
-            # means the user chose their identity; a raw user_id (e.g. Telegram
-            # chat ID) should not silently replace it.
-            _gw_user_id = kwargs.get("user_id")
-            if _gw_user_id and not cfg.peer_name:
-                cfg.peer_name = _gw_user_id
-
             self._config = cfg
 
             # ----- B1: recall_mode from config -----
@@ -359,6 +351,7 @@ class HonchoMemoryProvider(MemoryProvider):
             honcho=client,
             config=cfg,
             context_tokens=cfg.context_tokens,
+            runtime_user_peer_name=kwargs.get("user_id") or None,
         )
 
         # ----- B3: resolve_session_name -----
diff --git a/plugins/memory/honcho/session.py b/plugins/memory/honcho/session.py
index 7344b517e40..79625b5cd58 100644
--- a/plugins/memory/honcho/session.py
+++ b/plugins/memory/honcho/session.py
@@ -78,6 +78,7 @@ class HonchoSessionManager:
         honcho: Honcho | None = None,
         context_tokens: int | None = None,
         config: Any | None = None,
+        runtime_user_peer_name: str | None = None,
     ):
         """
         Initialize the session manager.
@@ -87,10 +88,12 @@ class HonchoSessionManager:
             context_tokens: Max tokens for context() calls (None = Honcho default).
             config: HonchoClientConfig from global config (provides peer_name, ai_peer,
                     write_frequency, observation, etc.).
+            runtime_user_peer_name: Gateway user identity for per-user memory scoping.
         """
         self._honcho = honcho
         self._context_tokens = context_tokens
         self._config = config
+        self._runtime_user_peer_name = runtime_user_peer_name
         self._cache: dict[str, HonchoSession] = {}
         self._peers_cache: dict[str, Any] = {}
         self._sessions_cache: dict[str, Any] = {}
@@ -274,8 +277,10 @@ class HonchoSessionManager:
             logger.debug("Local session cache hit: %s", key)
             return self._cache[key]
 
-        # Use peer names from global config when available
-        if self._config and self._config.peer_name:
+        # Gateway sessions should use the runtime user identity when available.
+        if self._runtime_user_peer_name:
+            user_peer_id = self._sanitize_id(self._runtime_user_peer_name)
+        elif self._config and self._config.peer_name:
             user_peer_id = self._sanitize_id(self._config.peer_name)
         else:
             # Fallback: derive from session key
diff --git a/tests/agent/test_memory_user_id.py b/tests/agent/test_memory_user_id.py
index c1b82208d0e..d33753bd2e1 100644
--- a/tests/agent/test_memory_user_id.py
+++ b/tests/agent/test_memory_user_id.py
@@ -208,34 +208,81 @@ class TestMem0UserIdScoping:
 
 
 class TestHonchoUserIdScoping:
-    """Verify Honcho plugin uses gateway user_id for peer_name when provided."""
+    """Verify Honcho plugin keeps runtime user scoping separate from config peer_name."""
 
-    def test_gateway_user_id_overrides_peer_name(self):
-        """When user_id is in kwargs and no explicit peer_name, user_id should be used."""
+    def test_gateway_user_id_is_passed_as_runtime_peer(self):
+        """Gateway user_id should scope Honcho sessions without mutating config peer_name."""
         from plugins.memory.honcho import HonchoMemoryProvider
 
         provider = HonchoMemoryProvider()
 
-        # Create a mock config with NO explicit peer_name
         mock_cfg = MagicMock()
         mock_cfg.enabled = True
         mock_cfg.api_key = "test-key"
         mock_cfg.base_url = None
-        mock_cfg.peer_name = ""  # No explicit peer_name — user_id should fill it
-        mock_cfg.recall_mode = "tools"  # Use tools mode to defer session init
+        mock_cfg.peer_name = "static-user"
+        mock_cfg.recall_mode = "context"
+        mock_cfg.context_tokens = None
+        mock_cfg.raw = {}
+        mock_cfg.dialectic_depth = 1
+        mock_cfg.dialectic_depth_levels = None
+        mock_cfg.init_on_session_start = False
+        mock_cfg.ai_peer = "hermes"
+        mock_cfg.resolve_session_name.return_value = "test-sess"
+        mock_cfg.session_strategy = "shared"
 
         with patch(
             "plugins.memory.honcho.client.HonchoClientConfig.from_global_config",
             return_value=mock_cfg,
-        ):
+        ), patch(
+            "plugins.memory.honcho.client.get_honcho_client",
+            return_value=MagicMock(),
+        ), patch(
+            "plugins.memory.honcho.session.HonchoSessionManager",
+        ) as mock_manager_cls:
+            mock_manager = MagicMock()
+            mock_manager.get_or_create.return_value = MagicMock(messages=[])
+            mock_manager_cls.return_value = mock_manager
             provider.initialize(
                 session_id="test-sess",
                 user_id="discord_user_789",
                 platform="discord",
             )
 
-        # The config's peer_name should have been overridden with the user_id
-        assert mock_cfg.peer_name == "discord_user_789"
+        assert mock_cfg.peer_name == "static-user"
+        assert mock_manager_cls.call_args.kwargs["runtime_user_peer_name"] == "discord_user_789"
+
+    def test_session_manager_prefers_runtime_user_id_over_config_peer_name(self):
+        """Session manager should isolate gateway users even when config peer_name is static."""
+        from plugins.memory.honcho.session import HonchoSessionManager
+
+        mock_cfg = MagicMock()
+        mock_cfg.peer_name = "static-user"
+        mock_cfg.ai_peer = "hermes"
+        mock_cfg.write_frequency = "sync"
+        mock_cfg.dialectic_reasoning_level = "low"
+        mock_cfg.dialectic_dynamic = True
+        mock_cfg.dialectic_max_chars = 600
+        mock_cfg.observation_mode = "directional"
+        mock_cfg.user_observe_me = True
+        mock_cfg.user_observe_others = True
+        mock_cfg.ai_observe_me = True
+        mock_cfg.ai_observe_others = True
+
+        manager = HonchoSessionManager(
+            honcho=MagicMock(),
+            config=mock_cfg,
+            runtime_user_peer_name="discord_user_789",
+        )
+
+        with patch.object(manager, "_get_or_create_peer", return_value=MagicMock()), patch.object(
+            manager,
+            "_get_or_create_honcho_session",
+            return_value=(MagicMock(), []),
+        ):
+            session = manager.get_or_create("discord:channel-1")
+
+        assert session.user_peer_id == "discord_user_789"
 
     def test_no_user_id_preserves_config_peer_name(self):
         """Without user_id, the config peer_name should be preserved."""
diff --git a/tests/honcho_plugin/test_session.py b/tests/honcho_plugin/test_session.py
index 7b5ac7e3d0e..f2a66029292 100644
--- a/tests/honcho_plugin/test_session.py
+++ b/tests/honcho_plugin/test_session.py
@@ -568,15 +568,15 @@ class TestToolsModeInitBehavior:
 
         with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \
              patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \
-             patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \
+             patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager) as mock_manager_cls, \
              patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
             provider.initialize(session_id="test-session-001", **init_kwargs)
 
-        return provider, cfg
+        return provider, cfg, mock_manager_cls
 
     def test_tools_lazy_default(self):
         """tools + initOnSessionStart=false → session NOT initialized after initialize()."""
-        provider, _ = self._make_provider_with_config(
+        provider, _, _ = self._make_provider_with_config(
             recall_mode="tools", init_on_session_start=False,
         )
         assert provider._session_initialized is False
@@ -585,7 +585,7 @@ class TestToolsModeInitBehavior:
 
     def test_tools_eager_init(self):
         """tools + initOnSessionStart=true → session IS initialized after initialize()."""
-        provider, _ = self._make_provider_with_config(
+        provider, _, _ = self._make_provider_with_config(
             recall_mode="tools", init_on_session_start=True,
         )
         assert provider._session_initialized is True
@@ -593,33 +593,34 @@ class TestToolsModeInitBehavior:
 
     def test_tools_eager_prefetch_still_empty(self):
         """tools mode with eager init still returns empty from prefetch() (no auto-injection)."""
-        provider, _ = self._make_provider_with_config(
+        provider, _, _ = self._make_provider_with_config(
             recall_mode="tools", init_on_session_start=True,
         )
         assert provider.prefetch("test query") == ""
 
     def test_tools_lazy_prefetch_empty(self):
         """tools mode with lazy init also returns empty from prefetch()."""
-        provider, _ = self._make_provider_with_config(
+        provider, _, _ = self._make_provider_with_config(
             recall_mode="tools", init_on_session_start=False,
         )
         assert provider.prefetch("test query") == ""
 
     def test_explicit_peer_name_not_overridden_by_user_id(self):
         """Explicit peerName in config must not be replaced by gateway user_id."""
-        _, cfg = self._make_provider_with_config(
+        _, cfg, _ = self._make_provider_with_config(
             recall_mode="tools", init_on_session_start=True,
             peer_name="Kathie", user_id="8439114563",
         )
         assert cfg.peer_name == "Kathie"
 
     def test_user_id_used_when_no_peer_name(self):
-        """Gateway user_id is used as peer_name when no explicit peerName configured."""
-        _, cfg = self._make_provider_with_config(
+        """Gateway user_id is passed separately from config peer_name."""
+        _, cfg, mock_manager_cls = self._make_provider_with_config(
             recall_mode="tools", init_on_session_start=True,
             peer_name=None, user_id="8439114563",
         )
-        assert cfg.peer_name == "8439114563"
+        assert cfg.peer_name is None
+        assert mock_manager_cls.call_args.kwargs["runtime_user_peer_name"] == "8439114563"
 
 
 class TestPerSessionMigrateGuard:

From 21d5ef2f1742b4a8bd5fb69c07eda79cefdc57ab Mon Sep 17 00:00:00 2001
From: Erosika <eri@plasticlabs.ai>
Date: Sat, 18 Apr 2026 13:49:50 -0400
Subject: [PATCH 103/143] feat(honcho): wizard cadence default 2, surface
 reasoning level, backwards-compat fallback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Setup wizard now always writes dialecticCadence=2 on new configs and
surfaces the reasoning level as an explicit step with all five options
(minimal / low / medium / high / max), always writing
dialecticReasoningLevel.

Code keeps a backwards-compat fallback of 1 when dialecticCadence is
unset so existing honcho.json configs that predate the setting keep
firing every turn on upgrade. New setups via the wizard get 2
explicitly; docs show 2 as the default.

Also scrubs editorial lines from code and docs ("max is reserved for
explicit tool-path selection", "Unset → every turn; wizard pre-fills 2",
and similar process-exposing phrasing) and adds an inline link to
app.honcho.dev where the server-side observation sync is mentioned in
honcho.md. Recommended cadence range updated to 1-5 across docs and
wizard copy.
---
 .../autonomous-ai-agents/honcho/SKILL.md      |  4 ++--
 plugins/memory/honcho/__init__.py             | 10 +++++----
 plugins/memory/honcho/cli.py                  | 22 ++++++++++++++++++-
 plugins/memory/honcho/client.py               |  3 +--
 tests/honcho_plugin/test_session.py           |  9 ++++----
 website/docs/user-guide/features/honcho.md    |  8 +++----
 .../user-guide/features/memory-providers.md   |  2 +-
 7 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/optional-skills/autonomous-ai-agents/honcho/SKILL.md b/optional-skills/autonomous-ai-agents/honcho/SKILL.md
index e79875aa073..1c099ca605f 100644
--- a/optional-skills/autonomous-ai-agents/honcho/SKILL.md
+++ b/optional-skills/autonomous-ai-agents/honcho/SKILL.md
@@ -145,7 +145,7 @@ Controls **how often** dialectic and context calls happen.
 | Key | Default | Description |
 |-----|---------|-------------|
 | `contextCadence` | `1` | Min turns between context API calls |
-| `dialecticCadence` | `1` (wizard: `2`) | Min turns between dialectic API calls. Unset → every turn; wizard pre-fills `2` |
+| `dialecticCadence` | `2` | Min turns between dialectic API calls. Recommended 1–5 |
 | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` for base context injection |
 
 Higher cadence values fire the dialectic LLM less often. `dialecticCadence: 2` means the engine fires every other turn. Setting it to `1` fires every turn.
@@ -370,7 +370,7 @@ Config file: `$HERMES_HOME/honcho.json` (profile-local) or `~/.honcho/config.jso
 | `contextTokens` | uncapped | Max tokens for the combined base context injection (summary + representation + card). Opt-in cap — omit to leave uncapped, set to an integer to bound injection size. |
 | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` |
 | `contextCadence` | `1` | Min turns between context API calls |
-| `dialecticCadence` | `1` (wizard: `2`) | Min turns between dialectic LLM calls |
+| `dialecticCadence` | `2` | Min turns between dialectic LLM calls (recommended 1–5) |
 
 The `contextTokens` budget is enforced at injection time. If the session summary + representation + card exceed the budget, Honcho trims the summary first, then the representation, preserving the card. This prevents context blowup in long sessions.
 
diff --git a/plugins/memory/honcho/__init__.py b/plugins/memory/honcho/__init__.py
index d104deb5d52..6ca32c1dcbb 100644
--- a/plugins/memory/honcho/__init__.py
+++ b/plugins/memory/honcho/__init__.py
@@ -207,7 +207,7 @@ class HonchoMemoryProvider(MemoryProvider):
         self._turn_count = 0
         self._injection_frequency = "every-turn"  # or "first-turn"
         self._context_cadence = 1   # minimum turns between context API calls
-        self._dialectic_cadence = 1  # minimum turns between dialectic API calls
+        self._dialectic_cadence = 1  # backwards-compat fallback; wizard writes 2 on new configs
         self._dialectic_depth = 1   # how many .chat() calls per dialectic cycle (1-3)
         self._dialectic_depth_levels: list[str] | None = None  # per-pass reasoning levels
         self._reasoning_heuristic: bool = True  # scale base level by query length
@@ -304,6 +304,10 @@ class HonchoMemoryProvider(MemoryProvider):
                 raw = cfg.raw or {}
                 self._injection_frequency = raw.get("injectionFrequency", "every-turn")
                 self._context_cadence = int(raw.get("contextCadence", 1))
+                # Backwards-compat: unset dialecticCadence falls back to 1
+                # (every turn) so existing honcho.json configs without the key
+                # behave as they did before. New setups via `hermes honcho setup`
+                # get dialecticCadence=2 written explicitly by the wizard.
                 self._dialectic_cadence = int(raw.get("dialecticCadence", 1))
                 self._dialectic_depth = max(1, min(cfg.dialectic_depth, 3))
                 self._dialectic_depth_levels = cfg.dialectic_depth_levels
@@ -844,9 +848,7 @@ class HonchoMemoryProvider(MemoryProvider):
     def _apply_reasoning_heuristic(self, base: str, query: str) -> str:
         """Scale `base` up by query length, clamped at reasoning_level_cap.
 
-        Char-count heuristic: +1 at >=120 chars, +2 at >=400. Ceiling is
-        reasoning_level_cap (default 'high' — 'max' is reserved for
-        explicit tool-path selection).
+        Char-count heuristic: +1 at >=120 chars, +2 at >=400.
         """
         if not self._reasoning_heuristic or not query:
             return base
diff --git a/plugins/memory/honcho/cli.py b/plugins/memory/honcho/cli.py
index eb21c48eaa8..5c829a4c989 100644
--- a/plugins/memory/honcho/cli.py
+++ b/plugins/memory/honcho/cli.py
@@ -463,7 +463,8 @@ def cmd_setup(args) -> None:
     current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "2")
     print("\n  Dialectic cadence:")
     print("    How often Honcho rebuilds its user model (LLM call on Honcho backend).")
-    print("    1 = every turn, 2 = every other turn (wizard default), 3+ = sparse.")
+    print("    1 = every turn, 2 = every other turn, 3+ = sparser.")
+    print("    Recommended: 1-5.")
     new_dialectic = _prompt("Dialectic cadence", default=current_dialectic)
     try:
         val = int(new_dialectic)
@@ -472,6 +473,25 @@ def cmd_setup(args) -> None:
     except (ValueError, TypeError):
         hermes_host["dialecticCadence"] = 2
 
+    # --- 7c. Dialectic reasoning level ---
+    current_reasoning = (
+        hermes_host.get("dialecticReasoningLevel")
+        or cfg.get("dialecticReasoningLevel")
+        or "low"
+    )
+    print("\n  Dialectic reasoning level:")
+    print("    Depth Honcho uses when synthesizing user context on auto-injected calls.")
+    print("    minimal  -- quick factual lookups")
+    print("    low      -- straightforward questions (default)")
+    print("    medium   -- multi-aspect synthesis")
+    print("    high     -- complex behavioral patterns")
+    print("    max      -- thorough audit-level analysis")
+    new_reasoning = _prompt("Reasoning level", default=current_reasoning)
+    if new_reasoning in ("minimal", "low", "medium", "high", "max"):
+        hermes_host["dialecticReasoningLevel"] = new_reasoning
+    else:
+        hermes_host["dialecticReasoningLevel"] = "low"
+
     # --- 8. Session strategy ---
     current_strat = hermes_host.get("sessionStrategy") or cfg.get("sessionStrategy", "per-session")
     print("\n  Session strategy:")
diff --git a/plugins/memory/honcho/client.py b/plugins/memory/honcho/client.py
index 346c2b76e68..fef2e2d58f1 100644
--- a/plugins/memory/honcho/client.py
+++ b/plugins/memory/honcho/client.py
@@ -254,8 +254,7 @@ class HonchoClientConfig:
     # When true, the auto-injected dialectic scales reasoning level up on
     # longer queries. See HonchoMemoryProvider for thresholds.
     reasoning_heuristic: bool = True
-    # Ceiling for the heuristic-selected reasoning level. "max" is reserved
-    # for explicit tool-path selection.
+    # Ceiling for the heuristic-selected reasoning level.
     reasoning_level_cap: str = "high"
     # Honcho API limits — configurable for self-hosted instances
     # Max chars per message sent via add_messages() (Honcho cloud: 25000)
diff --git a/tests/honcho_plugin/test_session.py b/tests/honcho_plugin/test_session.py
index f2a66029292..25426118312 100644
--- a/tests/honcho_plugin/test_session.py
+++ b/tests/honcho_plugin/test_session.py
@@ -865,8 +865,10 @@ class TestDialecticCadenceDefaults:
         _settle_prewarm(provider)
         return provider
 
-    def test_default_is_1(self):
-        """Default dialectic_cadence is 1 — fires every turn unless overridden."""
+    def test_unset_falls_back_to_1(self):
+        """Unset dialecticCadence falls back to 1 (every turn) for backwards
+        compatibility with existing configs that predate the setting. The
+        setup wizard writes 2 explicitly on new configs."""
         provider = self._make_provider()
         assert provider._dialectic_cadence == 1
 
@@ -1569,8 +1571,7 @@ class TestDialecticLifecycleSmoke:
 
 class TestReasoningHeuristic:
     """Char-count heuristic that scales the auto-injected reasoning level by
-    query length, clamped at reasoning_level_cap. 'max' is reserved for
-    explicit tool-path selection."""
+    query length, clamped at reasoning_level_cap."""
 
     @staticmethod
     def _make_provider(cfg_extra=None):
diff --git a/website/docs/user-guide/features/honcho.md b/website/docs/user-guide/features/honcho.md
index bf4b5c6bc37..60e82b4b08f 100644
--- a/website/docs/user-guide/features/honcho.md
+++ b/website/docs/user-guide/features/honcho.md
@@ -77,7 +77,7 @@ Cost and depth are controlled by three independent knobs:
 | Knob | Controls | Default |
 |------|----------|---------|
 | `contextCadence` | Turns between `context()` API calls (base layer refresh) | `1` |
-| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `1` (code default) / `2` (setup wizard default) |
+| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `2` (recommended 1–5) |
 | `dialecticDepth` | Number of `.chat()` passes per dialectic invocation (1–3) | `1` |
 
 These are orthogonal — you can have frequent context refreshes with infrequent dialectic, or deep multi-pass dialectic at low frequency. Example: `contextCadence: 1, dialecticCadence: 5, dialecticDepth: 2` refreshes base context every turn, runs dialectic every 5 turns, and each dialectic run makes 2 passes.
@@ -100,7 +100,7 @@ On session init, Honcho fires a dialectic call in the background at the full con
 
 ### Query-Adaptive Reasoning Level
 
-The auto-injected dialectic scales `dialecticReasoningLevel` by query length: +1 level at ≥120 chars, +2 at ≥400, clamped at `reasoningLevelCap` (default `"high"`). Disable with `reasoningHeuristic: false` to pin every auto call to `dialecticReasoningLevel`. `"max"` is reserved for explicit tool-path selection via `honcho_reasoning`.
+The auto-injected dialectic scales `dialecticReasoningLevel` by query length: +1 level at ≥120 chars, +2 at ≥400, clamped at `reasoningLevelCap` (default `"high"`). Disable with `reasoningHeuristic: false` to pin every auto call to `dialecticReasoningLevel`. Available levels: `minimal`, `low`, `medium`, `high`, `max`.
 
 ## Configuration Options
 
@@ -112,7 +112,7 @@ Honcho is configured in `~/.honcho/config.json` (global) or `$HERMES_HOME/honcho
 |-----|---------|-------------|
 | `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Set to an integer (e.g. 1200) to cap. Truncates at word boundaries |
 | `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) |
-| `dialecticCadence` | `1` (wizard sets `2`) | Minimum turns between `peer.chat()` LLM calls (dialectic layer). Code default fires every turn when the key is unset; the setup wizard pre-fills `2`. In `tools` mode, irrelevant — model calls explicitly |
+| `dialecticCadence` | `2` | Minimum turns between `peer.chat()` LLM calls (dialectic layer). Recommended 1–5. Irrelevant in `tools` mode, where the model makes these calls explicitly |
 | `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped to 1–3 |
 | `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. Overrides proportional defaults |
 | `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` |
@@ -183,7 +183,7 @@ Common patterns:
 | AI shouldn't re-model the user from its own replies | `"ai": {"observeMe": true, "observeOthers": false}` |
 | Strong persona the AI peer shouldn't update from self-observation | `"ai": {"observeMe": false, "observeOthers": true}` |
 
-Server-side toggles set via the Honcho dashboard win over local defaults — Hermes syncs them back at session init.
+Server-side toggles set via the [Honcho dashboard](https://app.honcho.dev) win over local defaults — Hermes syncs them back at session init.
 
 ## Tools
 
diff --git a/website/docs/user-guide/features/memory-providers.md b/website/docs/user-guide/features/memory-providers.md
index b2469a13ee3..d11c36657a3 100644
--- a/website/docs/user-guide/features/memory-providers.md
+++ b/website/docs/user-guide/features/memory-providers.md
@@ -82,7 +82,7 @@ hermes memory setup        # select "honcho"
 | `workspace` | host key | Shared workspace ID |
 | `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Truncates at word boundaries |
 | `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) |
-| `dialecticCadence` | `1` (wizard sets `2`) | Minimum turns between `peer.chat()` LLM calls. Unset → every turn; wizard pre-fills `2`. Only applies to `hybrid`/`context` modes |
+| `dialecticCadence` | `2` | Minimum turns between `peer.chat()` LLM calls. Recommended 1–5. Only applies to `hybrid`/`context` modes |
 | `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped 1–3. Pass 0: cold/warm prompt, pass 1: self-audit, pass 2: reconciliation |
 | `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. Overrides proportional defaults |
 | `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` |

From 0a8d48809f15157431f373e0add4f1a1be76af4b Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Sun, 19 Apr 2026 11:01:26 +0530
Subject: [PATCH 104/143] chore: add LeonSGP43 numeric noreply email to
 AUTHOR_MAP

The cherry-picked commit from #11434 uses the 154585401+ prefixed
noreply format. Add it alongside the existing bare entry so the
contributor audit passes.
---
 scripts/release.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/release.py b/scripts/release.py
index 90c2a13d0b5..b153140057b 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -215,6 +215,7 @@ AUTHOR_MAP = {
     "ziliangpeng@users.noreply.github.com": "ziliangpeng",
     "centripetal-star@users.noreply.github.com": "centripetal-star",
     "LeonSGP43@users.noreply.github.com": "LeonSGP43",
+    "154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43",
     "Lubrsy706@users.noreply.github.com": "Lubrsy706",
     "niyant@spicefi.xyz": "spniyant",
     "olafthiele@gmail.com": "olafthiele",

From 7b1a11b97179222c3fc9a721d614eae2d5f4c9f3 Mon Sep 17 00:00:00 2001
From: helix4u <4317663+helix4u@users.noreply.github.com>
Date: Sat, 18 Apr 2026 17:37:02 -0600
Subject: [PATCH 105/143] fix(memory): keep Honcho provider opt-in

---
 run_agent.py                                 | 25 -------------
 tests/run_agent/test_memory_provider_init.py | 39 ++++++++++++++++++++
 2 files changed, 39 insertions(+), 25 deletions(-)
 create mode 100644 tests/run_agent/test_memory_provider_init.py

diff --git a/run_agent.py b/run_agent.py
index c87bd351528..01064880982 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1306,31 +1306,6 @@ class AIAgent:
             try:
                 _mem_provider_name = mem_config.get("provider", "") if mem_config else ""
 
-                # Auto-migrate: if Honcho was actively configured (enabled +
-                # credentials) but memory.provider is not set, activate the
-                # honcho plugin automatically.  Just having the config file
-                # is not enough — the user may have disabled Honcho or the
-                # file may be from a different tool.
-                if not _mem_provider_name:
-                    try:
-                        from plugins.memory.honcho.client import HonchoClientConfig as _HCC
-                        _hcfg = _HCC.from_global_config()
-                        if _hcfg.enabled and (_hcfg.api_key or _hcfg.base_url):
-                            _mem_provider_name = "honcho"
-                            # Persist so this only auto-migrates once
-                            try:
-                                from hermes_cli.config import load_config as _lc, save_config as _sc
-                                _cfg = _lc()
-                                _cfg.setdefault("memory", {})["provider"] = "honcho"
-                                _sc(_cfg)
-                            except Exception:
-                                pass
-                            if not self.quiet_mode:
-                                print("  ✓ Auto-migrated Honcho to memory provider plugin.")
-                                print("    Your config and data are preserved.\n")
-                    except Exception:
-                        pass
-
                 if _mem_provider_name:
                     from agent.memory_manager import MemoryManager as _MemoryManager
                     from plugins.memory import load_memory_provider as _load_mem
diff --git a/tests/run_agent/test_memory_provider_init.py b/tests/run_agent/test_memory_provider_init.py
new file mode 100644
index 00000000000..89431db85d0
--- /dev/null
+++ b/tests/run_agent/test_memory_provider_init.py
@@ -0,0 +1,39 @@
+"""Regression tests for memory provider selection during AIAgent init."""
+
+from types import SimpleNamespace
+from unittest.mock import patch
+
+
+def test_blank_memory_provider_does_not_auto_enable_honcho():
+    """Blank memory.provider must not auto-enable Honcho, even if Honcho fallback looks configured."""
+    cfg = {"memory": {"provider": ""}, "agent": {}}
+    honcho_cfg = SimpleNamespace(enabled=True, api_key="stale-key", base_url=None)
+
+    with (
+        patch("hermes_cli.config.load_config", return_value=cfg),
+        patch("hermes_cli.config.save_config") as save_config,
+        patch(
+            "plugins.memory.honcho.client.HonchoClientConfig.from_global_config",
+            return_value=honcho_cfg,
+        ) as from_global_config,
+        patch("plugins.memory.load_memory_provider") as load_memory_provider,
+        patch("agent.model_metadata.get_model_context_length", return_value=204_800),
+        patch("run_agent.get_tool_definitions", return_value=[]),
+        patch("run_agent.check_toolset_requirements", return_value={}),
+        patch("run_agent.OpenAI"),
+    ):
+        from run_agent import AIAgent
+
+        agent = AIAgent(
+            api_key="test-key-1234567890",
+            base_url="https://openrouter.ai/api/v1",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=False,
+        )
+
+    assert agent._memory_manager is None
+    from_global_config.assert_not_called()
+    load_memory_provider.assert_not_called()
+    save_config.assert_not_called()
+

From d66414a844b780467b33ea9c861cf07c098ab73b Mon Sep 17 00:00:00 2001
From: helix4u <4317663+helix4u@users.noreply.github.com>
Date: Sat, 18 Apr 2026 15:54:05 -0600
Subject: [PATCH 106/143] docs(custom-providers): use key_env in examples

---
 hermes_cli/config.py                                   | 4 ++--
 website/docs/integrations/providers.md                 | 8 ++++----
 website/docs/user-guide/features/fallback-providers.md | 8 ++++----
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index d53899b135e..1dedc1710aa 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -2861,7 +2861,7 @@ _FALLBACK_COMMENT = """
 #   minimax      (MINIMAX_API_KEY)     — MiniMax
 #   minimax-cn   (MINIMAX_CN_API_KEY)  — MiniMax (China)
 #
-# For custom OpenAI-compatible endpoints, add base_url and api_key_env.
+# For custom OpenAI-compatible endpoints, add base_url and key_env.
 #
 # fallback_model:
 #   provider: openrouter
@@ -2905,7 +2905,7 @@ _COMMENTED_SECTIONS = """
 #   minimax      (MINIMAX_API_KEY)     — MiniMax
 #   minimax-cn   (MINIMAX_CN_API_KEY)  — MiniMax (China)
 #
-# For custom OpenAI-compatible endpoints, add base_url and api_key_env.
+# For custom OpenAI-compatible endpoints, add base_url and key_env.
 #
 # fallback_model:
 #   provider: openrouter
diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md
index 4f536ec7496..9d32fc21ecb 100644
--- a/website/docs/integrations/providers.md
+++ b/website/docs/integrations/providers.md
@@ -1052,11 +1052,11 @@ custom_providers:
     # api_key omitted — Hermes uses "no-key-required" for keyless local servers
   - name: work
     base_url: https://gpu-server.internal.corp/v1
-    api_key: corp-api-key
+    key_env: CORP_API_KEY
     api_mode: chat_completions   # optional, auto-detected from URL
   - name: anthropic-proxy
     base_url: https://proxy.example.com/anthropic
-    api_key: proxy-key
+    key_env: ANTHROPIC_PROXY_KEY
     api_mode: anthropic_messages  # for Anthropic-compatible proxies
 ```
 
@@ -1154,7 +1154,7 @@ fallback_model:
   provider: openrouter                    # required
   model: anthropic/claude-sonnet-4        # required
   # base_url: http://localhost:8000/v1    # optional, for custom endpoints
-  # api_key_env: MY_CUSTOM_KEY           # optional, env var name for custom endpoint API key
+  # key_env: MY_CUSTOM_KEY               # optional, env var name for custom endpoint API key
 ```
 
 When activated, the fallback swaps the model and provider mid-session without losing your conversation. It fires **at most once** per session.
@@ -1178,7 +1178,7 @@ smart_model_routing:
     provider: openrouter
     model: google/gemini-2.5-flash
     # base_url: http://localhost:8000/v1  # optional custom endpoint
-    # api_key_env: MY_CUSTOM_KEY          # optional env var name for that endpoint's API key
+    # key_env: MY_CUSTOM_KEY              # optional env var name for that endpoint's API key
 ```
 
 How it works:
diff --git a/website/docs/user-guide/features/fallback-providers.md b/website/docs/user-guide/features/fallback-providers.md
index 2e9bcad99b0..01e5524f6a4 100644
--- a/website/docs/user-guide/features/fallback-providers.md
+++ b/website/docs/user-guide/features/fallback-providers.md
@@ -61,18 +61,18 @@ Both `provider` and `model` are **required**. If either is missing, the fallback
 | Arcee AI | `arcee` | `ARCEEAI_API_KEY` |
 | Alibaba / DashScope | `alibaba` | `DASHSCOPE_API_KEY` |
 | Hugging Face | `huggingface` | `HF_TOKEN` |
-| Custom endpoint | `custom` | `base_url` + `api_key_env` (see below) |
+| Custom endpoint | `custom` | `base_url` + `key_env` (see below) |
 
 ### Custom Endpoint Fallback
 
-For a custom OpenAI-compatible endpoint, add `base_url` and optionally `api_key_env`:
+For a custom OpenAI-compatible endpoint, add `base_url` and optionally `key_env`:
 
 ```yaml
 fallback_model:
   provider: custom
   model: my-local-model
   base_url: http://localhost:8000/v1
-  api_key_env: MY_LOCAL_KEY          # env var name containing the API key
+  key_env: MY_LOCAL_KEY              # env var name containing the API key
 ```
 
 ### When Fallback Triggers
@@ -128,7 +128,7 @@ fallback_model:
   provider: custom
   model: llama-3.1-70b
   base_url: http://localhost:8000/v1
-  api_key_env: LOCAL_API_KEY
+  key_env: LOCAL_API_KEY
 ```
 
 **Codex OAuth as fallback:**

From ce410521b3d21d71f28e0dd041df872ffbd8344f Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 19 Apr 2026 00:03:10 -0700
Subject: [PATCH 107/143] feat(browser): add browser_cdp raw DevTools Protocol
 passthrough (#12369)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Agents can now send arbitrary CDP commands to the browser. The tool is
gated on a reachable CDP endpoint at session start — it only appears in
the toolset when BROWSER_CDP_URL is set (from '/browser connect') or
'browser.cdp_url' is configured in config.yaml. Backends that don't
currently expose CDP to the Python side (Camofox, default local
agent-browser, cloud providers whose per-session cdp_url is not yet
surfaced) do not see the tool at all.

Tool schema description links to the CDP method reference at
https://chromedevtools.github.io/devtools-protocol/ so the agent can
web_extract specific method docs on demand.

Stateless per call. Browser-level methods (Target.*, Browser.*,
Storage.*) omit target_id. Page-level methods attach to the target
with flatten=true and dispatch the method on the returned sessionId.
Clean errors when the endpoint becomes unreachable mid-session or
the URL isn't a WebSocket.

Tests: 19 unit (mock CDP server + gate checks) + E2E against real
headless Chrome (Target.getTargets, Browser.getVersion,
Runtime.evaluate with target_id, Page.navigate + re-eval, bogus
method, bogus target_id, missing endpoint) + E2E of the check_fn
gate (tool hidden without CDP URL, visible with it, hidden again
after unset).
---
 tests/tools/test_browser_cdp_tool.py         | 408 ++++++++++++++++++
 tools/browser_cdp_tool.py                    | 416 +++++++++++++++++++
 toolsets.py                                  |   8 +-
 website/docs/reference/tools-reference.md    |   5 +-
 website/docs/reference/toolsets-reference.md |   2 +-
 website/docs/user-guide/features/browser.md  |  30 ++
 6 files changed, 862 insertions(+), 7 deletions(-)
 create mode 100644 tests/tools/test_browser_cdp_tool.py
 create mode 100644 tools/browser_cdp_tool.py

diff --git a/tests/tools/test_browser_cdp_tool.py b/tests/tools/test_browser_cdp_tool.py
new file mode 100644
index 00000000000..e7e187ceb0b
--- /dev/null
+++ b/tests/tools/test_browser_cdp_tool.py
@@ -0,0 +1,408 @@
+"""Unit tests for browser_cdp tool.
+
+Uses a tiny in-process ``websockets`` server to simulate a CDP endpoint —
+gives real protocol coverage (connect, send, recv, close) without needing
+a real Chrome instance.
+"""
+from __future__ import annotations
+
+import asyncio
+import json
+import threading
+import time
+from typing import Any, Dict, List
+
+import pytest
+
+import websockets
+from websockets.asyncio.server import serve
+
+from tools import browser_cdp_tool
+
+
+# ---------------------------------------------------------------------------
+# In-process CDP mock server
+# ---------------------------------------------------------------------------
+
+
+class _CDPServer:
+    """A tiny CDP-over-WebSocket mock.
+
+    Each client gets a greeting-free stream.  The server replies to each
+    inbound request whose ``id`` is set, using the registered handler for
+    that method.  If no handler is registered, returns a generic CDP error.
+    """
+
+    def __init__(self) -> None:
+        self._handlers: Dict[str, Any] = {}
+        self._responses: List[Dict[str, Any]] = []
+        self._loop: asyncio.AbstractEventLoop | None = None
+        self._server: Any = None
+        self._thread: threading.Thread | None = None
+        self._host = "127.0.0.1"
+        self._port = 0
+
+    # --- handler registration --------------------------------------------
+
+    def on(self, method: str, handler):
+        """Register a handler ``handler(params, session_id) -> dict or Exception``."""
+        self._handlers[method] = handler
+
+    # --- lifecycle -------------------------------------------------------
+
+    def start(self) -> str:
+        ready = threading.Event()
+
+        def _run() -> None:
+            self._loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(self._loop)
+
+            async def _handler(ws):
+                try:
+                    async for raw in ws:
+                        msg = json.loads(raw)
+                        call_id = msg.get("id")
+                        method = msg.get("method", "")
+                        params = msg.get("params", {}) or {}
+                        session_id = msg.get("sessionId")
+                        self._responses.append(msg)
+
+                        fn = self._handlers.get(method)
+                        if fn is None:
+                            reply = {
+                                "id": call_id,
+                                "error": {
+                                    "code": -32601,
+                                    "message": f"No handler for {method}",
+                                },
+                            }
+                        else:
+                            try:
+                                result = fn(params, session_id)
+                                if isinstance(result, Exception):
+                                    raise result
+                                reply = {"id": call_id, "result": result}
+                            except Exception as exc:
+                                reply = {
+                                    "id": call_id,
+                                    "error": {"code": -1, "message": str(exc)},
+                                }
+                        if session_id:
+                            reply["sessionId"] = session_id
+                        await ws.send(json.dumps(reply))
+                except websockets.exceptions.ConnectionClosed:
+                    pass
+
+            async def _serve() -> None:
+                self._server = await serve(_handler, self._host, 0)
+                sock = next(iter(self._server.sockets))
+                self._port = sock.getsockname()[1]
+                ready.set()
+                await self._server.wait_closed()
+
+            try:
+                self._loop.run_until_complete(_serve())
+            finally:
+                self._loop.close()
+
+        self._thread = threading.Thread(target=_run, daemon=True)
+        self._thread.start()
+        if not ready.wait(timeout=5.0):
+            raise RuntimeError("CDP mock server failed to start within 5s")
+        return f"ws://{self._host}:{self._port}/devtools/browser/mock"
+
+    def stop(self) -> None:
+        if self._loop and self._server:
+            def _close() -> None:
+                self._server.close()
+
+            self._loop.call_soon_threadsafe(_close)
+        if self._thread:
+            self._thread.join(timeout=3.0)
+
+    def received(self) -> List[Dict[str, Any]]:
+        return list(self._responses)
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def cdp_server(monkeypatch):
+    """Start a CDP mock and route tool resolution to it."""
+    server = _CDPServer()
+    ws_url = server.start()
+    monkeypatch.setattr(
+        browser_cdp_tool, "_resolve_cdp_endpoint", lambda: ws_url
+    )
+    try:
+        yield server
+    finally:
+        server.stop()
+
+
+# ---------------------------------------------------------------------------
+# Input validation
+# ---------------------------------------------------------------------------
+
+
+def test_missing_method_returns_error():
+    result = json.loads(browser_cdp_tool.browser_cdp(method=""))
+    assert "error" in result
+    assert "method" in result["error"].lower()
+    assert result.get("cdp_docs") == browser_cdp_tool.CDP_DOCS_URL
+
+
+def test_non_string_method_returns_error():
+    result = json.loads(browser_cdp_tool.browser_cdp(method=123))  # type: ignore[arg-type]
+    assert "error" in result
+    assert "method" in result["error"].lower()
+
+
+def test_non_dict_params_returns_error(monkeypatch):
+    monkeypatch.setattr(
+        browser_cdp_tool, "_resolve_cdp_endpoint", lambda: "ws://localhost:9999"
+    )
+    result = json.loads(
+        browser_cdp_tool.browser_cdp(method="Target.getTargets", params="not-a-dict")  # type: ignore[arg-type]
+    )
+    assert "error" in result
+    assert "object" in result["error"].lower() or "dict" in result["error"].lower()
+
+
+# ---------------------------------------------------------------------------
+# Endpoint resolution
+# ---------------------------------------------------------------------------
+
+
+def test_no_endpoint_returns_helpful_error(monkeypatch):
+    monkeypatch.setattr(browser_cdp_tool, "_resolve_cdp_endpoint", lambda: "")
+    result = json.loads(browser_cdp_tool.browser_cdp(method="Target.getTargets"))
+    assert "error" in result
+    assert "/browser connect" in result["error"]
+    assert result.get("cdp_docs") == browser_cdp_tool.CDP_DOCS_URL
+
+
+def test_non_ws_endpoint_returns_error(monkeypatch):
+    monkeypatch.setattr(
+        browser_cdp_tool, "_resolve_cdp_endpoint", lambda: "http://localhost:9222"
+    )
+    result = json.loads(browser_cdp_tool.browser_cdp(method="Target.getTargets"))
+    assert "error" in result
+    assert "WebSocket" in result["error"]
+
+
+def test_websockets_missing_returns_error(monkeypatch):
+    monkeypatch.setattr(browser_cdp_tool, "_WS_AVAILABLE", False)
+    result = json.loads(browser_cdp_tool.browser_cdp(method="Target.getTargets"))
+    assert "error" in result
+    assert "websockets" in result["error"].lower()
+
+
+# ---------------------------------------------------------------------------
+# Happy-path: browser-level call
+# ---------------------------------------------------------------------------
+
+
+def test_browser_level_success(cdp_server):
+    cdp_server.on(
+        "Target.getTargets",
+        lambda params, sid: {
+            "targetInfos": [
+                {"targetId": "A", "type": "page", "title": "Tab 1", "url": "about:blank"},
+                {"targetId": "B", "type": "page", "title": "Tab 2", "url": "https://a.test"},
+            ]
+        },
+    )
+    result = json.loads(browser_cdp_tool.browser_cdp(method="Target.getTargets"))
+    assert result["success"] is True
+    assert result["method"] == "Target.getTargets"
+    assert "target_id" not in result
+    assert len(result["result"]["targetInfos"]) == 2
+    # Verify the server actually received exactly one call (no extra traffic)
+    calls = cdp_server.received()
+    assert len(calls) == 1
+    assert calls[0]["method"] == "Target.getTargets"
+    assert "sessionId" not in calls[0]
+
+
+def test_empty_params_sends_empty_object(cdp_server):
+    cdp_server.on("Browser.getVersion", lambda params, sid: {"product": "Mock/1.0"})
+    json.loads(browser_cdp_tool.browser_cdp(method="Browser.getVersion"))
+    assert cdp_server.received()[0]["params"] == {}
+
+
+# ---------------------------------------------------------------------------
+# Happy-path: target-attached call
+# ---------------------------------------------------------------------------
+
+
+def test_target_attach_then_call(cdp_server):
+    cdp_server.on(
+        "Target.attachToTarget",
+        lambda params, sid: {"sessionId": f"sess-{params['targetId']}"},
+    )
+    cdp_server.on(
+        "Runtime.evaluate",
+        lambda params, sid: {
+            "result": {"type": "string", "value": f"evaluated[{sid}]"},
+        },
+    )
+    result = json.loads(
+        browser_cdp_tool.browser_cdp(
+            method="Runtime.evaluate",
+            params={"expression": "document.title", "returnByValue": True},
+            target_id="tab-A",
+        )
+    )
+    assert result["success"] is True
+    assert result["target_id"] == "tab-A"
+    assert result["result"]["result"]["value"] == "evaluated[sess-tab-A]"
+
+    calls = cdp_server.received()
+    # First call: attach
+    assert calls[0]["method"] == "Target.attachToTarget"
+    assert calls[0]["params"] == {"targetId": "tab-A", "flatten": True}
+    # Second call: dispatched method on the session
+    assert calls[1]["method"] == "Runtime.evaluate"
+    assert calls[1]["sessionId"] == "sess-tab-A"
+
+
+# ---------------------------------------------------------------------------
+# CDP error responses
+# ---------------------------------------------------------------------------
+
+
+def test_cdp_method_error_returns_tool_error(cdp_server):
+    # No handler registered -> server returns CDP error
+    result = json.loads(
+        browser_cdp_tool.browser_cdp(method="NonExistent.method")
+    )
+    assert "error" in result
+    assert "CDP error" in result["error"]
+    assert result.get("method") == "NonExistent.method"
+
+
+def test_attach_failure_returns_tool_error(cdp_server):
+    # Target.attachToTarget has no handler -> server errors on attach
+    result = json.loads(
+        browser_cdp_tool.browser_cdp(
+            method="Runtime.evaluate",
+            params={"expression": "1+1"},
+            target_id="missing",
+        )
+    )
+    assert "error" in result
+    assert "Target.attachToTarget" in result["error"]
+
+
+# ---------------------------------------------------------------------------
+# Timeouts
+# ---------------------------------------------------------------------------
+
+
+def test_timeout_when_server_never_replies(cdp_server):
+    # Handler sleeps 10s, well past the tool timeout (0.5 is clamped up to 1s)
+    def slow(params, sid):
+        time.sleep(10)
+        return {}
+
+    cdp_server.on("Page.slowMethod", slow)
+    result = json.loads(
+        browser_cdp_tool.browser_cdp(
+            method="Page.slowMethod", timeout=0.5
+        )
+    )
+    assert "error" in result
+    assert "tim" in result["error"].lower()
+
+
+# ---------------------------------------------------------------------------
+# Timeout clamping
+# ---------------------------------------------------------------------------
+
+
+def test_timeout_clamped_above_max(cdp_server):
+    cdp_server.on("Browser.getVersion", lambda p, s: {"product": "ok"})
+    # timeout=10_000 should be clamped to 300 but still succeed
+    result = json.loads(
+        browser_cdp_tool.browser_cdp(method="Browser.getVersion", timeout=10_000)
+    )
+    assert result["success"] is True
+
+
+def test_invalid_timeout_falls_back_to_default(cdp_server):
+    cdp_server.on("Browser.getVersion", lambda p, s: {"product": "ok"})
+    result = json.loads(
+        browser_cdp_tool.browser_cdp(method="Browser.getVersion", timeout="nope")  # type: ignore[arg-type]
+    )
+    assert result["success"] is True
+
+
+# ---------------------------------------------------------------------------
+# Registry integration
+# ---------------------------------------------------------------------------
+
+
+def test_registered_in_browser_toolset():
+    from tools.registry import registry
+
+    entry = registry.get_entry("browser_cdp")
+    assert entry is not None
+    assert entry.toolset == "browser"
+    assert entry.schema["name"] == "browser_cdp"
+    assert entry.schema["parameters"]["required"] == ["method"]
+    assert "Chrome DevTools Protocol" in entry.schema["description"]
+    assert browser_cdp_tool.CDP_DOCS_URL in entry.schema["description"]
+
+
+def test_dispatch_through_registry(cdp_server):
+    from tools.registry import registry
+
+    cdp_server.on("Target.getTargets", lambda p, s: {"targetInfos": []})
+    raw = registry.dispatch(
+        "browser_cdp", {"method": "Target.getTargets"}, task_id="t1"
+    )
+    result = json.loads(raw)
+    assert result["success"] is True
+    assert result["method"] == "Target.getTargets"
+
+
+# ---------------------------------------------------------------------------
+# check_fn gating
+# ---------------------------------------------------------------------------
+
+
+def test_check_fn_false_when_no_cdp_url(monkeypatch):
+    """Gate closes when no CDP URL is set — even if the browser toolset is
+    otherwise configured."""
+    import tools.browser_tool as bt
+
+    monkeypatch.setattr(bt, "check_browser_requirements", lambda: True)
+    monkeypatch.setattr(bt, "_get_cdp_override", lambda: "")
+    assert browser_cdp_tool._browser_cdp_check() is False
+
+
+def test_check_fn_true_when_cdp_url_set(monkeypatch):
+    """Gate opens as soon as a CDP URL is resolvable."""
+    import tools.browser_tool as bt
+
+    monkeypatch.setattr(bt, "check_browser_requirements", lambda: True)
+    monkeypatch.setattr(
+        bt, "_get_cdp_override", lambda: "ws://localhost:9222/devtools/browser/x"
+    )
+    assert browser_cdp_tool._browser_cdp_check() is True
+
+
+def test_check_fn_false_when_browser_requirements_fail(monkeypatch):
+    """Even with a CDP URL, gate closes if the overall browser toolset is
+    unavailable (e.g. agent-browser not installed)."""
+    import tools.browser_tool as bt
+
+    monkeypatch.setattr(bt, "check_browser_requirements", lambda: False)
+    monkeypatch.setattr(
+        bt, "_get_cdp_override", lambda: "ws://localhost:9222/devtools/browser/x"
+    )
+    assert browser_cdp_tool._browser_cdp_check() is False
diff --git a/tools/browser_cdp_tool.py b/tools/browser_cdp_tool.py
new file mode 100644
index 00000000000..7817b9c35a5
--- /dev/null
+++ b/tools/browser_cdp_tool.py
@@ -0,0 +1,416 @@
+#!/usr/bin/env python3
+"""
+Raw Chrome DevTools Protocol (CDP) passthrough tool.
+
+Exposes a single tool, ``browser_cdp``, that sends arbitrary CDP commands to
+the browser's DevTools WebSocket endpoint.  Works only when a CDP URL is
+configured — either via ``/browser connect`` (sets ``BROWSER_CDP_URL``) or
+``browser.cdp_url`` in ``config.yaml``.  Cloud-provider sessions are not
+wired up yet (see ``_browser_cdp_check``).
+
+This is the escape hatch for browser operations not covered by the main
+browser tool surface (``browser_navigate``, ``browser_click``,
+``browser_console``, etc.) — handling native dialogs, iframe-scoped
+evaluation, cookie/network control, low-level tab management, etc.
+
+Method reference: https://chromedevtools.github.io/devtools-protocol/
+"""
+from __future__ import annotations
+
+import asyncio
+import json
+import logging
+import os
+from typing import Any, Dict, Optional
+
+from tools.registry import registry, tool_error
+
+logger = logging.getLogger(__name__)
+
+CDP_DOCS_URL = "https://chromedevtools.github.io/devtools-protocol/"
+
+# ``websockets`` is a transitive dependency of hermes-agent (via fal_client
+# and firecrawl-py) and is already imported by gateway/platforms/feishu.py.
+# Wrap the import so a clean error surfaces if the package is ever absent.
+try:
+    import websockets
+    from websockets.exceptions import WebSocketException
+
+    _WS_AVAILABLE = True
+except ImportError:
+    websockets = None  # type: ignore[assignment]
+    WebSocketException = Exception  # type: ignore[assignment,misc]
+    _WS_AVAILABLE = False
+
+
+# ---------------------------------------------------------------------------
+# Async-from-sync bridge (matches the pattern in homeassistant_tool.py)
+# ---------------------------------------------------------------------------
+
+
+def _run_async(coro):
+    """Run an async coroutine from a sync handler, safe inside or outside a loop."""
+    try:
+        loop = asyncio.get_running_loop()
+    except RuntimeError:
+        loop = None
+
+    if loop and loop.is_running():
+        import concurrent.futures
+
+        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
+            future = pool.submit(asyncio.run, coro)
+            return future.result()
+    return asyncio.run(coro)
+
+
+# ---------------------------------------------------------------------------
+# Endpoint resolution
+# ---------------------------------------------------------------------------
+
+
+def _resolve_cdp_endpoint() -> str:
+    """Return the normalized CDP WebSocket URL, or empty string if unavailable.
+
+    Delegates to ``tools.browser_tool._get_cdp_override`` so precedence stays
+    consistent with the rest of the browser tool surface:
+
+    1. ``BROWSER_CDP_URL`` env var (live override from ``/browser connect``)
+    2. ``browser.cdp_url`` in ``config.yaml``
+    """
+    try:
+        from tools.browser_tool import _get_cdp_override  # type: ignore[import-not-found]
+
+        return (_get_cdp_override() or "").strip()
+    except Exception as exc:  # pragma: no cover — defensive
+        logger.debug("browser_cdp: failed to resolve CDP endpoint: %s", exc)
+        return ""
+
+
+# ---------------------------------------------------------------------------
+# Core CDP call
+# ---------------------------------------------------------------------------
+
+
+async def _cdp_call(
+    ws_url: str,
+    method: str,
+    params: Dict[str, Any],
+    target_id: Optional[str],
+    timeout: float,
+) -> Dict[str, Any]:
+    """Make a single CDP call, optionally attaching to a target first.
+
+    When ``target_id`` is provided, we call ``Target.attachToTarget`` with
+    ``flatten=True`` to multiplex a page-level session over the same
+    browser-level WebSocket, then send ``method`` with that ``sessionId``.
+    When ``target_id`` is None, ``method`` is sent at browser level — which
+    works for ``Target.*``, ``Browser.*``, ``Storage.*`` and a few other
+    globally-scoped domains.
+    """
+    assert websockets is not None  # guarded by _WS_AVAILABLE at call-site
+
+    async with websockets.connect(
+        ws_url,
+        max_size=None,  # CDP responses (e.g. DOM.getDocument) can be large
+        open_timeout=timeout,
+        close_timeout=5,
+        ping_interval=None,  # CDP server doesn't expect pings
+    ) as ws:
+        next_id = 1
+        session_id: Optional[str] = None
+
+        # --- Step 1: attach to target if requested ---
+        if target_id:
+            attach_id = next_id
+            next_id += 1
+            await ws.send(
+                json.dumps(
+                    {
+                        "id": attach_id,
+                        "method": "Target.attachToTarget",
+                        "params": {"targetId": target_id, "flatten": True},
+                    }
+                )
+            )
+            deadline = asyncio.get_running_loop().time() + timeout
+            while True:
+                remaining = deadline - asyncio.get_running_loop().time()
+                if remaining <= 0:
+                    raise TimeoutError(
+                        f"Timed out attaching to target {target_id}"
+                    )
+                raw = await asyncio.wait_for(ws.recv(), timeout=remaining)
+                msg = json.loads(raw)
+                if msg.get("id") == attach_id:
+                    if "error" in msg:
+                        raise RuntimeError(
+                            f"Target.attachToTarget failed: {msg['error']}"
+                        )
+                    session_id = msg.get("result", {}).get("sessionId")
+                    if not session_id:
+                        raise RuntimeError(
+                            "Target.attachToTarget did not return a sessionId"
+                        )
+                    break
+                # Ignore events (messages without "id") while waiting
+
+        # --- Step 2: dispatch the real method ---
+        call_id = next_id
+        next_id += 1
+        req: Dict[str, Any] = {
+            "id": call_id,
+            "method": method,
+            "params": params or {},
+        }
+        if session_id:
+            req["sessionId"] = session_id
+        await ws.send(json.dumps(req))
+
+        deadline = asyncio.get_running_loop().time() + timeout
+        while True:
+            remaining = deadline - asyncio.get_running_loop().time()
+            if remaining <= 0:
+                raise TimeoutError(
+                    f"Timed out waiting for response to {method}"
+                )
+            raw = await asyncio.wait_for(ws.recv(), timeout=remaining)
+            msg = json.loads(raw)
+            if msg.get("id") == call_id:
+                if "error" in msg:
+                    raise RuntimeError(f"CDP error: {msg['error']}")
+                return msg.get("result", {})
+            # Ignore events / out-of-order responses
+
+
+# ---------------------------------------------------------------------------
+# Public tool function
+# ---------------------------------------------------------------------------
+
+
+def browser_cdp(
+    method: str,
+    params: Optional[Dict[str, Any]] = None,
+    target_id: Optional[str] = None,
+    timeout: float = 30.0,
+    task_id: Optional[str] = None,
+) -> str:
+    """Send a raw CDP command.  See ``CDP_DOCS_URL`` for method documentation.
+
+    Args:
+        method: CDP method name, e.g. ``"Target.getTargets"``.
+        params: Method-specific parameters; defaults to ``{}``.
+        target_id: Optional target/tab ID for page-level methods.  When set,
+            we first attach to the target (``flatten=True``) and send
+            ``method`` with the resulting ``sessionId``.
+        timeout: Seconds to wait per protocol step; clamped to [1, 300].
+        task_id: Unused (tool is stateless) — accepted for uniformity with
+            other browser tools.
+
+    Returns:
+        JSON string ``{"success": True, "method": ..., "result": {...}}`` on
+        success, or ``{"error": "..."}`` on failure.
+    """
+    del task_id  # unused — stateless
+
+    if not method or not isinstance(method, str):
+        return tool_error(
+            "'method' is required (e.g. 'Target.getTargets')",
+            cdp_docs=CDP_DOCS_URL,
+        )
+
+    if not _WS_AVAILABLE:
+        return tool_error(
+            "The 'websockets' Python package is required but not installed. "
+            "Install it with: pip install websockets"
+        )
+
+    endpoint = _resolve_cdp_endpoint()
+    if not endpoint:
+        return tool_error(
+            "No CDP endpoint is available. Run '/browser connect' to attach "
+            "to a running Chrome, or set 'browser.cdp_url' in config.yaml. "
+            "The Camofox backend is REST-only and does not expose CDP.",
+            cdp_docs=CDP_DOCS_URL,
+        )
+
+    if not endpoint.startswith(("ws://", "wss://")):
+        return tool_error(
+            f"CDP endpoint is not a WebSocket URL: {endpoint!r}. "
+            "Expected ws://... or wss://... — the /browser connect "
+            "resolver should have rewritten this. Check that Chrome is "
+            "actually listening on the debug port."
+        )
+
+    call_params: Dict[str, Any] = params or {}
+    if not isinstance(call_params, dict):
+        return tool_error(
+            f"'params' must be an object/dict, got {type(call_params).__name__}"
+        )
+
+    try:
+        safe_timeout = float(timeout) if timeout else 30.0
+    except (TypeError, ValueError):
+        safe_timeout = 30.0
+    safe_timeout = max(1.0, min(safe_timeout, 300.0))
+
+    try:
+        result = _run_async(
+            _cdp_call(endpoint, method, call_params, target_id, safe_timeout)
+        )
+    except asyncio.TimeoutError as exc:
+        return tool_error(
+            f"CDP call timed out after {safe_timeout}s: {exc}",
+            method=method,
+        )
+    except TimeoutError as exc:
+        return tool_error(str(exc), method=method)
+    except RuntimeError as exc:
+        return tool_error(str(exc), method=method)
+    except (WebSocketException, OSError) as exc:  # OSError: refused/reset socket
+        return tool_error(
+            f"WebSocket error talking to CDP at {endpoint}: {exc}. The "
+            "browser may have disconnected — try '/browser connect' again.",
+            method=method,
+        )
+    except Exception as exc:  # pragma: no cover — unexpected
+        logger.exception("browser_cdp unexpected error")
+        return tool_error(
+            f"Unexpected error: {type(exc).__name__}: {exc}",
+            method=method,
+        )
+
+    payload: Dict[str, Any] = {
+        "success": True,
+        "method": method,
+        "result": result,
+    }
+    if target_id:
+        payload["target_id"] = target_id
+    return json.dumps(payload, ensure_ascii=False)
+
+
+# ---------------------------------------------------------------------------
+# Registry
+# ---------------------------------------------------------------------------
+
+
+BROWSER_CDP_SCHEMA: Dict[str, Any] = {
+    "name": "browser_cdp",
+    "description": (
+        "Send a raw Chrome DevTools Protocol (CDP) command. Escape hatch for "
+        "browser operations not covered by browser_navigate, browser_click, "
+        "browser_console, etc.\n\n"
+        "**Requires a reachable CDP endpoint.** Available when the user has "
+        "run '/browser connect' to attach to a running Chrome, or when "
+        "'browser.cdp_url' is set in config.yaml. Not currently wired up for "
+        "cloud backends (Browserbase, Browser Use, Firecrawl) — those expose "
+        "CDP per session but live-session routing is a follow-up. Camofox is "
+        "REST-only and will never support CDP. If the tool is in your toolset "
+        "at all, a CDP endpoint is already reachable.\n\n"
+        f"**CDP method reference:** {CDP_DOCS_URL} — use web_extract on a "
+        "method's URL (e.g. '/tot/Page/#method-handleJavaScriptDialog') "
+        "to look up parameters and return shape.\n\n"
+        "**Common patterns:**\n"
+        "- List tabs: method='Target.getTargets', params={}\n"
+        "- Handle a native JS dialog: method='Page.handleJavaScriptDialog', "
+        "params={'accept': true, 'promptText': ''}, target_id=<tabId>\n"
+        "- Get all cookies: method='Network.getAllCookies', params={}\n"
+        "- Eval in a specific tab: method='Runtime.evaluate', "
+        "params={'expression': '...', 'returnByValue': true}, "
+        "target_id=<tabId>\n"
+        "- Set viewport for a tab: method='Emulation.setDeviceMetricsOverride', "
+        "params={'width': 1280, 'height': 720, 'deviceScaleFactor': 1, "
+        "'mobile': false}, target_id=<tabId>\n\n"
+        "**Usage rules:**\n"
+        "- Browser-level methods (Target.*, Browser.*, Storage.*): omit "
+        "target_id.\n"
+        "- Page-level methods (Page.*, Runtime.*, DOM.*, Emulation.*, "
+        "Network.* scoped to a tab): pass target_id from Target.getTargets.\n"
+        "- Each call is independent — sessions and event subscriptions do "
+        "not persist between calls. For stateful workflows, prefer the "
+        "dedicated browser tools."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "method": {
+                "type": "string",
+                "description": (
+                    "CDP method name, e.g. 'Target.getTargets', "
+                    "'Runtime.evaluate', 'Page.handleJavaScriptDialog'."
+                ),
+            },
+            "params": {
+                "type": "object",
+                "description": (
+                    "Method-specific parameters as a JSON object. Omit or "
+                    "pass {} for methods that take no parameters."
+                ),
+                "additionalProperties": True,
+            },
+            "target_id": {
+                "type": "string",
+                "description": (
+                    "Optional. Target/tab ID from Target.getTargets result "
+                    "(each entry's 'targetId'). Required for page-level "
+                    "methods; must be omitted for browser-level methods."
+                ),
+            },
+            "timeout": {
+                "type": "number",
+                "description": (
+                    "Timeout in seconds (default 30, max 300)."
+                ),
+                "default": 30,
+            },
+        },
+        "required": ["method"],
+    },
+}
+
+
+def _browser_cdp_check() -> bool:
+    """Availability check for browser_cdp.
+
+    The tool is only offered when the Python side can actually reach a CDP
+    endpoint right now — meaning a static URL is set via ``/browser connect``
+    (``BROWSER_CDP_URL``) or ``browser.cdp_url`` in ``config.yaml``.
+
+    Backends that do *not* currently expose CDP to us — Camofox (REST-only),
+    the default local agent-browser mode (Playwright hides its internal CDP
+    port), and cloud providers whose per-session ``cdp_url`` is not yet
+    surfaced — are gated out so the model doesn't see a tool that would
+    reliably fail.  Cloud-provider CDP routing is a follow-up.
+
+    Kept in a thin wrapper so the registration statement stays at module top
+    level (the tool-discovery AST scan only picks up top-level
+    ``registry.register(...)`` calls).
+    """
+    try:
+        from tools.browser_tool import (  # type: ignore[import-not-found]
+            _get_cdp_override,
+            check_browser_requirements,
+        )
+    except ImportError as exc:  # pragma: no cover — defensive
+        logger.debug("browser_cdp check: browser_tool import failed: %s", exc)
+        return False
+    if not check_browser_requirements():
+        return False
+    return bool(_get_cdp_override())
+
+
+registry.register(
+    name="browser_cdp",
+    toolset="browser",
+    schema=BROWSER_CDP_SCHEMA,
+    handler=lambda args, **kw: browser_cdp(
+        method=args.get("method", ""),
+        params=args.get("params"),
+        target_id=args.get("target_id"),
+        timeout=args.get("timeout", 30.0),
+        task_id=kw.get("task_id"),
+    ),
+    check_fn=_browser_cdp_check,
+    emoji="🧪",
+)
diff --git a/toolsets.py b/toolsets.py
index 6ac8d0782d6..d9f353e1f20 100644
--- a/toolsets.py
+++ b/toolsets.py
@@ -43,7 +43,7 @@ _HERMES_CORE_TOOLS = [
     "browser_navigate", "browser_snapshot", "browser_click",
     "browser_type", "browser_scroll", "browser_back",
     "browser_press", "browser_get_images",
-    "browser_vision", "browser_console",
+    "browser_vision", "browser_console", "browser_cdp",
     # Text-to-speech
     "text_to_speech",
     # Planning & memory
@@ -115,7 +115,7 @@ TOOLSETS = {
             "browser_navigate", "browser_snapshot", "browser_click",
             "browser_type", "browser_scroll", "browser_back",
             "browser_press", "browser_get_images",
-            "browser_vision", "browser_console", "web_search"
+            "browser_vision", "browser_console", "browser_cdp", "web_search"
         ],
         "includes": []
     },
@@ -249,7 +249,7 @@ TOOLSETS = {
             "browser_navigate", "browser_snapshot", "browser_click",
             "browser_type", "browser_scroll", "browser_back",
             "browser_press", "browser_get_images",
-            "browser_vision", "browser_console",
+            "browser_vision", "browser_console", "browser_cdp",
             "todo", "memory",
             "session_search",
             "execute_code", "delegate_task",
@@ -274,7 +274,7 @@ TOOLSETS = {
             "browser_navigate", "browser_snapshot", "browser_click",
             "browser_type", "browser_scroll", "browser_back",
             "browser_press", "browser_get_images",
-            "browser_vision", "browser_console",
+            "browser_vision", "browser_console", "browser_cdp",
             # Planning & memory
             "todo", "memory",
             # Session history search
diff --git a/website/docs/reference/tools-reference.md b/website/docs/reference/tools-reference.md
index 40d44627ec7..c255c8f6a41 100644
--- a/website/docs/reference/tools-reference.md
+++ b/website/docs/reference/tools-reference.md
@@ -6,9 +6,9 @@ description: "Authoritative reference for Hermes built-in tools, grouped by tool
 
 # Built-in Tools Reference
 
-This page documents all 52 built-in tools in the Hermes tool registry, grouped by toolset. Availability varies by platform, credentials, and enabled toolsets.
+This page documents all 53 built-in tools in the Hermes tool registry, grouped by toolset. Availability varies by platform, credentials, and enabled toolsets.
 
-**Quick counts:** 10 browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, 5 Feishu tools, and 15 standalone tools across other toolsets.
+**Quick counts:** 11 browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, 5 Feishu tools, and 15 standalone tools across other toolsets.
 
 :::tip MCP Tools
 In addition to built-in tools, Hermes can load tools dynamically from MCP servers. MCP tools appear with a server-name prefix (e.g., `github_create_issue` for the `github` MCP server). See [MCP Integration](/docs/user-guide/features/mcp) for configuration.
@@ -19,6 +19,7 @@ In addition to built-in tools, Hermes can load tools dynamically from MCP server
 | Tool | Description | Requires environment |
 |------|-------------|----------------------|
 | `browser_back` | Navigate back to the previous page in browser history. Requires browser_navigate to be called first. | — |
+| `browser_cdp` | Send a raw Chrome DevTools Protocol (CDP) command. Escape hatch for browser operations not covered by browser_navigate, browser_click, browser_console, etc. Only available when a CDP endpoint is reachable at session start — via `/browser connect` or `browser.cdp_url` config. See https://chromedevtools.github.io/devtools-protocol/ | — |
 | `browser_click` | Click on an element identified by its ref ID from the snapshot (e.g., '@e5'). The ref IDs are shown in square brackets in the snapshot output. Requires browser_navigate and browser_snapshot to be called first. | — |
 | `browser_console` | Get browser console output and JavaScript errors from the current page. Returns console.log/warn/error/info messages and uncaught JS exceptions. Use this to detect silent JavaScript errors, failed API calls, and application warnings. Requi… | — |
 | `browser_get_images` | Get a list of all images on the current page with their URLs and alt text. Useful for finding images to analyze with the vision tool. Requires browser_navigate to be called first. | — |
diff --git a/website/docs/reference/toolsets-reference.md b/website/docs/reference/toolsets-reference.md
index 7593a3fdcfd..bb911004e19 100644
--- a/website/docs/reference/toolsets-reference.md
+++ b/website/docs/reference/toolsets-reference.md
@@ -52,7 +52,7 @@ Or in-session:
 
 | Toolset | Tools | Purpose |
 |---------|-------|---------|
-| `browser` | `browser_back`, `browser_click`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `web_search` | Full browser automation. Includes `web_search` as a fallback for quick lookups. |
+| `browser` | `browser_back`, `browser_cdp`, `browser_click`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `web_search` | Full browser automation. Includes `web_search` as a fallback for quick lookups. `browser_cdp` is a raw CDP passthrough gated on a reachable CDP endpoint — it only appears when `/browser connect` is active or `browser.cdp_url` is set. |
 | `clarify` | `clarify` | Ask the user a question when the agent needs clarification. |
 | `code_execution` | `execute_code` | Run Python scripts that call Hermes tools programmatically. |
 | `cronjob` | `cronjob` | Schedule and manage recurring tasks. |
diff --git a/website/docs/user-guide/features/browser.md b/website/docs/user-guide/features/browser.md
index 5b2462d2e37..d6624bf7d11 100644
--- a/website/docs/user-guide/features/browser.md
+++ b/website/docs/user-guide/features/browser.md
@@ -327,6 +327,36 @@ Check the browser console for any JavaScript errors
 
 Use `clear=True` to clear the console after reading, so subsequent calls only show new messages.
 
+### `browser_cdp`
+
+Raw Chrome DevTools Protocol passthrough — the escape hatch for browser operations not covered by the other tools. Use for native dialog handling, iframe-scoped evaluation, cookie/network control, or any CDP verb the agent needs.
+
+**Only available when a CDP endpoint is reachable at session start** — meaning `/browser connect` has attached to a running Chrome, or `browser.cdp_url` is set in `config.yaml`. The default local agent-browser mode, Camofox, and cloud providers (Browserbase, Browser Use, Firecrawl) do not currently expose CDP to this tool — cloud providers have per-session CDP URLs but live-session routing is a follow-up.
+
+**CDP method reference:** https://chromedevtools.github.io/devtools-protocol/ — the agent can `web_extract` a specific method's page to look up parameters and return shape.
+
+Common patterns:
+
+```
+# List tabs (browser-level, no target_id)
+browser_cdp(method="Target.getTargets")
+
+# Handle a native JS dialog on a tab
+browser_cdp(method="Page.handleJavaScriptDialog",
+            params={"accept": true, "promptText": ""},
+            target_id="<tabId>")
+
+# Evaluate JS in a specific tab
+browser_cdp(method="Runtime.evaluate",
+            params={"expression": "document.title", "returnByValue": true},
+            target_id="<tabId>")
+
+# Get all cookies
+browser_cdp(method="Network.getAllCookies")
+```
+
+Browser-level methods (`Target.*`, `Browser.*`, `Storage.*`) omit `target_id`. Page-level methods (`Page.*`, `Runtime.*`, `DOM.*`, `Emulation.*`) require a `target_id` from `Target.getTargets`. Each call is independent — sessions do not persist between calls.
+
 ## Practical Examples
 
 ### Filling Out a Web Form

From dca439fe9213f86c83fdd43f70bf6e1750902b54 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 19 Apr 2026 00:03:58 -0700
Subject: [PATCH 108/143] fix(tui): scope session.interrupt pending-prompt
 release to the calling session (#12441)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

session.interrupt on session A was blast-resolving pending
clarify/sudo/secret prompts on ALL sessions sharing the same
tui_gateway process.  Other sessions' agent threads unblocked with
empty-string answers as if the user had cancelled — silent
cross-session corruption.

Root cause: _pending and _answers were globals keyed by random rid
with no record of the owning session.  _clear_pending() iterated
every entry, so the session.interrupt handler had no way to limit
the release to its own sid.

Fix:
- tui_gateway/server.py: _pending now maps rid to (sid, Event)
  tuples.  _clear_pending takes an optional sid argument and filters
  by owner_sid when provided.  session.interrupt passes the calling
  sid so unrelated sessions are untouched.  _clear_pending(None)
  remains the shutdown path for completeness.
- _block and _respond updated to pack/unpack the new tuple format.

Tests (tests/test_tui_gateway_server.py): 4 new cases.
- test_interrupt_only_clears_own_session_pending: two sessions with
  pending prompts, interrupting one must not release the other.
- test_interrupt_clears_multiple_own_pending: same-sid multi-prompt
  release works.
- test_clear_pending_without_sid_clears_all: shutdown path preserved.
- test_respond_unpacks_sid_tuple_correctly: _respond handles the
  tuple format.

Also updated tests/tui_gateway/test_protocol.py to use the new tuple
format for test_block_and_respond and test_clear_pending.

Live E2E against the live Python environment confirmed cross-session
isolation: interrupting sid_a released its own pending prompt without
touching sid_b's.  All 78 related tests pass.
---
 tests/test_tui_gateway_server.py   | 116 +++++++++++++++++++++++++++++
 tests/tui_gateway/test_protocol.py |   7 +-
 tui_gateway/server.py              |  32 +++++---
 3 files changed, 144 insertions(+), 11 deletions(-)

diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py
index 8831efb8965..07a68ac9e9d 100644
--- a/tests/test_tui_gateway_server.py
+++ b/tests/test_tui_gateway_server.py
@@ -712,3 +712,119 @@ def test_prompt_submit_history_version_match_persists_normally(monkeypatch):
     finally:
         server._sessions.pop("sid", None)
 
+
+# ---------------------------------------------------------------------------
+# session.interrupt must only cancel pending prompts owned by the calling
+# session — it must not blast-resolve clarify/sudo/secret prompts on
+# unrelated sessions sharing the same tui_gateway process.  Without
+# session scoping the other sessions' prompts silently resolve to empty
+# strings, unblocking their agent threads as if the user cancelled.
+# ---------------------------------------------------------------------------
+
+
+def test_interrupt_only_clears_own_session_pending():
+    """session.interrupt on session A must NOT release pending prompts
+    that belong to session B."""
+    import types
+
+    session_a = _session()
+    session_a["agent"] = types.SimpleNamespace(interrupt=lambda: None)
+    session_b = _session()
+    session_b["agent"] = types.SimpleNamespace(interrupt=lambda: None)
+    server._sessions["sid_a"] = session_a
+    server._sessions["sid_b"] = session_b
+
+    try:
+        # Simulate pending prompts on both sessions (what _block creates
+        # while a clarify/sudo/secret request is outstanding).
+        ev_a = threading.Event()
+        ev_b = threading.Event()
+        server._pending["rid-a"] = ("sid_a", ev_a)
+        server._pending["rid-b"] = ("sid_b", ev_b)
+        server._answers.clear()
+
+        # Interrupt session A.
+        resp = server.handle_request(
+            {"id": "1", "method": "session.interrupt", "params": {"session_id": "sid_a"}}
+        )
+        assert resp.get("result"), f"got error: {resp.get('error')}"
+
+        # Session A's pending must be released to empty.
+        assert ev_a.is_set(), "sid_a pending Event should be set after interrupt"
+        assert server._answers.get("rid-a") == ""
+
+        # Session B's pending MUST remain untouched — no cross-session blast.
+        assert not ev_b.is_set(), (
+            "CRITICAL: session.interrupt on sid_a released a pending prompt "
+            "belonging to sid_b — other sessions' clarify/sudo/secret "
+            "prompts are being silently cancelled"
+        )
+        assert "rid-b" not in server._answers
+    finally:
+        server._sessions.pop("sid_a", None)
+        server._sessions.pop("sid_b", None)
+        server._pending.pop("rid-a", None)
+        server._pending.pop("rid-b", None)
+        server._answers.pop("rid-a", None)
+        server._answers.pop("rid-b", None)
+
+
+def test_interrupt_clears_multiple_own_pending():
+    """When a single session has multiple pending prompts (uncommon but
+    possible via nested tool calls), interrupt must release all of them."""
+    import types
+
+    sess = _session()
+    sess["agent"] = types.SimpleNamespace(interrupt=lambda: None)
+    server._sessions["sid"] = sess
+
+    try:
+        ev1, ev2 = threading.Event(), threading.Event()
+        server._pending["r1"] = ("sid", ev1)
+        server._pending["r2"] = ("sid", ev2)
+
+        resp = server.handle_request(
+            {"id": "1", "method": "session.interrupt", "params": {"session_id": "sid"}}
+        )
+        assert resp.get("result")
+        assert ev1.is_set() and ev2.is_set()
+        assert server._answers.get("r1") == "" and server._answers.get("r2") == ""
+    finally:
+        server._sessions.pop("sid", None)
+        for key in ("r1", "r2"):
+            server._pending.pop(key, None)
+            server._answers.pop(key, None)
+
+
+def test_clear_pending_without_sid_clears_all():
+    """_clear_pending(None) is the shutdown path — must still release
+    every pending prompt regardless of owning session."""
+    ev1, ev2, ev3 = threading.Event(), threading.Event(), threading.Event()
+    server._pending["a"] = ("sid_x", ev1)
+    server._pending["b"] = ("sid_y", ev2)
+    server._pending["c"] = ("sid_z", ev3)
+    try:
+        server._clear_pending(None)
+        assert ev1.is_set() and ev2.is_set() and ev3.is_set()
+    finally:
+        for key in ("a", "b", "c"):
+            server._pending.pop(key, None)
+            server._answers.pop(key, None)
+
+
+def test_respond_unpacks_sid_tuple_correctly():
+    """After the (sid, Event) tuple change, _respond must still work."""
+    ev = threading.Event()
+    server._pending["rid-x"] = ("sid_x", ev)
+    try:
+        resp = server.handle_request(
+            {"id": "1", "method": "clarify.respond",
+             "params": {"request_id": "rid-x", "answer": "the answer"}}
+        )
+        assert resp.get("result")
+        assert ev.is_set()
+        assert server._answers.get("rid-x") == "the answer"
+    finally:
+        server._pending.pop("rid-x", None)
+        server._answers.pop("rid-x", None)
+
diff --git a/tests/tui_gateway/test_protocol.py b/tests/tui_gateway/test_protocol.py
index eb51cccfecb..926dfadf170 100644
--- a/tests/tui_gateway/test_protocol.py
+++ b/tests/tui_gateway/test_protocol.py
@@ -120,7 +120,9 @@ def test_block_and_respond(capture):
 
     rid = next(iter(server._pending))
     server._answers[rid] = "my_answer"
-    server._pending[rid].set()
+    # _pending values are (sid, Event) tuples — unpack to set the Event
+    _, ev = server._pending[rid]
+    ev.set()
 
     threading.Event().wait(0.1)
     assert result[0] == "my_answer"
@@ -128,7 +130,8 @@ def test_block_and_respond(capture):
 
 def test_clear_pending(server):
     ev = threading.Event()
-    server._pending["r1"] = ev
+    # _pending values are (sid, Event) tuples
+    server._pending["r1"] = ("sid-x", ev)
     server._clear_pending()
 
     assert ev.is_set()
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index c58c65763ed..921f868a3c0 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -27,7 +27,7 @@ from tui_gateway.render import make_stream_renderer, render_diff, render_message
 
 _sessions: dict[str, dict] = {}
 _methods: dict[str, callable] = {}
-_pending: dict[str, threading.Event] = {}
+_pending: dict[str, tuple[str, threading.Event]] = {}
 _answers: dict[str, str] = {}
 _db = None
 _stdout_lock = threading.Lock()
@@ -296,7 +296,7 @@ def _enable_gateway_prompts() -> None:
 def _block(event: str, sid: str, payload: dict, timeout: int = 300) -> str:
     rid = uuid.uuid4().hex[:8]
     ev = threading.Event()
-    _pending[rid] = ev
+    _pending[rid] = (sid, ev)
     payload["request_id"] = rid
     _emit(event, sid, payload)
     ev.wait(timeout=timeout)
@@ -304,10 +304,19 @@ def _block(event: str, sid: str, payload: dict, timeout: int = 300) -> str:
     return _answers.pop(rid, "")
 
 
-def _clear_pending():
-    for rid, ev in list(_pending.items()):
-        _answers[rid] = ""
-        ev.set()
+def _clear_pending(sid: str | None = None) -> None:
+    """Release pending prompts with an empty answer.
+
+    When *sid* is provided, only prompts owned by that session are
+    released — critical for session.interrupt, which must not
+    collaterally cancel clarify/sudo/secret prompts on unrelated
+    sessions sharing the same tui_gateway process.  When *sid* is
+    None, every pending prompt is released (used during shutdown).
+    """
+    for rid, (owner_sid, ev) in list(_pending.items()):
+        if sid is None or owner_sid == sid:
+            _answers[rid] = ""
+            ev.set()
 
 
 # ── Agent factory ────────────────────────────────────────────────────
@@ -1345,7 +1354,11 @@ def _(rid, params: dict) -> dict:
         return err
     if hasattr(session["agent"], "interrupt"):
         session["agent"].interrupt()
-    _clear_pending()
+    # Scope the pending-prompt release to THIS session.  A global
+    # _clear_pending() would collaterally cancel clarify/sudo/secret
+    # prompts on unrelated sessions sharing the same tui_gateway
+    # process, silently resolving them to empty strings.
+    _clear_pending(params.get("session_id", ""))
     try:
         from tools.approval import resolve_gateway_approval
         resolve_gateway_approval(session["session_key"], "deny", resolve_all=True)
@@ -1684,9 +1697,10 @@ def _(rid, params: dict) -> dict:
 
 def _respond(rid, params, key):
     r = params.get("request_id", "")
-    ev = _pending.get(r)
-    if not ev:
+    entry = _pending.get(r)
+    if not entry:
         return _err(rid, 4009, f"no pending {key} request")
+    _, ev = entry
     _answers[r] = params.get(key, "")
     ev.set()
     return _ok(rid, {"status": "ok"})

From 7c10761dd2a2c4e79485f0817011eef6e52dae59 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 19 Apr 2026 00:09:38 -0700
Subject: [PATCH 109/143] fix(discord): shield text-batch flush from follow-up
 cancel (#12444)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When Discord splits a long message at 2000 chars, _enqueue_text_event
buffers each chunk and schedules a _flush_text_batch task with a
short delay.  If another chunk lands while the prior flush task is
already inside handle_message, _enqueue_text_event calls
prior_task.cancel() — and without asyncio.shield, CancelledError
propagates from the flush task into handle_message → the agent's
streaming request, aborting the response the user was waiting on.

Reproducer: user sends a 3000-char prompt (split by Discord into 2
messages).  Chunk 1 lands, flush delay starts, chunk 2 lands during
the brief window when chunk 1's flush has already committed to
handle_message.  Agent's current streaming response is cancelled
with CancelledError, user sees a truncated or missing reply.

Fix (gateway/platforms/discord.py):
- Wrap the handle_message call in asyncio.shield so the inner
  dispatch is protected from the outer task's cancel.
- Add an except asyncio.CancelledError clause so the outer task
  exits cleanly whether cancel lands during the sleep window
  (before the pop — semantics for that path are unchanged) or
  while the task is awaiting the shielded handle_message call.

The new flush task spawned by the follow-up chunk still handles its
own batch via the normal pending-message / active-session machinery
in base.py, so follow-ups are not lost.

Tests: tests/gateway/test_text_batching.py —
test_shield_protects_handle_message_from_cancel.  Tracks a distinct
first_handle_cancelled event so the assertion fails cleanly when the
shield is missing (verified by stashing the fix and re-running).

Live E2E on the live-loaded DiscordAdapter:
  first_handle_cancelled: False  (shield worked)
  first_handle_completed: True   (handle_message ran to completion)
---
 gateway/platforms/discord.py        | 15 ++++++-
 tests/gateway/test_text_batching.py | 64 +++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+), 1 deletion(-)

diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py
index b1585637ff4..1ec831b66de 100644
--- a/gateway/platforms/discord.py
+++ b/gateway/platforms/discord.py
@@ -3265,7 +3265,20 @@ class DiscordAdapter(BasePlatformAdapter):
                 "[Discord] Flushing text batch %s (%d chars)",
                 key, len(event.text or ""),
             )
-            await self.handle_message(event)
+            # Shield the downstream dispatch so that a subsequent chunk
+            # arriving while handle_message is mid-flight cannot cancel
+            # the running agent turn.  _enqueue_text_event always cancels
+            # the prior flush task when a new chunk lands; without this
+            # shield, CancelledError would propagate from our task down
+            # into handle_message → the agent's streaming request,
+            # aborting the response the user was waiting on.  The new
+            # chunk is handled by the fresh flush task regardless.
+            await asyncio.shield(self.handle_message(event))
+        except asyncio.CancelledError:
+            # Reached when cancel lands during the sleep window or while
+            # awaiting the shield; an in-flight handle_message keeps
+            # running either way.  Exit cleanly so finally cleans up.
+            pass
         finally:
             if self._pending_text_batch_tasks.get(key) is current_task:
                 self._pending_text_batch_tasks.pop(key, None)
diff --git a/tests/gateway/test_text_batching.py b/tests/gateway/test_text_batching.py
index 56bc602ef09..1ad89ffd055 100644
--- a/tests/gateway/test_text_batching.py
+++ b/tests/gateway/test_text_batching.py
@@ -148,6 +148,70 @@ class TestDiscordTextBatching:
         await asyncio.sleep(0.25)
         adapter.handle_message.assert_called_once()
 
+    @pytest.mark.asyncio
+    async def test_shield_protects_handle_message_from_cancel(self):
+        """Regression guard: a follow-up chunk arriving while
+        handle_message is mid-flight must NOT cancel the running
+        dispatch.  _enqueue_text_event fires prior_task.cancel() on
+        every new chunk; without asyncio.shield around handle_message
+        the cancel propagates into the agent's streaming request and
+        aborts the response.
+        """
+        adapter = _make_discord_adapter()
+
+        handle_started = asyncio.Event()
+        release_handle = asyncio.Event()
+        first_handle_cancelled = asyncio.Event()
+        first_handle_completed = asyncio.Event()
+        call_count = [0]
+
+        async def slow_handle(event):
+            call_count[0] += 1
+            # Only the first call (batch 1) is the one we're protecting.
+            if call_count[0] == 1:
+                handle_started.set()
+                try:
+                    await release_handle.wait()
+                    first_handle_completed.set()
+                except asyncio.CancelledError:
+                    first_handle_cancelled.set()
+                    raise
+            # Second call (batch 2) returns immediately — not the subject
+            # of this test.
+
+        adapter.handle_message = slow_handle
+
+        # Prime batch 1 and wait for it to land inside handle_message.
+        adapter._enqueue_text_event(_make_event("batch 1", Platform.DISCORD))
+        await asyncio.wait_for(handle_started.wait(), timeout=1.0)
+
+        # A new chunk arrives — _enqueue_text_event fires
+        # prior_task.cancel() on batch 1's flush task, which is
+        # currently awaiting inside handle_message.
+        adapter._enqueue_text_event(_make_event("batch 2 follow-up", Platform.DISCORD))
+
+        # Let the cancel propagate.
+        await asyncio.sleep(0.05)
+
+        # CRITICAL ASSERTION: batch 1's handle_message must NOT have
+        # been cancelled.  Without asyncio.shield this assertion fails
+        # because CancelledError propagates from the flush task's
+        # `await self.handle_message(event)` into slow_handle.
+        assert not first_handle_cancelled.is_set(), (
+            "handle_message for batch 1 was cancelled by a follow-up "
+            "chunk — asyncio.shield is missing or broken"
+        )
+
+        # Release batch 1's handle_message and let it complete.
+        release_handle.set()
+        await asyncio.wait_for(first_handle_completed.wait(), timeout=1.0)
+        assert first_handle_completed.is_set()
+
+        # Cleanup
+        for task in list(adapter._pending_text_batch_tasks.values()):
+            task.cancel()
+        await asyncio.sleep(0.01)
+
 
 # =====================================================================
 # Matrix text batching

From 3ade655999afe1f88e00fd3219bc141988e8c0d3 Mon Sep 17 00:00:00 2001
From: helix4u <4317663+helix4u@users.noreply.github.com>
Date: Mon, 13 Apr 2026 15:07:37 -0600
Subject: [PATCH 110/143] fix(whatsapp): log allowlist drops in bridge

---
 scripts/whatsapp-bridge/bridge.js | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/scripts/whatsapp-bridge/bridge.js b/scripts/whatsapp-bridge/bridge.js
index 70cf8e95d9f..9af85caeea7 100644
--- a/scripts/whatsapp-bridge/bridge.js
+++ b/scripts/whatsapp-bridge/bridge.js
@@ -26,7 +26,7 @@ import path from 'path';
 import { mkdirSync, readFileSync, writeFileSync, existsSync, readdirSync } from 'fs';
 import { randomBytes } from 'crypto';
 import qrcode from 'qrcode-terminal';
-import { matchesAllowedUser, parseAllowedUsers } from './allowlist.js';
+import { expandWhatsAppIdentifiers, matchesAllowedUser, parseAllowedUsers } from './allowlist.js';
 
 // Parse CLI args
 const args = process.argv.slice(2);
@@ -229,6 +229,15 @@ async function startSocket() {
 
       // Check allowlist for messages from others (resolve LID ↔ phone aliases)
       if (!msg.key.fromMe && !matchesAllowedUser(senderId, ALLOWED_USERS, SESSION_DIR)) {
+        try {
+          console.log(JSON.stringify({
+            event: 'ignored',
+            reason: 'allowlist_mismatch',
+            chatId,
+            senderId,
+            senderAliases: Array.from(expandWhatsAppIdentifiers(senderId, SESSION_DIR)),
+          }));
+        } catch {}
         continue;
       }
 

From 361675018f436a95c0353a2755d7cfdd3b0ac44a Mon Sep 17 00:00:00 2001
From: helix4u <4317663+helix4u@users.noreply.github.com>
Date: Sat, 18 Apr 2026 16:44:40 -0600
Subject: [PATCH 111/143] fix(setup): stop hardcoding max-iterations copy

---
 hermes_cli/setup.py                           |  4 ++-
 tests/hermes_cli/test_setup_agent_settings.py | 29 +++++++++++++++++++
 2 files changed, 32 insertions(+), 1 deletion(-)
 create mode 100644 tests/hermes_cli/test_setup_agent_settings.py

diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index 8f6b633c6ac..f969bd4bd16 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -1460,7 +1460,9 @@ def setup_agent_settings(config: dict):
     )
     print_info("Maximum tool-calling iterations per conversation.")
     print_info("Higher = more complex tasks, but costs more tokens.")
-    print_info("Default is 90, which works for most tasks. Use 150+ for open exploration.")
+    print_info(
+        f"Press Enter to keep {current_max}. Use 90 for most tasks or 150+ for open exploration."
+    )
 
     max_iter_str = prompt("Max iterations", current_max)
     try:
diff --git a/tests/hermes_cli/test_setup_agent_settings.py b/tests/hermes_cli/test_setup_agent_settings.py
new file mode 100644
index 00000000000..868be7508c0
--- /dev/null
+++ b/tests/hermes_cli/test_setup_agent_settings.py
@@ -0,0 +1,29 @@
+"""Tests for agent-settings copy in the interactive setup wizard."""
+
+from hermes_cli.setup import setup_agent_settings
+
+
+def test_setup_agent_settings_uses_displayed_max_iterations_value(tmp_path, monkeypatch, capsys):
+    """The helper text should match the value shown in the prompt."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+
+    config = {
+        "agent": {"max_turns": 90},
+        "display": {"tool_progress": "all"},
+        "compression": {"threshold": 0.50},
+        "session_reset": {"mode": "both", "idle_minutes": 1440, "at_hour": 4},
+    }
+
+    prompt_answers = iter(["60", "all", "0.5"])
+
+    monkeypatch.setattr("hermes_cli.setup.get_env_value", lambda key: "60" if key == "HERMES_MAX_ITERATIONS" else "")
+    monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: next(prompt_answers))
+    monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda *args, **kwargs: 4)
+    monkeypatch.setattr("hermes_cli.setup.save_env_value", lambda *args, **kwargs: None)
+    monkeypatch.setattr("hermes_cli.setup.save_config", lambda *args, **kwargs: None)
+
+    setup_agent_settings(config)
+
+    out = capsys.readouterr().out
+    assert "Press Enter to keep 60." in out
+    assert "Default is 90" not in out

From cd59af17cc095da08b223a9378c4a1621f7c0393 Mon Sep 17 00:00:00 2001
From: helix4u <4317663+helix4u@users.noreply.github.com>
Date: Sat, 18 Apr 2026 14:28:50 -0600
Subject: [PATCH 112/143] fix(agent): silence quiet_mode in python library use

---
 run_agent.py                      | 17 +++++++------
 tests/run_agent/test_run_agent.py | 40 +++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/run_agent.py b/run_agent.py
index 01064880982..050faeea4f0 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1916,13 +1916,16 @@ class AIAgent:
     def _should_emit_quiet_tool_messages(self) -> bool:
         """Return True when quiet-mode tool summaries should print directly.
 
-        When the caller provides ``tool_progress_callback`` (for example the CLI
-        TUI or a gateway progress renderer), that callback owns progress display.
-        Emitting quiet-mode summary lines here duplicates progress and leaks tool
-        previews into flows that are expected to stay silent, such as
-        ``hermes chat -q``.
+        Quiet mode is used by both the interactive CLI and embedded/library
+        callers. The CLI may still want compact progress hints when no callback
+        owns rendering. Embedded/library callers, on the other hand, expect
+        quiet mode to be truly silent.
         """
-        return self.quiet_mode and not self.tool_progress_callback
+        return (
+            self.quiet_mode
+            and not self.tool_progress_callback
+            and getattr(self, "platform", "") == "cli"
+        )
 
     def _emit_status(self, message: str) -> None:
         """Emit a lifecycle status message to both CLI and gateway channels.
@@ -11184,7 +11187,7 @@ class AIAgent:
                         self._last_content_tools_all_housekeeping = _all_housekeeping
                         if _all_housekeeping and self._has_stream_consumers():
                             self._mute_post_response = True
-                        elif self.quiet_mode:
+                        elif self.quiet_mode and getattr(self, "platform", "") == "cli":
                             clean = self._strip_think_blocks(turn_content).strip()
                             if clean:
                                 relayed = False
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index d30445cf459..bedb7bbf484 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -1285,6 +1285,7 @@ class TestExecuteToolCalls:
         tc = _mock_tool_call(name="web_search", arguments='{"q":"test"}', call_id="c1")
         mock_msg = _mock_assistant_msg(content="", tool_calls=[tc])
         messages = []
+        agent.platform = "cli"
         agent.tool_progress_callback = None
 
         with patch("run_agent.handle_function_call", return_value="search result"), \
@@ -1296,6 +1297,21 @@ class TestExecuteToolCalls:
         assert len(messages) == 1
         assert messages[0]["role"] == "tool"
 
+    def test_quiet_tool_output_suppressed_without_progress_callback_for_non_cli_agent(self, agent):
+        tc = _mock_tool_call(name="web_search", arguments='{"q":"test"}', call_id="c1")
+        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc])
+        messages = []
+        agent.platform = None  # any platform other than "cli" must keep quiet mode silent
+        agent.tool_progress_callback = None  # no callback owns progress rendering either
+
+        with patch("run_agent.handle_function_call", return_value="search result"), \
+             patch.object(agent, "_safe_print") as mock_print:
+            agent._execute_tool_calls(mock_msg, messages, "task-1")
+
+        mock_print.assert_not_called()  # no tool summary may be printed for a non-CLI agent
+        assert len(messages) == 1  # the tool result itself is still recorded
+        assert messages[0]["role"] == "tool"
+
     def test_vprint_suppressed_in_parseable_quiet_mode(self, agent):
         agent.suppress_status_output = True
 
@@ -1876,6 +1892,30 @@ class TestRunConversation:
         assert all("message_count" in c and "messages" not in c for c in pre_request_calls)
         assert all("usage" in c and "response" not in c for c in post_request_calls)
 
+    def test_content_with_tool_calls_stays_silent_for_non_cli_quiet_mode(self, agent):
+        self._setup_agent(agent)
+        agent.platform = None  # non-CLI caller: interim assistant text must not be echoed
+        tc = _mock_tool_call(name="web_search", arguments="{}", call_id="c1")
+        resp1 = _mock_response(  # first model turn: text content accompanied by a tool call
+            content="I'll search for that.",
+            finish_reason="tool_calls",
+            tool_calls=[tc],
+        )
+        resp2 = _mock_response(content="Done searching", finish_reason="stop")  # final answer
+        agent.client.chat.completions.create.side_effect = [resp1, resp2]
+
+        with (
+            patch("run_agent.handle_function_call", return_value="search result"),
+            patch.object(agent, "_safe_print") as mock_print,
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            result = agent.run_conversation("search something")
+
+        assert result["final_response"] == "Done searching"
+        mock_print.assert_not_called()  # neither the interim content nor tool previews were printed
+
     def test_interrupt_breaks_loop(self, agent):
         self._setup_agent(agent)
 

From 175cf7e6bb4e629a5f121c8e6f3a56a5903105b7 Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Sun, 19 Apr 2026 12:57:17 +0530
Subject: [PATCH 113/143] fix: tighten quiet-mode salvage follow-ups

Follow-up for the helix4u easy-fix salvage batch:
- route remaining context-engine quiet-mode output through
  _should_emit_quiet_tool_messages() so non-CLI/library callers stay
  silent consistently
- drop the extra senderAliases computation from WhatsApp allowlist-drop
  logging and remove the now-unused import

This keeps the batch scoped to the intended fixes while avoiding
leaked quiet-mode output and unnecessary duplicate work in the bridge.
---
 run_agent.py                      | 15 ++++-----------
 scripts/whatsapp-bridge/bridge.js |  3 +--
 2 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/run_agent.py b/run_agent.py
index 050faeea4f0..8e1fbfed194 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -8325,7 +8325,7 @@ class AIAgent:
             elif self._context_engine_tool_names and function_name in self._context_engine_tool_names:
                 # Context engine tools (lcm_grep, lcm_describe, lcm_expand, etc.)
                 spinner = None
-                if self.quiet_mode and not self.tool_progress_callback:
+                if self._should_emit_quiet_tool_messages():
                     face = random.choice(KawaiiSpinner.get_waiting_faces())
                     emoji = _get_tool_emoji(function_name)
                     preview = _build_tool_preview(function_name, function_args) or function_name
@@ -8343,7 +8343,7 @@ class AIAgent:
                     cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_ce_result)
                     if spinner:
                         spinner.stop(cute_msg)
-                    elif self.quiet_mode:
+                    elif self._should_emit_quiet_tool_messages():
                         self._vprint(f"  {cute_msg}")
             elif self._memory_manager and self._memory_manager.has_tool(function_name):
                 # Memory provider tools (hindsight_retain, honcho_search, etc.)
@@ -11187,17 +11187,10 @@ class AIAgent:
                         self._last_content_tools_all_housekeeping = _all_housekeeping
                         if _all_housekeeping and self._has_stream_consumers():
                             self._mute_post_response = True
-                        elif self.quiet_mode and getattr(self, "platform", "") == "cli":
+                        elif self._should_emit_quiet_tool_messages():
                             clean = self._strip_think_blocks(turn_content).strip()
                             if clean:
-                                relayed = False
-                                if (
-                                    self.tool_progress_callback
-                                    and getattr(self, "platform", "") == "tui"
-                                ):
-                                    relayed = True
-                                if not relayed:
-                                    self._vprint(f"  ┊ 💬 {clean}")
+                                self._vprint(f"  ┊ 💬 {clean}")
                     
                     # Pop thinking-only prefill message(s) before appending
                     # (tool-call path — same rationale as the final-response path).
diff --git a/scripts/whatsapp-bridge/bridge.js b/scripts/whatsapp-bridge/bridge.js
index 9af85caeea7..401651c8a83 100644
--- a/scripts/whatsapp-bridge/bridge.js
+++ b/scripts/whatsapp-bridge/bridge.js
@@ -26,7 +26,7 @@ import path from 'path';
 import { mkdirSync, readFileSync, writeFileSync, existsSync, readdirSync } from 'fs';
 import { randomBytes } from 'crypto';
 import qrcode from 'qrcode-terminal';
-import { expandWhatsAppIdentifiers, matchesAllowedUser, parseAllowedUsers } from './allowlist.js';
+import { matchesAllowedUser, parseAllowedUsers } from './allowlist.js';
 
 // Parse CLI args
 const args = process.argv.slice(2);
@@ -235,7 +235,6 @@ async function startSocket() {
             reason: 'allowlist_mismatch',
             chatId,
             senderId,
-            senderAliases: Array.from(expandWhatsAppIdentifiers(senderId, SESSION_DIR)),
           }));
         } catch {}
         continue;

From c94d26c69bf57539f8a53936854b1a8925d70262 Mon Sep 17 00:00:00 2001
From: helix4u <4317663+helix4u@users.noreply.github.com>
Date: Fri, 17 Apr 2026 13:51:14 -0600
Subject: [PATCH 114/143] fix(cli): sanitize interactive command output

---
 cli.py                           | 111 ++++++++++++++++---------------
 tests/cli/test_gquota_command.py |  21 ++++++
 tests/cli/test_quick_commands.py |  14 ++++
 3 files changed, 94 insertions(+), 52 deletions(-)
 create mode 100644 tests/cli/test_gquota_command.py

diff --git a/cli.py b/cli.py
index c9ce95e9f2e..e814e35b122 100644
--- a/cli.py
+++ b/cli.py
@@ -1810,7 +1810,7 @@ class HermesCLI:
             mcp_names = set((CLI_CONFIG.get("mcp_servers") or {}).keys())
             invalid = [t for t in toolsets if not validate_toolset(t) and t not in mcp_names]
             if invalid:
-                self.console.print(f"[bold red]Warning: Unknown toolsets: {', '.join(invalid)}[/]")
+                self._console_print(f"[bold red]Warning: Unknown toolsets: {', '.join(invalid)}[/]")
         
         # Filesystem checkpoints: CLI flag > config
         cp_cfg = CLI_CONFIG.get("checkpoints", {})
@@ -2261,7 +2261,7 @@ class HermesCLI:
                 normalized_model = normalize_model_for_provider(current_model, resolved_provider)
                 if normalized_model and normalized_model != current_model:
                     if not self._model_is_default:
-                        self.console.print(
+                        self._console_print(
                             f"[yellow]⚠️  Normalized model '{current_model}' to '{normalized_model}' for {resolved_provider}.[/]"
                         )
                     self.model = normalized_model
@@ -2277,7 +2277,7 @@ class HermesCLI:
                 canonical = normalize_copilot_model_id(current_model, api_key=self.api_key)
                 if canonical and canonical != current_model:
                     if not self._model_is_default:
-                        self.console.print(
+                        self._console_print(
                             f"[yellow]⚠️  Normalized Copilot model '{current_model}' to '{canonical}'.[/]"
                         )
                     self.model = canonical
@@ -2299,7 +2299,7 @@ class HermesCLI:
                 canonical = normalize_opencode_model_id(resolved_provider, current_model)
                 if canonical and canonical != current_model:
                     if not self._model_is_default:
-                        self.console.print(
+                        self._console_print(
                             f"[yellow]⚠️  Stripped provider prefix from '{current_model}'; using '{canonical}' for {resolved_provider}.[/]"
                         )
                     self.model = canonical
@@ -2321,7 +2321,7 @@ class HermesCLI:
         if "/" in current_model:
             slug = current_model.split("/", 1)[1]
             if not self._model_is_default:
-                self.console.print(
+                self._console_print(
                     f"[yellow]⚠️  Stripped provider prefix from '{current_model}'; "
                     f"using '{slug}' for OpenAI Codex.[/]"
                 )
@@ -3070,7 +3070,7 @@ class HermesCLI:
         use_compact = self.compact or term_width < 80
         
         if use_compact:
-            self.console.print(_build_compact_banner())
+            self._console_print(_build_compact_banner())
             self._show_status()
         else:
             # Get tools for display
@@ -3095,25 +3095,25 @@ class HermesCLI:
 
         # Warn about very low context lengths (common with local servers)
         if ctx_len and ctx_len <= 8192:
-            self.console.print()
-            self.console.print(
+            self._console_print()
+            self._console_print(
                 f"[yellow]⚠️  Context length is only {ctx_len:,} tokens — "
                 f"this is likely too low for agent use with tools.[/]"
             )
-            self.console.print(
+            self._console_print(
                 "[dim]   Hermes needs 16k–32k minimum. Tool schemas + system prompt alone use ~4k–8k.[/]"
             )
             base_url = getattr(self, "base_url", "") or ""
             if "11434" in base_url or "ollama" in base_url.lower():
-                self.console.print(
+                self._console_print(
                     "[dim]   Ollama fix: OLLAMA_CONTEXT_LENGTH=32768 ollama serve[/]"
                 )
             elif "1234" in base_url:
-                self.console.print(
+                self._console_print(
                     "[dim]   LM Studio fix: Set context length in model settings → reload model[/]"
                 )
             else:
-                self.console.print(
+                self._console_print(
                     "[dim]   Fix: Set model.context_length in config.yaml, or increase your server's context setting[/]"
                 )
 
@@ -3122,20 +3122,20 @@ class HermesCLI:
 
         model_name = getattr(self, "model", "") or ""
         if is_nous_hermes_non_agentic(model_name):
-            self.console.print()
-            self.console.print(
+            self._console_print()
+            self._console_print(
                 "[bold yellow]⚠  Nous Research Hermes 3 & 4 models are NOT agentic and are not "
                 "designed for use with Hermes Agent.[/]"
             )
-            self.console.print(
+            self._console_print(
                 "[dim]   They lack tool-calling capabilities required for agent workflows. "
                 "Consider using an agentic model (Claude, GPT, Gemini, DeepSeek, etc.).[/]"
             )
-            self.console.print(
+            self._console_print(
                 "[dim]   Switch with: /model sonnet  or  /model gpt5[/]"
             )
 
-        self.console.print()
+        self._console_print()
 
     def _preload_resumed_session(self) -> bool:
         """Load a resumed session's history from the DB early (before first chat).
@@ -3153,10 +3153,10 @@ class HermesCLI:
 
         session_meta = self._session_db.get_session(self.session_id)
         if not session_meta:
-            self.console.print(
+            self._console_print(
                 f"[bold red]Session not found: {self.session_id}[/]"
             )
-            self.console.print(
+            self._console_print(
                 "[dim]Use a session ID from a previous CLI run "
                 "(hermes sessions list).[/]"
             )
@@ -3171,7 +3171,7 @@ class HermesCLI:
             if session_meta.get("title"):
                 title_part = f' "{session_meta["title"]}"'
             accent_color = _accent_hex()
-            self.console.print(
+            self._console_print(
                 f"[{accent_color}]↻ Resumed session [bold]{self.session_id}[/bold]"
                 f"{title_part} "
                 f"({msg_count} user message{'s' if msg_count != 1 else ''}, "
@@ -3179,7 +3179,7 @@ class HermesCLI:
             )
         else:
             accent_color = _accent_hex()
-            self.console.print(
+            self._console_print(
                 f"[{accent_color}]Session {self.session_id} found but has no "
                 f"messages. Starting fresh.[/]"
             )
@@ -3354,7 +3354,7 @@ class HermesCLI:
             padding=(0, 1),
             style=_history_text_c,
         )
-        self.console.print(panel)
+        self._console_print(panel)
 
     def _try_attach_clipboard_image(self) -> bool:
         """Check clipboard for an image and attach it if found.
@@ -3790,14 +3790,14 @@ class HermesCLI:
             api_key_missing = [u for u in unavailable if u["missing_vars"]]
             
             if api_key_missing:
-                self.console.print()
-                self.console.print("[yellow]⚠️  Some tools disabled (missing API keys):[/]")
+                self._console_print()
+                self._console_print("[yellow]⚠️  Some tools disabled (missing API keys):[/]")
                 for item in api_key_missing:
                     tools_str = ", ".join(item["tools"][:2])  # Show first 2 tools
                     if len(item["tools"]) > 2:
                         tools_str += f", +{len(item['tools'])-2} more"
-                    self.console.print(f"   [dim]• {item['name']}[/] [dim italic]({', '.join(item['missing_vars'])})[/]")
-                self.console.print("[dim]   Run 'hermes setup' to configure[/]")
+                    self._console_print(f"   [dim]• {item['name']}[/] [dim italic]({', '.join(item['missing_vars'])})[/]")
+                self._console_print("[dim]   Run 'hermes setup' to configure[/]")
         except Exception:
             pass  # Don't crash on import errors
     
@@ -3835,7 +3835,7 @@ class HermesCLI:
         if self._provider_source:
             provider_info += f" [dim {separator_color}]·[/] [dim]auth: {self._provider_source}[/]"
 
-        self.console.print(
+        self._console_print(
             f"  {api_indicator} [{accent_color}]{model_short}[/] "
             f"[dim {separator_color}]·[/] [bold {label_color}]{tool_count} tools[/]"
             f"{toolsets_info}{provider_info}"
@@ -3892,7 +3892,7 @@ class HermesCLI:
             f"Tokens: {total_tokens:,}",
             f"Agent Running: {'Yes' if is_running else 'No'}",
         ])
-        self.console.print("\n".join(lines), highlight=False, markup=False)
+        self._console_print("\n".join(lines), highlight=False, markup=False)
     
     def _fast_command_available(self) -> bool:
         try:
@@ -5090,8 +5090,15 @@ class HermesCLI:
 
         print("  To change model or provider, use: hermes model")
 
+    def _output_console(self):
+        """Pick the console for command output: a prompt_toolkit-safe ``ChatConsole`` once the TUI (``self._app``) is live, else ``self.console``."""
+        if getattr(self, "_app", None):  # getattr with a default tolerates instances that never set _app
+            return ChatConsole()
+        return self.console
 
-    
+    def _console_print(self, *args, **kwargs):
+        """Forward ``args``/``kwargs`` unchanged to ``print`` on the console picked by ``_output_console()``."""
+        self._output_console().print(*args, **kwargs)
 
     @staticmethod
     def _resolve_personality_prompt(value) -> str:
@@ -5111,14 +5118,14 @@ class HermesCLI:
             from agent.google_oauth import get_valid_access_token, GoogleOAuthError, load_credentials
             from agent.google_code_assist import retrieve_user_quota, CodeAssistError
         except ImportError as exc:
-            self.console.print(f"  [red]Gemini modules unavailable: {exc}[/]")
+            self._console_print(f"  [red]Gemini modules unavailable: {exc}[/]")
             return
 
         try:
             access_token = get_valid_access_token()
         except GoogleOAuthError as exc:
-            self.console.print(f"  [yellow]{exc}[/]")
-            self.console.print("  Run [bold]/model[/] and pick 'Google Gemini (OAuth)' to sign in.")
+            self._console_print(f"  [yellow]{exc}[/]")
+            self._console_print("  Run [bold]/model[/] and pick 'Google Gemini (OAuth)' to sign in.")
             return
 
         creds = load_credentials()
@@ -5127,18 +5134,18 @@ class HermesCLI:
         try:
             buckets = retrieve_user_quota(access_token, project_id=project_id)
         except CodeAssistError as exc:
-            self.console.print(f"  [red]Quota lookup failed:[/] {exc}")
+            self._console_print(f"  [red]Quota lookup failed:[/] {exc}")
             return
 
         if not buckets:
-            self.console.print("  [dim]No quota buckets reported (account may be on legacy/unmetered tier).[/]")
+            self._console_print("  [dim]No quota buckets reported (account may be on legacy/unmetered tier).[/]")
             return
 
         # Sort for stable display, group by model
         buckets.sort(key=lambda b: (b.model_id, b.token_type))
-        self.console.print()
-        self.console.print(f"  [bold]Gemini Code Assist quota[/]  (project: {project_id or '(auto / free-tier)'})")
-        self.console.print()
+        self._console_print()
+        self._console_print(f"  [bold]Gemini Code Assist quota[/]  (project: {project_id or '(auto / free-tier)'})")
+        self._console_print()
         for b in buckets:
             pct = max(0.0, min(1.0, b.remaining_fraction))
             width = 20
@@ -5148,8 +5155,8 @@ class HermesCLI:
             header = b.model_id
             if b.token_type:
                 header += f" [{b.token_type}]"
-            self.console.print(f"    {header:40s}  {bar}  {pct_str}")
-        self.console.print()
+            self._console_print(f"    {header:40s}  {bar}  {pct_str}")
+        self._console_print()
 
     def _handle_personality_command(self, cmd: str):
         """Handle the /personality command to set predefined personalities."""
@@ -5597,7 +5604,7 @@ class HermesCLI:
                         _tip_color = get_active_skin().get_color("banner_dim", "#B8860B")
                     except Exception:
                         _tip_color = "#B8860B"
-                    self.console.print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]")
+                    self._console_print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]")
                 except Exception:
                     pass
         elif canonical == "history":
@@ -5691,7 +5698,7 @@ class HermesCLI:
         elif canonical == "statusbar":
             self._status_bar_visible = not self._status_bar_visible
             state = "visible" if self._status_bar_visible else "hidden"
-            self.console.print(f"  Status bar {state}")
+            self._console_print(f"  Status bar {state}")
         elif canonical == "verbose":
             self._toggle_verbose()
         elif canonical == "yolo":
@@ -5814,15 +5821,15 @@ class HermesCLI:
                             )
                             output = result.stdout.strip() or result.stderr.strip()
                             if output:
-                                self.console.print(_rich_text_from_ansi(output))
+                                self._console_print(_rich_text_from_ansi(output))
                             else:
-                                self.console.print("[dim]Command returned no output[/]")
+                                self._console_print("[dim]Command returned no output[/]")
                         except subprocess.TimeoutExpired:
-                            self.console.print("[bold red]Quick command timed out (30s)[/]")
+                            self._console_print("[bold red]Quick command timed out (30s)[/]")
                         except Exception as e:
-                            self.console.print(f"[bold red]Quick command error: {e}[/]")
+                            self._console_print(f"[bold red]Quick command error: {e}[/]")
                     else:
-                        self.console.print(f"[bold red]Quick command '{base_cmd}' has no command defined[/]")
+                        self._console_print(f"[bold red]Quick command '{base_cmd}' has no command defined[/]")
                 elif qcmd.get("type") == "alias":
                     target = qcmd.get("target", "").strip()
                     if target:
@@ -5831,9 +5838,9 @@ class HermesCLI:
                         aliased_command = f"{target} {user_args}".strip()
                         return self.process_command(aliased_command)
                     else:
-                        self.console.print(f"[bold red]Quick command '{base_cmd}' has no target defined[/]")
+                        self._console_print(f"[bold red]Quick command '{base_cmd}' has no target defined[/]")
                 else:
-                    self.console.print(f"[bold red]Quick command '{base_cmd}' has unsupported type (supported: 'exec', 'alias')[/]")
+                    self._console_print(f"[bold red]Quick command '{base_cmd}' has unsupported type (supported: 'exec', 'alias')[/]")
             # Check for plugin-registered slash commands
             elif base_cmd.lstrip("/") in _get_plugin_cmd_handler_names():
                 from hermes_cli.plugins import get_plugin_command_handler
@@ -8603,7 +8610,7 @@ class HermesCLI:
         except Exception:
             _welcome_text = "Welcome to Hermes Agent! Type your message or /help for commands."
             _welcome_color = "#FFF8DC"
-        self.console.print(f"[{_welcome_color}]{_welcome_text}[/]")
+        self._console_print(f"[{_welcome_color}]{_welcome_text}[/]")
         # Show a random tip to help users discover features
         try:
             from hermes_cli.tips import get_random_tip
@@ -8612,16 +8619,16 @@ class HermesCLI:
                 _tip_color = _welcome_skin.get_color("banner_dim", "#B8860B")
             except Exception:
                 _tip_color = "#B8860B"
-            self.console.print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]")
+            self._console_print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]")
         except Exception:
             pass  # Tips are non-critical — never break startup
         if self.preloaded_skills and not self._startup_skills_line_shown:
             skills_label = ", ".join(self.preloaded_skills)
-            self.console.print(
+            self._console_print(
                 f"[bold {_accent_hex()}]Activated skills:[/] {skills_label}"
             )
             self._startup_skills_line_shown = True
-        self.console.print()
+        self._console_print()
         
         # State for async operation
         self._agent_running = False
diff --git a/tests/cli/test_gquota_command.py b/tests/cli/test_gquota_command.py
new file mode 100644
index 00000000000..0740e001262
--- /dev/null
+++ b/tests/cli/test_gquota_command.py
@@ -0,0 +1,21 @@
+from unittest.mock import MagicMock, patch
+
+
+def test_gquota_uses_chat_console_when_tui_is_live():
+    from agent.google_oauth import GoogleOAuthError
+    from cli import HermesCLI
+
+    cli = HermesCLI.__new__(HermesCLI)  # skip __init__; only the attributes assigned below exist
+    cli.console = MagicMock()
+    cli._app = object()  # truthy sentinel: _output_console() then returns ChatConsole()
+
+    live_console = MagicMock()
+
+    with patch("cli.ChatConsole", return_value=live_console), \
+         patch("agent.google_oauth.get_valid_access_token", side_effect=GoogleOAuthError("No Google OAuth credentials found")), \
+         patch("agent.google_oauth.load_credentials", return_value=None), \
+         patch("agent.google_code_assist.retrieve_user_quota"):
+        cli._handle_gquota_command("/gquota")
+
+    assert live_console.print.call_count == 2  # the OAuth-error branch prints the error and the /model hint
+    cli.console.print.assert_not_called()  # nothing may go through the plain Rich console
diff --git a/tests/cli/test_quick_commands.py b/tests/cli/test_quick_commands.py
index 7a89d4ca28a..1c94cb1b025 100644
--- a/tests/cli/test_quick_commands.py
+++ b/tests/cli/test_quick_commands.py
@@ -33,6 +33,20 @@ class TestCLIQuickCommands:
         printed = self._printed_plain(cli.console.print.call_args[0][0])
         assert printed == "daily-note"
 
+    def test_exec_command_uses_chat_console_when_tui_is_live(self):
+        cli = self._make_cli({"dn": {"type": "exec", "command": "echo daily-note"}})
+        cli._app = object()  # truthy _app makes _console_print route to ChatConsole
+        live_console = MagicMock()
+
+        with patch("cli.ChatConsole", return_value=live_console):
+            result = cli.process_command("/dn")
+
+        assert result is True
+        live_console.print.assert_called_once()  # the command's stdout is printed exactly once
+        printed = self._printed_plain(live_console.print.call_args[0][0])
+        assert printed == "daily-note"
+        cli.console.print.assert_not_called()  # the plain console must stay untouched while the TUI is live
+
     def test_exec_command_stderr_shown_on_no_stdout(self):
         cli = self._make_cli({"err": {"type": "exec", "command": "echo error >&2"}})
         result = cli.process_command("/err")

From e0171314030fa5fad2e7e7e96c116c98a0178e33 Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Sat, 18 Apr 2026 19:30:07 -0700
Subject: [PATCH 115/143] =?UTF-8?q?feat(cron):=20add=20wakeAgent=20gate=20?=
 =?UTF-8?q?=E2=80=94=20scripts=20can=20skip=20the=20agent=20entirely?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extends the existing cron script hook with a wake gate ported from
nanoclaw #1232. When a cron job's pre-check Python script (already
sandboxed to HERMES_HOME/scripts/) writes a JSON line like
```json
{"wakeAgent": false}
```
on its last stdout line, `run_job()` returns the SILENT marker and
skips the agent entirely — no LLM call, no delivery, no tokens spent.
Useful for frequent polls (every 1-5 min) that only need to wake the
agent when something has genuinely changed.

Any other script output (non-JSON, missing key, non-dict JSON, `wakeAgent: true`,
or any value other than the literal `false`, e.g. `0`, `null`, `""`) behaves
as before: stdout is injected as context and the agent runs normally. Only a
strict JSON `false` skips the agent, which avoids accidental gating from
arbitrary JSON.

Refactor:
- New pure helper `_parse_wake_gate(script_output)` in cron/scheduler.py
- `_build_job_prompt` accepts optional `prerun_script` tuple so the
  script runs exactly once per job (run_job runs it for the gate check,
  reuses the output for prompt injection)
- `run_job` short-circuits with SILENT_MARKER when gate fires

Script failures (success=False) still cannot trigger the gate — the
failure is reported as context to the agent as before.

This replaces the approach in closed PR #3837, which inlined bash
scripts via tempfile and lost the path-traversal/scripts-dir sandbox
that main's impl has. The wake-gate idea (the one net-new capability)
is ported on top of the existing sandboxed Python-script model.

Tests:
- 11 pure unit tests for _parse_wake_gate (empty, whitespace, non-JSON,
  non-dict JSON, missing key, truthy/falsy non-False, multi-line,
  trailing blanks, non-last-line JSON)
- 5 integration tests for run_job wake-gate (skip returns SILENT,
  wake-true passes through, script-runs-only-once, script failure
  doesn't gate, no-script regression)
- Full tests/cron/ suite: 194/194 pass
---
 cron/scheduler.py            |  69 +++++++++++++-
 tests/cron/test_scheduler.py | 174 +++++++++++++++++++++++++++++++++++
 2 files changed, 239 insertions(+), 4 deletions(-)

diff --git a/cron/scheduler.py b/cron/scheduler.py
index 8938063c7ff..6e93fc02fee 100644
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -564,15 +564,53 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
         return False, f"Script execution failed: {exc}"
 
 
-def _build_job_prompt(job: dict) -> str:
-    """Build the effective prompt for a cron job, optionally loading one or more skills first."""
+def _parse_wake_gate(script_output: Optional[str]) -> bool:
+    """Decide from a pre-check script's stdout whether the agent should run.
+
+    Only the last non-blank line is examined. The agent is skipped solely
+    when that line is a JSON object whose ``wakeAgent`` is exactly ``false``
+    (e.g. ``{"wakeAgent": false}``). Empty/``None`` output, non-JSON text,
+    non-object JSON, a missing key, or any other value (``true``, ``0``,
+    ``null``, ...) all wake the agent. Convention ported from nanoclaw #1232.
+
+    Returns True if the agent should wake, False to skip.
+    """
+    if not script_output:  # None or "" -> no gate present
+        return True
+    stripped_lines = [line for line in script_output.splitlines() if line.strip()]  # drops blank lines; kept lines are not yet stripped
+    if not stripped_lines:  # output was whitespace only
+        return True
+    last_line = stripped_lines[-1].strip()
+    try:
+        gate = json.loads(last_line)
+    except (json.JSONDecodeError, ValueError):  # last line is ordinary text, not a gate
+        return True
+    if not isinstance(gate, dict):  # JSON scalars/arrays are never treated as a gate
+        return True
+    return gate.get("wakeAgent", True) is not False  # identity check: only literal False skips
+
+
+def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
+    """Build the effective prompt for a cron job, optionally loading one or more skills first.
+
+    Args:
+        job: The cron job dict.
+        prerun_script: Optional ``(success, stdout)`` from a script that has
+            already been executed by the caller (e.g. for a wake-gate check).
+            When provided, the script is not re-executed and the cached
+            result is used for prompt injection. When omitted, the script
+            (if any) runs inline as before.
+    """
     prompt = job.get("prompt", "")
     skills = job.get("skills")
 
     # Run data-collection script if configured, inject output as context.
     script_path = job.get("script")
     if script_path:
-        success, script_output = _run_job_script(script_path)
+        if prerun_script is not None:
+            success, script_output = prerun_script
+        else:
+            success, script_output = _run_job_script(script_path)
         if success:
             if script_output:
                 prompt = (
@@ -674,7 +712,30 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
     
     job_id = job["id"]
     job_name = job["name"]
-    prompt = _build_job_prompt(job)
+
+    # Wake-gate: if this job has a pre-check script, run it BEFORE building
+    # the prompt so a ``{"wakeAgent": false}`` response can short-circuit
+    # the whole agent run. We pass the result into _build_job_prompt so
+    # the script is only executed once.
+    prerun_script = None
+    script_path = job.get("script")
+    if script_path:
+        prerun_script = _run_job_script(script_path)
+        _ran_ok, _script_output = prerun_script
+        if _ran_ok and not _parse_wake_gate(_script_output):
+            logger.info(
+                "Job '%s' (ID: %s): wakeAgent=false, skipping agent run",
+                job_name, job_id,
+            )
+            silent_doc = (
+                f"# Cron Job: {job_name}\n\n"
+                f"**Job ID:** {job_id}\n"
+                f"**Run Time:** {_hermes_now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
+                "Script gate returned `wakeAgent=false` — agent skipped.\n"
+            )
+            return True, silent_doc, SILENT_MARKER, None
+
+    prompt = _build_job_prompt(job, prerun_script=prerun_script)
     origin = _resolve_origin(job)
     _cron_session_id = f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}"
 
diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py
index 2717584e464..b889ede372e 100644
--- a/tests/cron/test_scheduler.py
+++ b/tests/cron/test_scheduler.py
@@ -1175,6 +1175,180 @@ class TestBuildJobPromptSilentHint:
         assert system_pos < prompt_pos
 
 
+class TestParseWakeGate:
+    """Unit tests for _parse_wake_gate — pure function, no side effects."""
+
+    def test_empty_output_wakes(self):
+        from cron.scheduler import _parse_wake_gate
+        assert _parse_wake_gate("") is True
+        assert _parse_wake_gate(None) is True
+
+    def test_whitespace_only_wakes(self):
+        from cron.scheduler import _parse_wake_gate
+        assert _parse_wake_gate("   \n\n  \t\n") is True
+
+    def test_non_json_last_line_wakes(self):
+        from cron.scheduler import _parse_wake_gate
+        assert _parse_wake_gate("hello world") is True
+        assert _parse_wake_gate("line 1\nline 2\nplain text") is True
+
+    def test_json_non_dict_wakes(self):
+        """Bare arrays, numbers, strings must not be interpreted as a gate."""
+        from cron.scheduler import _parse_wake_gate
+        assert _parse_wake_gate("[1, 2, 3]") is True
+        assert _parse_wake_gate("42") is True
+        assert _parse_wake_gate('"wakeAgent"') is True
+
+    def test_wake_gate_false_skips(self):
+        from cron.scheduler import _parse_wake_gate
+        assert _parse_wake_gate('{"wakeAgent": false}') is False
+
+    def test_wake_gate_true_wakes(self):
+        from cron.scheduler import _parse_wake_gate
+        assert _parse_wake_gate('{"wakeAgent": true}') is True
+
+    def test_wake_gate_missing_wakes(self):
+        """A JSON dict without a wakeAgent key defaults to waking."""
+        from cron.scheduler import _parse_wake_gate
+        assert _parse_wake_gate('{"data": {"foo": "bar"}}') is True
+
+    def test_non_boolean_false_still_wakes(self):
+        """Only strict ``False`` skips — truthy/falsy shortcuts are too risky."""
+        from cron.scheduler import _parse_wake_gate
+        assert _parse_wake_gate('{"wakeAgent": 0}') is True
+        assert _parse_wake_gate('{"wakeAgent": null}') is True
+        assert _parse_wake_gate('{"wakeAgent": ""}') is True
+
+    def test_only_last_non_empty_line_parsed(self):
+        from cron.scheduler import _parse_wake_gate
+        multi = 'some log output\nmore output\n{"wakeAgent": false}'
+        assert _parse_wake_gate(multi) is False
+
+    def test_trailing_blank_lines_ignored(self):
+        from cron.scheduler import _parse_wake_gate
+        multi = '{"wakeAgent": false}\n\n\n'
+        assert _parse_wake_gate(multi) is False
+
+    def test_non_last_json_line_does_not_gate(self):
+        """A JSON gate on an earlier line with plain text after it does NOT trigger."""
+        from cron.scheduler import _parse_wake_gate
+        multi = '{"wakeAgent": false}\nactually this is the real output'
+        assert _parse_wake_gate(multi) is True
+
+
+class TestRunJobWakeGate:
+    """Integration tests for run_job wake-gate short-circuit."""
+
+    def _make_job(self, name="wake-gate-test", script="check.py"):
+        """Minimal valid cron job dict for run_job."""
+        return {
+            "id": f"job_{name}",
+            "name": name,
+            "prompt": "Do a thing",
+            "schedule": "*/5 * * * *",
+            "script": script,
+        }
+
+    def test_wake_false_skips_agent_and_returns_silent(self, caplog):
+        """When _run_job_script output ends with {wakeAgent: false}, the agent
+        is not invoked and run_job returns the SILENT marker so delivery is
+        suppressed."""
+        from cron.scheduler import SILENT_MARKER
+        import cron.scheduler as scheduler
+
+        with patch.object(scheduler, "_run_job_script",
+                          return_value=(True, '{"wakeAgent": false}')), \
+             patch("run_agent.AIAgent") as agent_cls:
+            success, doc, final, err = scheduler.run_job(self._make_job())
+
+        assert success is True
+        assert err is None
+        assert final == SILENT_MARKER
+        assert "Script gate returned `wakeAgent=false`" in doc
+        agent_cls.assert_not_called()
+
+    def test_wake_true_runs_agent_with_injected_output(self):
+        """When the script returns {wakeAgent: true, data: ...}, the agent is
+        invoked and the data line still shows up in the prompt."""
+        import cron.scheduler as scheduler
+
+        script_output = '{"wakeAgent": true, "data": {"new": 3}}'
+        agent = MagicMock()
+        agent.run_conversation = MagicMock(return_value={
+            "final_response": "ok", "messages": []
+        })
+        with patch.object(scheduler, "_run_job_script",
+                          return_value=(True, script_output)), \
+             patch("run_agent.AIAgent", return_value=agent) as agent_cls:
+            success, doc, final, err = scheduler.run_job(self._make_job())
+
+        agent_cls.assert_called_once()
+        # The script output should be visible in the prompt passed to
+        # run_conversation.
+        call_kwargs = agent.run_conversation.call_args
+        prompt_arg = call_kwargs.args[0] if call_kwargs.args else call_kwargs.kwargs.get("user_message", "")
+        assert script_output in prompt_arg
+        assert success is True
+        assert err is None
+
+    def test_script_runs_only_once_on_wake(self):
+        """Wake-true path must not re-run the script inside _build_job_prompt
+        (script would execute twice otherwise, wasting work and risking
+        double-side-effects)."""
+        import cron.scheduler as scheduler
+
+        call_count = 0
+        def _script_stub(path):
+            nonlocal call_count
+            call_count += 1
+            return (True, "regular output")
+
+        agent = MagicMock()
+        agent.run_conversation = MagicMock(return_value={
+            "final_response": "ok", "messages": []
+        })
+        with patch.object(scheduler, "_run_job_script", side_effect=_script_stub), \
+             patch("run_agent.AIAgent", return_value=agent):
+            scheduler.run_job(self._make_job())
+
+        assert call_count == 1, f"script ran {call_count}x, expected exactly 1"
+
+    def test_script_failure_does_not_trigger_gate(self):
+        """If _run_job_script returns success=False, the gate is NOT evaluated
+        and the agent still runs (the failure is reported as context)."""
+        import cron.scheduler as scheduler
+
+        # Malicious or broken script whose stderr happens to contain the
+        # gate JSON — we must NOT honor it because ran_ok is False.
+        agent = MagicMock()
+        agent.run_conversation = MagicMock(return_value={
+            "final_response": "ok", "messages": []
+        })
+        with patch.object(scheduler, "_run_job_script",
+                          return_value=(False, '{"wakeAgent": false}')), \
+             patch("run_agent.AIAgent", return_value=agent) as agent_cls:
+            success, doc, final, err = scheduler.run_job(self._make_job())
+
+        agent_cls.assert_called_once()  # Agent DID wake despite the gate-like text
+
+    def test_no_script_path_runs_agent_normally(self):
+        """Regression: jobs without a script still work."""
+        import cron.scheduler as scheduler
+
+        agent = MagicMock()
+        agent.run_conversation = MagicMock(return_value={
+            "final_response": "ok", "messages": []
+        })
+        job = self._make_job(script=None)
+        job.pop("script", None)
+        with patch.object(scheduler, "_run_job_script") as script_fn, \
+             patch("run_agent.AIAgent", return_value=agent) as agent_cls:
+            scheduler.run_job(job)
+
+        script_fn.assert_not_called()
+        agent_cls.assert_called_once()
+
+
 class TestBuildJobPromptMissingSkill:
     """Verify that a missing skill logs a warning and does not crash the job."""
 

From 1d1e1277e496f3b8d2742e4c8ce83b47dde5fa23 Mon Sep 17 00:00:00 2001
From: konsisumer <der@konsi.org>
Date: Sat, 18 Apr 2026 07:10:05 +0200
Subject: [PATCH 116/143] fix(gateway): flush undelivered tail before segment
 reset to preserve streamed text (#8124)

When a streaming edit fails mid-stream (flood control, transport error)
and a tool boundary arrives before the fallback threshold is reached,
the pre-boundary tail in `_accumulated` was silently discarded by
`_reset_segment_state`. The user saw a frozen partial message and
missing words on the other side of the tool call.

Flush the undelivered tail as a continuation message before the reset,
computed relative to the last successfully-delivered prefix so we don't
duplicate content the user already saw.
---
 gateway/stream_consumer.py            | 48 ++++++++++++++++++++++
 tests/gateway/test_stream_consumer.py | 59 ++++++++++++++++++++++++++-
 2 files changed, 105 insertions(+), 2 deletions(-)

diff --git a/gateway/stream_consumer.py b/gateway/stream_consumer.py
index ae00aee392b..146715b1644 100644
--- a/gateway/stream_consumer.py
+++ b/gateway/stream_consumer.py
@@ -430,6 +430,21 @@ class GatewayStreamConsumer:
                 # a real string like "msg_1", not "__no_edit__", so that case
                 # still resets and creates a fresh segment as intended.)
                 if got_segment_break:
+                    # If the segment-break edit failed to deliver the
+                    # accumulated content (flood control that has not yet
+                    # promoted to fallback mode, or fallback mode itself),
+                    # _accumulated still holds pre-boundary text the user
+                    # never saw. Flush that tail as a continuation message
+                    # before the reset below wipes _accumulated — otherwise
+                    # text generated before the tool boundary is silently
+                    # dropped (issue #8124).
+                    if (
+                        self._accumulated
+                        and not current_update_visible
+                        and self._message_id
+                        and self._message_id != "__no_edit__"
+                    ):
+                        await self._flush_segment_tail_on_edit_failure()
                     self._reset_segment_state(preserve_no_edit=True)
 
                 await asyncio.sleep(0.05)  # Small yield to not busy-loop
@@ -620,6 +635,39 @@ class GatewayStreamConsumer:
         err_lower = err.lower()
         return "flood" in err_lower or "retry after" in err_lower or "rate" in err_lower
 
+    async def _flush_segment_tail_on_edit_failure(self) -> None:
+        """Deliver un-sent tail content before a segment-break reset.
+
+        When an edit fails (flood control, transport error) and a tool
+        boundary arrives before the next retry, ``_accumulated`` holds text
+        that was generated but never shown to the user. Without this flush,
+        the segment reset would discard that tail and leave a frozen cursor
+        in the partial message.
+
+        Sends the tail that sits after the last successfully-delivered
+        prefix as a new message, and best-effort strips the stuck cursor
+        from the previous partial message.
+        """
+        if not self._fallback_final_send:
+            await self._try_strip_cursor()
+        visible = self._fallback_prefix or self._visible_prefix()
+        tail = self._accumulated
+        if visible and tail.startswith(visible):
+            tail = tail[len(visible):].lstrip()
+        tail = self._clean_for_display(tail)
+        if not tail.strip():
+            return
+        try:
+            result = await self.adapter.send(
+                chat_id=self.chat_id,
+                content=tail,
+                metadata=self.metadata,
+            )
+            if result.success:
+                self._already_sent = True
+        except Exception as e:
+            logger.error("Segment-break tail flush error: %s", e)
+
     async def _try_strip_cursor(self) -> None:
         """Best-effort edit to remove the cursor from the last visible message.
 
diff --git a/tests/gateway/test_stream_consumer.py b/tests/gateway/test_stream_consumer.py
index 99ac4dc188c..3063196f414 100644
--- a/tests/gateway/test_stream_consumer.py
+++ b/tests/gateway/test_stream_consumer.py
@@ -502,11 +502,13 @@ class TestSegmentBreakOnToolBoundary:
 
     @pytest.mark.asyncio
     async def test_segment_break_clears_failed_edit_fallback_state(self):
-        """A tool boundary after edit failure must not duplicate the next segment."""
+        """A tool boundary after edit failure must flush the undelivered tail
+        without duplicating the prefix the user already saw (#8124)."""
         adapter = MagicMock()
         send_results = [
             SimpleNamespace(success=True, message_id="msg_1"),
             SimpleNamespace(success=True, message_id="msg_2"),
+            SimpleNamespace(success=True, message_id="msg_3"),
         ]
         adapter.send = AsyncMock(side_effect=send_results)
         adapter.edit_message = AsyncMock(return_value=SimpleNamespace(success=False, error="flood_control:6"))
@@ -526,7 +528,60 @@ class TestSegmentBreakOnToolBoundary:
         await task
 
         sent_texts = [call[1]["content"] for call in adapter.send.call_args_list]
-        assert sent_texts == ["Hello ▉", "Next segment"]
+        # The undelivered "world" tail must reach the user, and the next
+        # segment must not duplicate "Hello" that was already visible.
+        assert sent_texts == ["Hello ▉", "world", "Next segment"]
+
+    @pytest.mark.asyncio
+    async def test_segment_break_after_mid_stream_edit_failure_preserves_tail(self):
+        """Regression for #8124: when an earlier edit succeeded but later edits
+        fail (persistent flood control) and a tool boundary arrives before the
+        fallback threshold is reached, the pre-boundary tail must still be
+        delivered — not silently dropped by the segment reset."""
+        adapter = MagicMock()
+        # msg_1 for the initial partial, msg_2 for the flushed tail,
+        # msg_3 for the post-boundary segment.
+        send_results = [
+            SimpleNamespace(success=True, message_id="msg_1"),
+            SimpleNamespace(success=True, message_id="msg_2"),
+            SimpleNamespace(success=True, message_id="msg_3"),
+        ]
+        adapter.send = AsyncMock(side_effect=send_results)
+
+        # The first edit succeeds, everything after fails with flood control
+        # — simulating Telegram's "edit once then get rate-limited" pattern.
+        edit_results = [
+            SimpleNamespace(success=True),   # "Hello world ▉"  — succeeds
+            SimpleNamespace(success=False, error="flood_control:6.0"),  # "Hello world more ▉" — flood triggered
+            SimpleNamespace(success=False, error="flood_control:6.0"),  # finalize edit at segment break
+            SimpleNamespace(success=False, error="flood_control:6.0"),  # cursor-strip attempt
+        ]
+        adapter.edit_message = AsyncMock(side_effect=edit_results + [edit_results[-1]] * 10)
+        adapter.MAX_MESSAGE_LENGTH = 4096
+
+        config = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5, cursor=" ▉")
+        consumer = GatewayStreamConsumer(adapter, "chat_123", config)
+
+        consumer.on_delta("Hello")
+        task = asyncio.create_task(consumer.run())
+        await asyncio.sleep(0.08)
+        consumer.on_delta(" world")
+        await asyncio.sleep(0.08)
+        consumer.on_delta(" more")
+        await asyncio.sleep(0.08)
+        consumer.on_delta(None)  # tool boundary
+        consumer.on_delta("Here is the tool result.")
+        consumer.finish()
+        await task
+
+        sent_texts = [call[1]["content"] for call in adapter.send.call_args_list]
+        # "more" must have been delivered, not dropped.
+        all_text = " ".join(sent_texts)
+        assert "more" in all_text, (
+            f"Pre-boundary tail 'more' was silently dropped: sends={sent_texts}"
+        )
+        # Post-boundary text must also reach the user.
+        assert "Here is the tool result." in all_text
 
     @pytest.mark.asyncio
     async def test_no_message_id_enters_fallback_mode(self):

From 62ce6a38ae8de84b7af5772672009f11ada1ef0e Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 19 Apr 2026 01:48:42 -0700
Subject: [PATCH 117/143] fix(gateway): cancel_background_tasks must drain
 late-arrivals (#12471)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

During gateway shutdown, a message arriving while
cancel_background_tasks is mid-await (inside asyncio.gather) spawns
a fresh _process_message_background task via handle_message and adds
it to self._background_tasks.  The original implementation's
_background_tasks.clear() at the end of cancel_background_tasks
dropped the reference; the task ran untracked against a disconnecting
adapter, logged send-failures, and lingered until it completed on
its own.

Fix: wrap the cancel+gather in a bounded loop (MAX_DRAIN_ROUNDS=5).
If new tasks appeared during the gather, cancel them in the next
round.  The .clear() at the end is preserved as a safety net for
any task that appeared after MAX_DRAIN_ROUNDS — but in practice the
drain stabilizes in 1-2 rounds.

Tests: tests/gateway/test_cancel_background_drain.py — 3 cases.
- test_cancel_background_tasks_drains_late_arrivals: spawn M1, start
  cancel, inject M2 during M1's shielded cleanup, verify M2 is
  cancelled.
- test_cancel_background_tasks_handles_no_tasks: no-op path still
  terminates cleanly.
- test_cancel_background_tasks_bounded_rounds: baseline — single
  task cancels in one round, loop terminates.

Regression-guard validated: against the unpatched implementation,
the late-arrival test fails with exactly the expected message
('task leaked').  With the fix it passes.

Blast radius is shutdown-only; the audit classified this as MED.
Shipping because the fix is small and the hygiene is worth it.

While investigating the audit's other MEDs (busy-handler double-ack,
Discord ExecApprovalView double-resolve, UpdatePromptView
double-resolve), I verified all three were false positives — the
check-and-set patterns have no await between them, so they're
atomic on single-threaded asyncio.  No fix needed for those.
---
 gateway/platforms/base.py                     |  24 ++-
 tests/gateway/test_cancel_background_drain.py | 148 ++++++++++++++++++
 2 files changed, 167 insertions(+), 5 deletions(-)
 create mode 100644 tests/gateway/test_cancel_background_drain.py

diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index 65f7226e10a..645a642ba1a 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -2033,12 +2033,26 @@ class BasePlatformAdapter(ABC):
         Used during gateway shutdown/replacement so active sessions from the old
         process do not keep running after adapters are being torn down.
         """
-        tasks = [task for task in self._background_tasks if not task.done()]
-        for task in tasks:
-            self._expected_cancelled_tasks.add(task)
-            task.cancel()
-        if tasks:
+        # Loop until no new tasks appear.  Without this, a message
+        # arriving during the `await asyncio.gather` below would spawn
+        # a fresh _process_message_background task (added to
+        # self._background_tasks via handle_message), and the
+        # _background_tasks.clear() at the end of this method would
+        # drop the reference — the task runs untracked against a
+        # disconnecting adapter, logs send-failures, and may linger
+        # until it completes on its own.  Re-draining for a bounded
+        # number of rounds until the task set stabilizes closes the window.
+        MAX_DRAIN_ROUNDS = 5
+        for _ in range(MAX_DRAIN_ROUNDS):
+            tasks = [task for task in self._background_tasks if not task.done()]
+            if not tasks:
+                break
+            for task in tasks:
+                self._expected_cancelled_tasks.add(task)
+                task.cancel()
             await asyncio.gather(*tasks, return_exceptions=True)
+            # Loop: late-arrival tasks spawned during the gather above
+            # will be in self._background_tasks now.  Re-check.
         self._background_tasks.clear()
         self._expected_cancelled_tasks.clear()
         self._pending_messages.clear()
diff --git a/tests/gateway/test_cancel_background_drain.py b/tests/gateway/test_cancel_background_drain.py
new file mode 100644
index 00000000000..c95fdc062eb
--- /dev/null
+++ b/tests/gateway/test_cancel_background_drain.py
@@ -0,0 +1,148 @@
+"""Regression test: cancel_background_tasks must drain late-arrival tasks.
+
+During gateway shutdown, a message arriving while
+cancel_background_tasks is mid-await can spawn a fresh
+_process_message_background task via handle_message, which is added
+to self._background_tasks.  Without the re-drain loop, the subsequent
+_background_tasks.clear() drops the reference; the task runs
+untracked against a disconnecting adapter.
+"""
+
+import asyncio
+from unittest.mock import AsyncMock
+
+import pytest
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType
+from gateway.session import SessionSource, build_session_key
+
+
+class _StubAdapter(BasePlatformAdapter):
+    async def connect(self):
+        pass
+
+    async def disconnect(self):
+        pass
+
+    async def send(self, chat_id, text, **kwargs):
+        return None
+
+    async def get_chat_info(self, chat_id):
+        return {}
+
+
+def _make_adapter():
+    adapter = _StubAdapter(PlatformConfig(enabled=True, token="t"), Platform.TELEGRAM)
+    adapter._send_with_retry = AsyncMock(return_value=None)
+    return adapter
+
+
+def _event(text, cid="42"):
+    return MessageEvent(
+        text=text,
+        message_type=MessageType.TEXT,
+        source=SessionSource(platform=Platform.TELEGRAM, chat_id=cid, chat_type="dm"),
+    )
+
+
+@pytest.mark.asyncio
+async def test_cancel_background_tasks_drains_late_arrivals():
+    """A message that arrives during the gather window must be picked
+    up by the re-drain loop, not leaked as an untracked task."""
+    adapter = _make_adapter()
+    sk = build_session_key(
+        SessionSource(platform=Platform.TELEGRAM, chat_id="42", chat_type="dm")
+    )
+
+    m1_started = asyncio.Event()
+    m1_cleanup_running = asyncio.Event()
+    m2_started = asyncio.Event()
+    m2_cancelled = asyncio.Event()
+
+    async def handler(event):
+        if event.text == "M1":
+            m1_started.set()
+            try:
+                await asyncio.sleep(10)
+            except asyncio.CancelledError:
+                m1_cleanup_running.set()
+                # Widen the gather window with a shielded cleanup
+                # delay so M2 can get injected during it.
+                await asyncio.shield(asyncio.sleep(0.2))
+                raise
+        else:  # M2 — the late arrival
+            m2_started.set()
+            try:
+                await asyncio.sleep(10)
+            except asyncio.CancelledError:
+                m2_cancelled.set()
+                raise
+
+    adapter._message_handler = handler
+
+    # Spawn M1.
+    await adapter.handle_message(_event("M1"))
+    await asyncio.wait_for(m1_started.wait(), timeout=1.0)
+
+    # Kick off shutdown.  This will cancel M1 and await its cleanup.
+    cancel_task = asyncio.create_task(adapter.cancel_background_tasks())
+
+    # Wait until M1's cleanup is running (inside the shielded sleep).
+    # This is the race window: cancel_task is awaiting gather while M1
+    # is still in its shielded cleanup, so its _active_sessions entry
+    # may not have been released yet.
+    await asyncio.wait_for(m1_cleanup_running.wait(), timeout=1.0)
+
+    # Clear the active-session entry (M1's finally hasn't fully run yet,
+    # but in production the platform dispatcher would deliver a new
+    # message that takes the no-active-session spawn path).  For this
+    # repro, make it deterministic.
+    adapter._active_sessions.pop(sk, None)
+
+    # Inject late arrival — spawns a fresh _process_message_background
+    # task and adds it to _background_tasks while cancel_task is still
+    # in gather.
+    await adapter.handle_message(_event("M2"))
+    await asyncio.wait_for(m2_started.wait(), timeout=1.0)
+
+    # Let cancel_task finish.  Round 1's gather completes when M1's
+    # shielded cleanup finishes.  Round 2 should pick up M2.
+    await asyncio.wait_for(cancel_task, timeout=5.0)
+
+    # Assert M2 was drained, not leaked.
+    assert m2_cancelled.is_set(), (
+        "Late-arrival M2 was NOT cancelled by cancel_background_tasks — "
+        "the re-drain loop is missing and the task leaked"
+    )
+    assert adapter._background_tasks == set()
+
+
+@pytest.mark.asyncio
+async def test_cancel_background_tasks_handles_no_tasks():
+    """Regression guard: no tasks, no hang, no error."""
+    adapter = _make_adapter()
+    await adapter.cancel_background_tasks()
+    assert adapter._background_tasks == set()
+
+
+@pytest.mark.asyncio
+async def test_cancel_background_tasks_bounded_rounds():
+    """Regression guard: baseline — a single well-behaved task is
+    cancelled in one drain round and the loop terminates."""
+    adapter = _make_adapter()
+
+    # Single well-behaved task that cancels cleanly — baseline check
+    # that the loop terminates in one round.
+    async def quick():
+        try:
+            await asyncio.sleep(10)
+        except asyncio.CancelledError:
+            raise
+
+    task = asyncio.create_task(quick())
+    adapter._background_tasks.add(task)
+
+    await adapter.cancel_background_tasks()
+    assert task.done()
+    assert adapter._background_tasks == set()

From b668c09ab2e4a4edeceea04da9521329669b9391 Mon Sep 17 00:00:00 2001
From: Tranquil-Flow <tranquil_flow@protonmail.com>
Date: Sun, 19 Apr 2026 01:48:33 -0700
Subject: [PATCH 118/143] fix(gateway): strip cursor from frozen message on
 empty fallback continuation (#7183)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When _send_fallback_final() is called with nothing new to deliver
(the visible partial already matches final_text), the last edit may
still show the cursor character because fallback mode was entered
after a failed edit.  Before this fix the early-return path left
_already_sent = True without attempting to strip the cursor, so the
message stayed frozen with a visible ▉ permanently.

Adds a best-effort edit inside the empty-continuation branch to clean
the cursor off the last-sent text.  Harmless when fallback mode
wasn't actually armed or when the cursor isn't present.  If the strip
edit itself fails (flood still active), we return without crashing
and without corrupting _last_sent_text.

Adapted from PR #7429 onto current main — the surrounding fallback
block grew the #10807 stale-prefix handling since #7429 was written,
so the cursor strip lives in the new else-branch where we still
return early.

3 unit tests covering: cursor stripped on empty continuation, no edit
attempted when cursor is not configured, cursor-strip edit failure
handled without crash.

Originally proposed as PR #7429.
---
 gateway/stream_consumer.py            | 24 ++++++++
 tests/gateway/test_stream_consumer.py | 84 +++++++++++++++++++++++++++
 2 files changed, 108 insertions(+)

diff --git a/gateway/stream_consumer.py b/gateway/stream_consumer.py
index 146715b1644..78e365712d9 100644
--- a/gateway/stream_consumer.py
+++ b/gateway/stream_consumer.py
@@ -571,6 +571,30 @@ class GatewayStreamConsumer:
             if final_text.strip() and final_text != self._visible_prefix():
                 continuation = final_text
             else:
+                # Defence-in-depth for #7183: the last edit may still show the
+                # cursor character because fallback mode was entered after an
+                # edit failure left it stuck.  Try one final edit to strip it
+                # so the message doesn't freeze with a visible ▉.  Best-effort
+                # — if this edit also fails (flood control still active),
+                # _try_strip_cursor has already been called on fallback entry
+                # and the adaptive-backoff retries will have had their shot.
+                if (
+                    self._message_id
+                    and self._last_sent_text
+                    and self.cfg.cursor
+                    and self._last_sent_text.endswith(self.cfg.cursor)
+                ):
+                    clean_text = self._last_sent_text[:-len(self.cfg.cursor)]
+                    try:
+                        result = await self.adapter.edit_message(
+                            chat_id=self.chat_id,
+                            message_id=self._message_id,
+                            content=clean_text,
+                        )
+                        if result.success:
+                            self._last_sent_text = clean_text
+                    except Exception:
+                        pass
                 self._already_sent = True
                 self._final_response_sent = True
                 return
diff --git a/tests/gateway/test_stream_consumer.py b/tests/gateway/test_stream_consumer.py
index 3063196f414..0a0e0631db7 100644
--- a/tests/gateway/test_stream_consumer.py
+++ b/tests/gateway/test_stream_consumer.py
@@ -1216,3 +1216,87 @@ class TestBufferOnlyMode:
         # text, the consumer may send then edit, or just send once at got_done.
         # The key assertion: this doesn't break.
         assert adapter.send.call_count >= 1
+
+
+# ── Cursor stripping on fallback (#7183) ────────────────────────────────────
+
+
+class TestCursorStrippingOnFallback:
+    """Regression: cursor must be stripped when fallback continuation is empty (#7183).
+
+    When _send_fallback_final is called with nothing new to deliver (the visible
+    partial already matches final_text), the last edit may still show the cursor
+    character because fallback mode was entered after a failed edit.  Before the
+    fix this would leave the message permanently frozen with a visible ▉.
+    """
+
+    @pytest.mark.asyncio
+    async def test_cursor_stripped_when_continuation_empty(self):
+        """_send_fallback_final must attempt a final edit to strip the cursor."""
+        adapter = MagicMock()
+        adapter.MAX_MESSAGE_LENGTH = 4096
+        adapter.edit_message = AsyncMock(
+            return_value=SimpleNamespace(success=True, message_id="msg-1")
+        )
+
+        consumer = GatewayStreamConsumer(
+            adapter, "chat-1",
+            config=StreamConsumerConfig(cursor=" ▉"),
+        )
+        consumer._message_id = "msg-1"
+        consumer._last_sent_text = "Hello world ▉"
+        consumer._fallback_final_send = False
+
+        await consumer._send_fallback_final("Hello world")
+
+        adapter.edit_message.assert_called_once()
+        call_args = adapter.edit_message.call_args
+        assert call_args.kwargs["content"] == "Hello world"
+        assert consumer._already_sent is True
+        # _last_sent_text should reflect the cleaned text after a successful strip
+        assert consumer._last_sent_text == "Hello world"
+
+    @pytest.mark.asyncio
+    async def test_cursor_not_stripped_when_no_cursor_configured(self):
+        """No edit attempted when cursor is not configured."""
+        adapter = MagicMock()
+        adapter.MAX_MESSAGE_LENGTH = 4096
+        adapter.edit_message = AsyncMock()
+
+        consumer = GatewayStreamConsumer(
+            adapter, "chat-1",
+            config=StreamConsumerConfig(cursor=""),
+        )
+        consumer._message_id = "msg-1"
+        consumer._last_sent_text = "Hello world"
+        consumer._fallback_final_send = False
+
+        await consumer._send_fallback_final("Hello world")
+
+        adapter.edit_message.assert_not_called()
+        assert consumer._already_sent is True
+
+    @pytest.mark.asyncio
+    async def test_cursor_strip_edit_failure_handled(self):
+        """If the cursor-stripping edit itself fails, it must not crash and
+        must not corrupt _last_sent_text."""
+        adapter = MagicMock()
+        adapter.MAX_MESSAGE_LENGTH = 4096
+        adapter.edit_message = AsyncMock(
+            return_value=SimpleNamespace(success=False, error="flood_control")
+        )
+
+        consumer = GatewayStreamConsumer(
+            adapter, "chat-1",
+            config=StreamConsumerConfig(cursor=" ▉"),
+        )
+        consumer._message_id = "msg-1"
+        consumer._last_sent_text = "Hello ▉"
+        consumer._fallback_final_send = False
+
+        await consumer._send_fallback_final("Hello")
+
+        # Should still set already_sent despite the cursor-strip edit failure
+        assert consumer._already_sent is True
+        # _last_sent_text must NOT be updated when the edit failed
+        assert consumer._last_sent_text == "Hello ▉"

From 588333908c52b9eb372fdd2a411062f14d797094 Mon Sep 17 00:00:00 2001
From: helix4u <4317663+helix4u@users.noreply.github.com>
Date: Wed, 8 Apr 2026 21:13:28 -0600
Subject: [PATCH 119/143] fix(telegram): warn on docker-only media paths

---
 gateway/platforms/telegram.py                 |  9 +++-
 gateway/run.py                                | 50 +++++++++++++++++++
 hermes_cli/config.py                          |  6 ++-
 tests/gateway/test_runner_startup_failures.py | 21 ++++++++
 tests/gateway/test_telegram_documents.py      | 13 +++++
 website/docs/user-guide/configuration.md      | 18 ++++++-
 website/docs/user-guide/messaging/telegram.md | 32 ++++++++++++
 7 files changed, 146 insertions(+), 3 deletions(-)

diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index f71614054c3..d1935c8090d 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -1759,7 +1759,14 @@ class TelegramAdapter(BasePlatformAdapter):
 
         try:
             if not os.path.exists(file_path):
-                return SendResult(success=False, error=f"File not found: {file_path}")
+                error = f"File not found: {file_path}"
+                if file_path.startswith(("/workspace/", "/output/")):
+                    error += (
+                        " (path may only exist inside the Docker sandbox. "
+                        "Bind-mount a host directory and emit the host-visible "
+                        "path in MEDIA: for gateway file delivery.)"
+                    )
+                return SendResult(success=False, error=error)
 
             display_name = file_name or os.path.basename(file_path)
             _thread = self._metadata_thread_id(metadata)
diff --git a/gateway/run.py b/gateway/run.py
index b72e95eb839..d7dcaf14516 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -96,6 +96,10 @@ from hermes_cli.env_loader import load_hermes_dotenv
 _env_path = _hermes_home / '.env'
 load_hermes_dotenv(hermes_home=_hermes_home, project_env=Path(__file__).resolve().parents[1] / '.env')
 
+
+_DOCKER_VOLUME_SPEC_RE = re.compile(r"^(?P<host>.+):(?P<container>/[^:]+?)(?::(?P<options>[^:]+))?$")
+_DOCKER_MEDIA_OUTPUT_CONTAINER_PATHS = {"/output", "/outputs"}
+
 # Bridge config.yaml values into the environment so os.getenv() picks them up.
 # config.yaml is authoritative for terminal settings — overrides .env.
 _config_path = _hermes_home / 'config.yaml'
@@ -585,6 +589,7 @@ class GatewayRunner:
     def __init__(self, config: Optional[GatewayConfig] = None):
         self.config = config or load_gateway_config()
         self.adapters: Dict[Platform, BasePlatformAdapter] = {}
+        self._warn_if_docker_media_delivery_is_likely_misconfigured()
 
         # Load ephemeral config from config.yaml / env vars.
         # Both are injected at API-call time only and never persisted.
@@ -691,6 +696,51 @@ class GatewayRunner:
         self._background_tasks: set = set()
 
 
+    def _warn_if_docker_media_delivery_is_likely_misconfigured(self) -> None:
+        """Warn when Docker-backed gateway setups lack an obvious output bind mount.
+
+        MEDIA delivery happens in the gateway process, so paths emitted by the model
+        must be readable from the host. A plain container-local path like
+        `/workspace/report.txt` often exists only inside Docker.
+        """
+        if os.getenv("TERMINAL_ENV", "").strip().lower() != "docker":
+            return
+
+        connected = self.config.get_connected_platforms()
+        messaging_platforms = [p for p in connected if p not in {Platform.LOCAL, Platform.API_SERVER, Platform.WEBHOOK}]
+        if not messaging_platforms:
+            return
+
+        raw_volumes = os.getenv("TERMINAL_DOCKER_VOLUMES", "").strip()
+        volumes: List[str] = []
+        if raw_volumes:
+            try:
+                parsed = json.loads(raw_volumes)
+                if isinstance(parsed, list):
+                    volumes = [str(v) for v in parsed if isinstance(v, str)]
+            except Exception:
+                logger.debug("Could not parse TERMINAL_DOCKER_VOLUMES for gateway media warning", exc_info=True)
+
+        has_explicit_output_mount = False
+        for spec in volumes:
+            match = _DOCKER_VOLUME_SPEC_RE.match(spec)
+            if not match:
+                continue
+            container_path = match.group("container")
+            if container_path in _DOCKER_MEDIA_OUTPUT_CONTAINER_PATHS:
+                has_explicit_output_mount = True
+                break
+
+        if has_explicit_output_mount:
+            return
+
+        logger.warning(
+            "Docker backend is enabled for the messaging gateway but no explicit host-visible "
+            "output mount (for example '/home/user/.hermes/cache/documents:/output') is configured. "
+            "MEDIA file delivery can fail for files that only exist inside the container, such as "
+            "'/workspace/...'."
+        )
+
 
 
     # -- Setup skill availability ----------------------------------------
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 1dedc1710aa..786ff622d93 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -403,7 +403,11 @@ DEFAULT_CONFIG = {
         "container_persistent": True,   # Persist filesystem across sessions
         # Docker volume mounts — share host directories with the container.
         # Each entry is "host_path:container_path" (standard Docker -v syntax).
-        # Example: ["/home/user/projects:/workspace/projects", "/data:/data"]
+        # Example:
+        # ["/home/user/projects:/workspace/projects",
+        #  "/home/user/.hermes/cache/documents:/output"]
+        # For gateway MEDIA delivery, write inside Docker to /output/... and emit
+        # the host-visible path in MEDIA:, not the container path.
         "docker_volumes": [],
         # Explicit opt-in: mount the host cwd into /workspace for Docker sessions.
         # Default off because passing host directories into a sandbox weakens isolation.
diff --git a/tests/gateway/test_runner_startup_failures.py b/tests/gateway/test_runner_startup_failures.py
index 977d66fb3be..ddcdd1aaa0f 100644
--- a/tests/gateway/test_runner_startup_failures.py
+++ b/tests/gateway/test_runner_startup_failures.py
@@ -107,6 +107,7 @@ async def test_runner_allows_cron_only_mode_when_no_platforms_are_enabled(monkey
     assert state["gateway_state"] == "running"
 
 
+<<<<<<< HEAD
 @pytest.mark.asyncio
 async def test_runner_records_connected_platform_state_on_success(monkeypatch, tmp_path):
     monkeypatch.setenv("HERMES_HOME", str(tmp_path))
@@ -319,3 +320,23 @@ async def test_start_gateway_replace_clears_marker_on_permission_denied(
     assert ok is False
     # Marker must NOT be left behind
     assert not (tmp_path / ".gateway-takeover.json").exists()
+
+
+def test_runner_warns_when_docker_gateway_lacks_explicit_output_mount(monkeypatch, tmp_path, caplog):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    monkeypatch.setenv("TERMINAL_ENV", "docker")
+    monkeypatch.setenv("TERMINAL_DOCKER_VOLUMES", '["/etc/localtime:/etc/localtime:ro"]')
+    config = GatewayConfig(
+        platforms={
+            Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")
+        },
+        sessions_dir=tmp_path / "sessions",
+    )
+
+    with caplog.at_level("WARNING"):
+        GatewayRunner(config)
+
+    assert any(
+        "host-visible output mount" in record.message
+        for record in caplog.records
+    )
diff --git a/tests/gateway/test_telegram_documents.py b/tests/gateway/test_telegram_documents.py
index 86e5cb30fb0..2036f46a216 100644
--- a/tests/gateway/test_telegram_documents.py
+++ b/tests/gateway/test_telegram_documents.py
@@ -483,6 +483,19 @@ class TestSendDocument:
         assert "not found" in result.error.lower()
         connected_adapter._bot.send_document.assert_not_called()
 
+    @pytest.mark.asyncio
+    async def test_send_document_workspace_path_has_docker_hint(self, connected_adapter):
+        """Container-local-looking paths get a more actionable Docker hint."""
+        result = await connected_adapter.send_document(
+            chat_id="12345",
+            file_path="/workspace/report.txt",
+        )
+
+        assert result.success is False
+        assert "docker sandbox" in result.error.lower()
+        assert "host-visible path" in result.error.lower()
+        connected_adapter._bot.send_document.assert_not_called()
+
     @pytest.mark.asyncio
     async def test_send_document_not_connected(self, adapter):
         """If bot is None, returns not connected error."""
diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md
index dbc6b0e47e6..f91a25c3843 100644
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@@ -257,7 +257,7 @@ terminal:
   docker_volumes:
     - "/home/user/projects:/workspace/projects"   # Read-write (default)
     - "/home/user/datasets:/data:ro"              # Read-only
-    - "/home/user/outputs:/outputs"               # Agent writes, you read
+    - "/home/user/.hermes/cache/documents:/output" # Gateway-visible exports
 ```
 
 This is useful for:
@@ -265,6 +265,22 @@ This is useful for:
 - **Receiving files** from the agent (generated code, reports, exports)
 - **Shared workspaces** where both you and the agent access the same files
 
+If you use a messaging gateway and want the agent to send generated files via
+`MEDIA:/...`, prefer a dedicated host-visible export mount such as
+`/home/user/.hermes/cache/documents:/output`.
+
+- Write files inside Docker to `/output/...`
+- Emit the **host path** in `MEDIA:`, for example:
+  `MEDIA:/home/user/.hermes/cache/documents/report.txt`
+- Do **not** emit `/workspace/...` or `/output/...` unless that exact path also
+  exists for the gateway process on the host
+
+:::warning
+YAML duplicate keys silently override earlier ones. If you already have a
+`docker_volumes:` block, merge new mounts into the same list instead of adding
+another `docker_volumes:` key later in the file.
+:::
+
 Can also be set via environment variable: `TERMINAL_DOCKER_VOLUMES='["/host:/container"]'` (JSON array).
 
 ### Docker Credential Forwarding
diff --git a/website/docs/user-guide/messaging/telegram.md b/website/docs/user-guide/messaging/telegram.md
index 6dbf9e61dff..a92fc8d2232 100644
--- a/website/docs/user-guide/messaging/telegram.md
+++ b/website/docs/user-guide/messaging/telegram.md
@@ -112,6 +112,38 @@ hermes gateway
 
 The bot should come online within seconds. Send it a message on Telegram to verify.
 
+## Sending Generated Files from Docker-backed Terminals
+
+If your terminal backend is `docker`, keep in mind that Telegram attachments are
+sent by the **gateway process**, not from inside the container. That means the
+final `MEDIA:/...` path must be readable on the host where the gateway is
+running.
+
+Common pitfall:
+
+- the agent writes a file inside Docker to `/workspace/report.txt`
+- the model emits `MEDIA:/workspace/report.txt`
+- Telegram delivery fails because `/workspace/report.txt` only exists inside the
+  container, not on the host
+
+Recommended pattern:
+
+```yaml
+terminal:
+  backend: docker
+  docker_volumes:
+    - "/home/user/.hermes/cache/documents:/output"
+```
+
+Then:
+
+- write files inside Docker to `/output/...`
+- emit the **host-visible** path in `MEDIA:`, for example:
+  `MEDIA:/home/user/.hermes/cache/documents/report.txt`
+
+If you already have a `docker_volumes:` section, add the new mount to the same
+list. YAML duplicate keys silently override earlier ones.
+
 ## Webhook Mode
 
 By default, Hermes connects to Telegram using **long polling** — the gateway makes outbound requests to Telegram's servers to fetch new updates. This works well for local and always-on deployments.

From ff63e2e005ebbbfade9542437713b699624ed254 Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Sun, 19 Apr 2026 14:08:30 +0530
Subject: [PATCH 120/143] fix: tighten telegram docker-media salvage follow-ups

Follow-up on top of the helix4u #6392 cherry-pick:
- reuse one helper for actionable Docker-local file-not-found errors
  across document/image/video/audio local-media send paths
- include /outputs/... alongside /output/... in the container-local
  path hint
- soften the gateway startup warning so it does not imply custom
  host-visible mounts are broken; the warning now targets the specific
  risky pattern of emitting container-local MEDIA paths without an
  explicit export mount
- add focused regressions for /outputs/... and non-document media hint
  coverage

This keeps the salvage aligned with the actual MEDIA delivery problem on
current main while reducing false-positive operator messaging.
---
 gateway/platforms/telegram.py                 | 30 ++++++++++++-------
 gateway/run.py                                | 14 +++++----
 tests/gateway/test_runner_startup_failures.py |  1 -
 tests/gateway/test_telegram_documents.py      | 24 +++++++++++++++
 4 files changed, 51 insertions(+), 18 deletions(-)

diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index d1935c8090d..0b74c4e15f4 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -1657,6 +1657,21 @@ class TelegramAdapter(BasePlatformAdapter):
         except Exception as exc:
             logger.error("Failed to write update response from callback: %s", exc)
 
+    def _missing_media_path_error(self, label: str, path: str) -> str:
+        """Build an actionable file-not-found error for gateway MEDIA delivery.
+
+        Paths like /workspace/... or /output/... often only exist inside the
+        Docker sandbox, while the gateway process runs on the host.
+        """
+        error = f"{label} file not found: {path}"
+        if path.startswith(("/workspace/", "/output/", "/outputs/")):
+            error += (
+                " (path may only exist inside the Docker sandbox. "
+                "Bind-mount a host directory and emit the host-visible "
+                "path in MEDIA: for gateway file delivery.)"
+            )
+        return error
+
     async def send_voice(
         self,
         chat_id: str,
@@ -1673,7 +1688,7 @@ class TelegramAdapter(BasePlatformAdapter):
         try:
             import os
             if not os.path.exists(audio_path):
-                return SendResult(success=False, error=f"Audio file not found: {audio_path}")
+                return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path))
             
             with open(audio_path, "rb") as audio_file:
                 # .ogg files -> send as voice (round playable bubble)
@@ -1722,7 +1737,7 @@ class TelegramAdapter(BasePlatformAdapter):
         try:
             import os
             if not os.path.exists(image_path):
-                return SendResult(success=False, error=f"Image file not found: {image_path}")
+                return SendResult(success=False, error=self._missing_media_path_error("Image", image_path))
 
             _thread = self._metadata_thread_id(metadata)
             with open(image_path, "rb") as image_file:
@@ -1759,14 +1774,7 @@ class TelegramAdapter(BasePlatformAdapter):
 
         try:
             if not os.path.exists(file_path):
-                error = f"File not found: {file_path}"
-                if file_path.startswith(("/workspace/", "/output/")):
-                    error += (
-                        " (path may only exist inside the Docker sandbox. "
-                        "Bind-mount a host directory and emit the host-visible "
-                        "path in MEDIA: for gateway file delivery.)"
-                    )
-                return SendResult(success=False, error=error)
+                return SendResult(success=False, error=self._missing_media_path_error("File", file_path))
 
             display_name = file_name or os.path.basename(file_path)
             _thread = self._metadata_thread_id(metadata)
@@ -1800,7 +1808,7 @@ class TelegramAdapter(BasePlatformAdapter):
 
         try:
             if not os.path.exists(video_path):
-                return SendResult(success=False, error=f"Video file not found: {video_path}")
+                return SendResult(success=False, error=self._missing_media_path_error("Video", video_path))
 
             _thread = self._metadata_thread_id(metadata)
             with open(video_path, "rb") as f:
diff --git a/gateway/run.py b/gateway/run.py
index d7dcaf14516..37b27232135 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -589,7 +589,7 @@ class GatewayRunner:
     def __init__(self, config: Optional[GatewayConfig] = None):
         self.config = config or load_gateway_config()
         self.adapters: Dict[Platform, BasePlatformAdapter] = {}
-        self._warn_if_docker_media_delivery_is_likely_misconfigured()
+        self._warn_if_docker_media_delivery_is_risky()
 
         # Load ephemeral config from config.yaml / env vars.
         # Both are injected at API-call time only and never persisted.
@@ -696,12 +696,14 @@ class GatewayRunner:
         self._background_tasks: set = set()
 
 
-    def _warn_if_docker_media_delivery_is_likely_misconfigured(self) -> None:
-        """Warn when Docker-backed gateway setups lack an obvious output bind mount.
+    def _warn_if_docker_media_delivery_is_risky(self) -> None:
+        """Warn when Docker-backed gateways lack an explicit export mount.
 
         MEDIA delivery happens in the gateway process, so paths emitted by the model
         must be readable from the host. A plain container-local path like
-        `/workspace/report.txt` often exists only inside Docker.
+        `/workspace/report.txt` or `/output/report.txt` often exists only inside
+        Docker, so users commonly need a dedicated export mount such as
+        `host-dir:/output`.
         """
         if os.getenv("TERMINAL_ENV", "").strip().lower() != "docker":
             return
@@ -737,8 +739,8 @@ class GatewayRunner:
         logger.warning(
             "Docker backend is enabled for the messaging gateway but no explicit host-visible "
             "output mount (for example '/home/user/.hermes/cache/documents:/output') is configured. "
-            "MEDIA file delivery can fail for files that only exist inside the container, such as "
-            "'/workspace/...'."
+            "This is fine if the model already emits host-visible paths, but MEDIA file delivery can fail "
+            "for container-local paths like '/workspace/...' or '/output/...'."
         )
 
 
diff --git a/tests/gateway/test_runner_startup_failures.py b/tests/gateway/test_runner_startup_failures.py
index ddcdd1aaa0f..96d5d4627b0 100644
--- a/tests/gateway/test_runner_startup_failures.py
+++ b/tests/gateway/test_runner_startup_failures.py
@@ -107,7 +107,6 @@ async def test_runner_allows_cron_only_mode_when_no_platforms_are_enabled(monkey
     assert state["gateway_state"] == "running"
 
 
-<<<<<<< HEAD
 @pytest.mark.asyncio
 async def test_runner_records_connected_platform_state_on_success(monkeypatch, tmp_path):
     monkeypatch.setenv("HERMES_HOME", str(tmp_path))
diff --git a/tests/gateway/test_telegram_documents.py b/tests/gateway/test_telegram_documents.py
index 2036f46a216..3a68139fa99 100644
--- a/tests/gateway/test_telegram_documents.py
+++ b/tests/gateway/test_telegram_documents.py
@@ -496,6 +496,19 @@ class TestSendDocument:
         assert "host-visible path" in result.error.lower()
         connected_adapter._bot.send_document.assert_not_called()
 
+    @pytest.mark.asyncio
+    async def test_send_document_outputs_path_has_docker_hint(self, connected_adapter):
+        """Legacy /outputs paths also get the Docker hint."""
+        result = await connected_adapter.send_document(
+            chat_id="12345",
+            file_path="/outputs/report.txt",
+        )
+
+        assert result.success is False
+        assert "docker sandbox" in result.error.lower()
+        assert "host-visible path" in result.error.lower()
+        connected_adapter._bot.send_document.assert_not_called()
+
     @pytest.mark.asyncio
     async def test_send_document_not_connected(self, adapter):
         """If bot is None, returns not connected error."""
@@ -678,6 +691,17 @@ class TestSendVideo:
         assert result.success is False
         assert "not found" in result.error.lower()
 
+    @pytest.mark.asyncio
+    async def test_send_video_workspace_path_has_docker_hint(self, connected_adapter):
+        result = await connected_adapter.send_video(
+            chat_id="12345",
+            video_path="/workspace/video.mp4",
+        )
+
+        assert result.success is False
+        assert "docker sandbox" in result.error.lower()
+        assert "host-visible path" in result.error.lower()
+
     @pytest.mark.asyncio
     async def test_send_video_not_connected(self, adapter):
         result = await adapter.send_video(

From b05d30418d1acce913a1b9a768a3330cf63d8341 Mon Sep 17 00:00:00 2001
From: helix4u <4317663+helix4u@users.noreply.github.com>
Date: Tue, 14 Apr 2026 00:09:43 -0600
Subject: [PATCH 121/143] docs: clarify profiles vs workspaces

---
 website/docs/reference/profile-commands.md |  4 +++
 website/docs/user-guide/profiles.md        | 42 +++++++++++++++++++---
 2 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/website/docs/reference/profile-commands.md b/website/docs/reference/profile-commands.md
index 8c8feafb518..e4f28e83460 100644
--- a/website/docs/reference/profile-commands.md
+++ b/website/docs/reference/profile-commands.md
@@ -81,6 +81,8 @@ Creates a new profile.
 | `--clone-from <profile>` | Clone from a specific profile instead of the current one. Used with `--clone` or `--clone-all`. |
 | `--no-alias` | Skip wrapper script creation. |
 
+Creating a profile does **not** make that profile directory the default project/workspace directory for terminal commands. If you want a profile to start in a specific project, set `terminal.cwd` in that profile's `config.yaml`.
+
 **Examples:**
 
 ```bash
@@ -129,6 +131,8 @@ hermes profile show <name>
 
 Displays details about a profile including its home directory, configured model, gateway status, skills count, and configuration file status.
 
+This shows the profile's Hermes home directory, not the terminal working directory. Terminal commands start from `terminal.cwd` (or the launch directory on the local backend when `cwd: "."`).
+
 | Argument | Description |
 |----------|-------------|
 | `<name>` | Profile to inspect. |
diff --git a/website/docs/user-guide/profiles.md b/website/docs/user-guide/profiles.md
index 67609564f7e..aef4d10b215 100644
--- a/website/docs/user-guide/profiles.md
+++ b/website/docs/user-guide/profiles.md
@@ -4,11 +4,11 @@ sidebar_position: 2
 
 # Profiles: Running Multiple Agents
 
-Run multiple independent Hermes agents on the same machine — each with its own config, API keys, memory, sessions, skills, and gateway.
+Run multiple independent Hermes agents on the same machine — each with its own config, API keys, memory, sessions, skills, and gateway state.
 
 ## What are profiles?
 
-A profile is a fully isolated Hermes environment. Each profile gets its own directory containing its own `config.yaml`, `.env`, `SOUL.md`, memories, sessions, skills, cron jobs, and state database. Profiles let you run separate agents for different purposes — a coding assistant, a personal bot, a research agent — without any cross-contamination.
+A profile is a separate Hermes home directory. Each profile gets its own directory containing its own `config.yaml`, `.env`, `SOUL.md`, memories, sessions, skills, cron jobs, and state database. Profiles let you run separate agents for different purposes — a coding assistant, a personal bot, a research agent — without mixing up Hermes state.
 
 When you create a profile, it automatically becomes its own command. Create a profile called `coder` and you immediately have `coder chat`, `coder setup`, `coder gateway start`, etc.
 
@@ -20,7 +20,7 @@ coder setup                       # configure API keys and model
 coder chat                        # start chatting
 ```
 
-That's it. `coder` is now a fully independent agent. It has its own config, its own memory, its own everything.
+That's it. `coder` is now its own Hermes profile with its own config, memory, and state.
 
 ## Creating a profile
 
@@ -104,6 +104,32 @@ The CLI always shows which profile is active:
 - **Banner**: Shows `Profile: coder` on startup
 - **`hermes profile`**: Shows current profile name, path, model, gateway status
 
+## Profiles vs workspaces vs sandboxing
+
+Profiles are often confused with workspaces or sandboxes, but they are different things:
+
+- A **profile** gives Hermes its own state directory: `config.yaml`, `.env`, `SOUL.md`, sessions, memory, logs, cron jobs, and gateway state.
+- A **workspace** or **working directory** is where terminal commands start. That is controlled separately by `terminal.cwd`.
+- A **sandbox** is what limits filesystem access. Profiles do **not** sandbox the agent.
+
+On the default `local` terminal backend, the agent still has the same filesystem access as your user account. A profile does not stop it from accessing folders outside the profile directory.
+
+If you want a profile to start in a specific project folder, set an explicit absolute `terminal.cwd` in that profile's `config.yaml`:
+
+```yaml
+terminal:
+  backend: local
+  cwd: /absolute/path/to/project
+```
+
+Using `cwd: "."` on the local backend means "the directory Hermes was launched from", not "the profile directory".
+
+Also note:
+
+- `SOUL.md` can guide the model, but it does not enforce a workspace boundary.
+- Changes to `SOUL.md` take effect cleanly on a new session. Existing sessions may still be using the old prompt state.
+- Asking the model "what directory are you in?" is not a reliable isolation test. If you need a predictable starting directory for tools, set `terminal.cwd` explicitly.
+
 ## Running gateways
 
 Each profile runs its own gateway as a separate process with its own bot token:
@@ -151,6 +177,12 @@ coder config set model.model anthropic/claude-sonnet-4
 echo "You are a focused coding assistant." > ~/.hermes/profiles/coder/SOUL.md
 ```
 
+If you want this profile to work in a specific project by default, also set its own `terminal.cwd`:
+
+```bash
+coder config set terminal.cwd /absolute/path/to/project
+```
+
 ## Updating
 
 `hermes update` pulls code once (shared) and syncs new bundled skills to **all** profiles automatically:
@@ -201,6 +233,8 @@ Add the line to your `~/.bashrc` or `~/.zshrc` for persistent completion. Comple
 
 ## How it works
 
-Profiles use the `HERMES_HOME` environment variable. When you run `coder chat`, the wrapper script sets `HERMES_HOME=~/.hermes/profiles/coder` before launching hermes. Since 119+ files in the codebase resolve paths via `get_hermes_home()`, everything automatically scopes to the profile's directory — config, sessions, memory, skills, state database, gateway PID, logs, and cron jobs.
+Profiles use the `HERMES_HOME` environment variable. When you run `coder chat`, the wrapper script sets `HERMES_HOME=~/.hermes/profiles/coder` before launching hermes. Since 119+ files in the codebase resolve paths via `get_hermes_home()`, Hermes state automatically scopes to the profile's directory — config, sessions, memory, skills, state database, gateway PID, logs, and cron jobs.
+
+This is separate from the terminal working directory. Tool execution starts from `terminal.cwd` (or the launch directory when `cwd: "."` on the local backend), not automatically from `HERMES_HOME`.
 
 The default profile is simply `~/.hermes` itself. No migration needed — existing installs work identically.

From 150382e8b79018f0967724ee10403409fdec0060 Mon Sep 17 00:00:00 2001
From: helix4u <4317663+helix4u@users.noreply.github.com>
Date: Sat, 18 Apr 2026 21:21:55 -0600
Subject: [PATCH 122/143] fix(gateway): stop typing loops on session interrupt

---
 gateway/platforms/base.py                 |  38 ++++-
 gateway/run.py                            | 147 +++++++++++++++--
 tests/gateway/test_pending_event_none.py  |  32 +++-
 tests/gateway/test_run_progress_topics.py | 186 ++++++++++++++++++++++
 tests/gateway/test_session_race_guard.py  |  24 ++-
 tests/gateway/test_status_command.py      |  47 ++++++
 6 files changed, 456 insertions(+), 18 deletions(-)

diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index 645a642ba1a..1f26ed854e1 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -1401,7 +1401,13 @@ class BasePlatformAdapter(ABC):
 
         return paths, cleaned
 
-    async def _keep_typing(self, chat_id: str, interval: float = 2.0, metadata=None) -> None:
+    async def _keep_typing(
+        self,
+        chat_id: str,
+        interval: float = 2.0,
+        metadata=None,
+        stop_event: asyncio.Event | None = None,
+    ) -> None:
         """
         Continuously send typing indicator until cancelled.
         
@@ -1415,9 +1421,18 @@ class BasePlatformAdapter(ABC):
         """
         try:
             while True:
+                if stop_event is not None and stop_event.is_set():
+                    return
                 if chat_id not in self._typing_paused:
                     await self.send_typing(chat_id, metadata=metadata)
-                await asyncio.sleep(interval)
+                if stop_event is None:
+                    await asyncio.sleep(interval)
+                    continue
+                try:
+                    await asyncio.wait_for(stop_event.wait(), timeout=interval)
+                except asyncio.TimeoutError:
+                    continue
+                return
         except asyncio.CancelledError:
             pass  # Normal cancellation when handler completes
         finally:
@@ -1444,6 +1459,17 @@ class BasePlatformAdapter(ABC):
         """Resume typing indicator for a chat after approval resolves."""
         self._typing_paused.discard(chat_id)
 
+    async def interrupt_session_activity(self, session_key: str, chat_id: str) -> None:
+        """Signal the active session loop to stop and clear typing immediately."""
+        if session_key:
+            interrupt_event = self._active_sessions.get(session_key)
+            if interrupt_event is not None:
+                interrupt_event.set()
+        try:
+            await self.stop_typing(chat_id)
+        except Exception:
+            pass
+
     # ── Processing lifecycle hooks ──────────────────────────────────────────
     # Subclasses override these to react to message processing events
     # (e.g. Discord adds 👀/✅/❌ reactions).
@@ -1717,7 +1743,13 @@ class BasePlatformAdapter(ABC):
         
         # Start continuous typing indicator (refreshes every 2 seconds)
         _thread_metadata = {"thread_id": event.source.thread_id} if event.source.thread_id else None
-        typing_task = asyncio.create_task(self._keep_typing(event.source.chat_id, metadata=_thread_metadata))
+        typing_task = asyncio.create_task(
+            self._keep_typing(
+                event.source.chat_id,
+                metadata=_thread_metadata,
+                stop_event=interrupt_event,
+            )
+        )
         
         try:
             await self._run_processing_hook("on_processing_start", event)
diff --git a/gateway/run.py b/gateway/run.py
index 37b27232135..ed3b6b5ee32 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -402,6 +402,26 @@ def _dequeue_pending_event(adapter, session_key: str) -> MessageEvent | None:
     return adapter.get_pending_message(session_key)
 
 
+_CONTROL_INTERRUPT_MESSAGES = frozenset(
+    {
+        "stop requested",
+        "session reset requested",
+        "execution timed out (inactivity)",
+        "sse client disconnected",
+        "gateway shutting down",
+        "gateway restarting",
+    }
+)
+
+
+def _is_control_interrupt_message(message: Optional[str]) -> bool:
+    """Return True when an interrupt message is internal control flow."""
+    if not message:
+        return False
+    normalized = " ".join(str(message).strip().split()).lower()
+    return normalized in _CONTROL_INTERRUPT_MESSAGES
+
+
 def _check_unavailable_skill(command_name: str) -> str | None:
     """Check if a command matches a known-but-inactive skill.
 
@@ -630,6 +650,7 @@ class GatewayRunner:
         self._running_agents_ts: Dict[str, float] = {}  # start timestamp per session
         self._pending_messages: Dict[str, str] = {}  # Queued messages during interrupt
         self._busy_ack_ts: Dict[str, float] = {}  # last busy-ack timestamp per session (debounce)
+        self._session_run_generation: Dict[str, int] = {}
 
         # Cache AIAgent instances per session to preserve prompt caching.
         # Without this, a new AIAgent is created per message, rebuilding the
@@ -3064,6 +3085,10 @@ class GatewayRunner:
                     _quick_key[:30], _stale_age, _stale_idle,
                     _raw_stale_timeout, _stale_detail,
                 )
+                self._invalidate_session_run_generation(
+                    _quick_key,
+                    reason="stale_running_agent_eviction",
+                )
                 self._release_running_agent_state(_quick_key)
 
         if _quick_key in self._running_agents:
@@ -3091,7 +3116,13 @@ class GatewayRunner:
                 if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
                     running_agent.interrupt("Stop requested")
                 # Force-clean: remove the session lock regardless of agent state
+                self._invalidate_session_run_generation(
+                    _quick_key,
+                    reason="stop_command",
+                )
                 adapter = self.adapters.get(source.platform)
+                if adapter and hasattr(adapter, "interrupt_session_activity"):
+                    await adapter.interrupt_session_activity(_quick_key, source.chat_id)
                 if adapter and hasattr(adapter, 'get_pending_message'):
                     adapter.get_pending_message(_quick_key)  # consume and discard
                 self._pending_messages.pop(_quick_key, None)
@@ -3111,7 +3142,13 @@ class GatewayRunner:
                 if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
                     running_agent.interrupt("Session reset requested")
                 # Clear any pending messages so the old text doesn't replay
+                self._invalidate_session_run_generation(
+                    _quick_key,
+                    reason="new_command",
+                )
                 adapter = self.adapters.get(source.platform)
+                if adapter and hasattr(adapter, "interrupt_session_activity"):
+                    await adapter.interrupt_session_activity(_quick_key, source.chat_id)
                 if adapter and hasattr(adapter, 'get_pending_message'):
                     adapter.get_pending_message(_quick_key)  # consume and discard
                 self._pending_messages.pop(_quick_key, None)
@@ -3598,9 +3635,10 @@ class GatewayRunner:
         # same session — corrupting the transcript.
         self._running_agents[_quick_key] = _AGENT_PENDING_SENTINEL
         self._running_agents_ts[_quick_key] = time.time()
+        _run_generation = self._begin_session_run_generation(_quick_key)
 
         try:
-            return await self._handle_message_with_agent(event, source, _quick_key)
+            return await self._handle_message_with_agent(event, source, _quick_key, _run_generation)
         finally:
             # If _run_agent replaced the sentinel with a real agent and
             # then cleaned it up, this is a no-op.  If we exited early
@@ -3771,7 +3809,7 @@ class GatewayRunner:
 
         return message_text
 
-    async def _handle_message_with_agent(self, event, source, _quick_key: str):
+    async def _handle_message_with_agent(self, event, source, _quick_key: str, run_generation: int):
         """Inner handler that runs under the _running_agents sentinel guard."""
         _msg_start_time = time.time()
         _platform_name = source.platform.value if hasattr(source.platform, "value") else str(source.platform)
@@ -4246,6 +4284,7 @@ class GatewayRunner:
                 source=source,
                 session_id=session_entry.session_id,
                 session_key=session_key,
+                run_generation=run_generation,
                 event_message_id=event.message_id,
                 channel_prompt=event.channel_prompt,
             )
@@ -4258,6 +4297,17 @@ class GatewayRunner:
             except Exception:
                 pass
 
+            if not self._is_session_run_current(_quick_key, run_generation):
+                logger.info(
+                    "Discarding stale agent result for %s — generation %d is no longer current",
+                    _quick_key[:20] if _quick_key else "?",
+                    run_generation,
+                )
+                _stale_adapter = self.adapters.get(source.platform)
+                if _stale_adapter and hasattr(_stale_adapter, "_post_delivery_callbacks"):
+                    _stale_adapter._post_delivery_callbacks.pop(_quick_key, None)
+                return None
+
             response = agent_result.get("final_response") or ""
 
             # Convert the agent's internal "(empty)" sentinel into a
@@ -4672,6 +4722,7 @@ class GatewayRunner:
         
         # Get existing session key
         session_key = self._session_key_for_source(source)
+        self._invalidate_session_run_generation(session_key, reason="session_reset")
         
         # Flush memories in the background (fire-and-forget) so the user
         # gets the "Session reset!" response immediately.
@@ -4931,6 +4982,10 @@ class GatewayRunner:
         agent = self._running_agents.get(session_key)
         if agent is _AGENT_PENDING_SENTINEL:
             # Force-clean the sentinel so the session is unlocked.
+            self._invalidate_session_run_generation(session_key, reason="stop_command_pending")
+            adapter = self.adapters.get(source.platform)
+            if adapter and hasattr(adapter, "interrupt_session_activity"):
+                await adapter.interrupt_session_activity(session_key, source.chat_id)
             self._release_running_agent_state(session_key)
             logger.info("STOP (pending) for session %s — sentinel cleared", session_key[:20])
             return "⚡ Stopped. The agent hadn't started yet — you can continue this session."
@@ -4938,6 +4993,10 @@ class GatewayRunner:
             agent.interrupt("Stop requested")
             # Force-clean the session lock so a truly hung agent doesn't
             # keep it locked forever.
+            self._invalidate_session_run_generation(session_key, reason="stop_command_handler")
+            adapter = self.adapters.get(source.platform)
+            if adapter and hasattr(adapter, "interrupt_session_activity"):
+                await adapter.interrupt_session_activity(session_key, source.chat_id)
             self._release_running_agent_state(session_key)
             return "⚡ Stopped. You can continue this session."
         else:
@@ -8385,6 +8444,43 @@ class GatewayRunner:
         if hasattr(self, "_busy_ack_ts"):
             self._busy_ack_ts.pop(session_key, None)
 
+    def _begin_session_run_generation(self, session_key: str) -> int:
+        """Claim a fresh run generation token for ``session_key``.
+
+        Every top-level gateway turn gets a monotonically increasing token.
+        If a later command like /stop or /new invalidates that token while the
+        old worker is still unwinding, the late result can be recognized and
+        dropped instead of bleeding into the fresh session.
+        """
+        if not session_key:
+            return 0
+        generations = self.__dict__.get("_session_run_generation")
+        if generations is None:
+            generations = {}
+            self._session_run_generation = generations
+        next_generation = int(generations.get(session_key, 0)) + 1
+        generations[session_key] = next_generation
+        return next_generation
+
+    def _invalidate_session_run_generation(self, session_key: str, *, reason: str = "") -> int:
+        """Invalidate any in-flight run token for ``session_key``.
+
+        Claims a fresh generation so tokens handed out earlier stop matching,
+        and returns it (0, with nothing logged, when ``session_key`` is falsy).
+        """
+        generation = self._begin_session_run_generation(session_key)
+        if reason and session_key:  # falsy key: nothing was invalidated
+            logger.info("Invalidated run generation for %s → %d (%s)",
+                        session_key[:20], generation, reason)
+        return generation
+
+    def _is_session_run_current(self, session_key: str, generation: int) -> bool:
+        """Return True when ``generation`` is still current for ``session_key``."""
+        if not session_key:
+            return True
+        generations = self.__dict__.get("_session_run_generation") or {}
+        return int(generations.get(session_key, 0)) == int(generation)
+
     def _evict_cached_agent(self, session_key: str) -> None:
         """Remove a cached agent for a session (called on /new, /model, etc)."""
         _lock = getattr(self, "_agent_cache_lock", None)
@@ -8807,6 +8903,7 @@ class GatewayRunner:
         source: SessionSource,
         session_id: str,
         session_key: str = None,
+        run_generation: Optional[int] = None,
         _interrupt_depth: int = 0,
         event_message_id: Optional[str] = None,
         channel_prompt: Optional[str] = None,
@@ -8837,6 +8934,11 @@ class GatewayRunner:
 
         from run_agent import AIAgent
         import queue
+
+        def _run_still_current() -> bool:
+            if run_generation is None or not session_key:
+                return True
+            return self._is_session_run_current(session_key, run_generation)
         
         user_config = _load_gateway_config()
         platform_key = _platform_config_key(source.platform)
@@ -8891,7 +8993,7 @@ class GatewayRunner:
         
         def progress_callback(event_type: str, tool_name: str = None, preview: str = None, args: dict = None, **kwargs):
             """Callback invoked by agent on tool lifecycle events."""
-            if not progress_queue:
+            if not progress_queue or not _run_still_current():
                 return
 
             # Only act on tool.started events (ignore tool.completed, reasoning.available, etc.)
@@ -8996,6 +9098,14 @@ class GatewayRunner:
 
             while True:
                 try:
+                    if not _run_still_current():
+                        while not progress_queue.empty():
+                            try:
+                                progress_queue.get_nowait()
+                            except Exception:
+                                break
+                        return
+
                     raw = progress_queue.get_nowait()
 
                     # Handle dedup messages: update last line with repeat counter
@@ -9021,6 +9131,9 @@ class GatewayRunner:
                         await asyncio.sleep(_remaining)
                         continue
 
+                    if not _run_still_current():
+                        return
+
                     if can_edit and progress_msg_id is not None:
                         # Try to edit the existing progress message
                         full_text = "\n".join(progress_lines)
@@ -9056,7 +9169,8 @@ class GatewayRunner:
 
                     # Restore typing indicator
                     await asyncio.sleep(0.3)
-                    await adapter.send_typing(source.chat_id, metadata=_progress_metadata)
+                    if _run_still_current():
+                        await adapter.send_typing(source.chat_id, metadata=_progress_metadata)
 
                 except queue.Empty:
                     await asyncio.sleep(0.3)
@@ -9100,6 +9214,8 @@ class GatewayRunner:
         _hooks_ref = self.hooks
 
         def _step_callback_sync(iteration: int, prev_tools: list) -> None:
+            if not _run_still_current():
+                return
             try:
                 # prev_tools may be list[str] or list[dict] with "name"/"result"
                 # keys.  Normalise to keep "tool_names" backward-compatible for
@@ -9130,7 +9246,7 @@ class GatewayRunner:
         _status_thread_metadata = {"thread_id": _progress_thread_id} if _progress_thread_id else None
 
         def _status_callback_sync(event_type: str, message: str) -> None:
-            if not _status_adapter:
+            if not _status_adapter or not _run_still_current():
                 return
             try:
                 asyncio.run_coroutine_threadsafe(
@@ -9261,12 +9377,16 @@ class GatewayRunner:
                             metadata={"thread_id": _progress_thread_id} if _progress_thread_id else None,
                         )
                         if _want_stream_deltas:
-                            _stream_delta_cb = _stream_consumer.on_delta
+                            def _stream_delta_cb(text: str) -> None:
+                                if _run_still_current():
+                                    _stream_consumer.on_delta(text)
                         stream_consumer_holder[0] = _stream_consumer
                 except Exception as _sc_err:
                     logger.debug("Could not set up stream consumer: %s", _sc_err)
 
             def _interim_assistant_cb(text: str, *, already_streamed: bool = False) -> None:
+                if not _run_still_current():
+                    return
                 if _stream_consumer is not None:
                     if already_streamed:
                         _stream_consumer.on_segment_break()
@@ -9370,7 +9490,7 @@ class GatewayRunner:
             _bg_review_pending_lock = threading.Lock()
 
             def _deliver_bg_review_message(message: str) -> None:
-                if not _status_adapter:
+                if not _status_adapter or not _run_still_current():
                     return
                 try:
                     asyncio.run_coroutine_threadsafe(
@@ -9394,7 +9514,7 @@ class GatewayRunner:
 
             # Background review delivery — send "💾 Memory updated" etc. to user
             def _bg_review_send(message: str) -> None:
-                if not _status_adapter:
+                if not _status_adapter or not _run_still_current():
                     return
                 if not _bg_review_release.is_set():
                     with _bg_review_pending_lock:
@@ -10076,7 +10196,15 @@ class GatewayRunner:
             if result and adapter and session_key:
                 pending_event = _dequeue_pending_event(adapter, session_key)
                 if result.get("interrupted") and not pending_event and result.get("interrupt_message"):
-                    pending = result.get("interrupt_message")
+                    interrupt_message = result.get("interrupt_message")
+                    if _is_control_interrupt_message(interrupt_message):
+                        logger.info(
+                            "Ignoring control interrupt message for session %s: %s",
+                            session_key[:20] if session_key else "?",
+                            interrupt_message,
+                        )
+                    else:
+                        pending = interrupt_message
                 elif pending_event:
                     pending = pending_event.text or _build_media_placeholder(pending_event)
                     logger.debug("Processing queued message after agent completion: '%s...'", pending[:40])
@@ -10229,6 +10357,7 @@ class GatewayRunner:
                     source=next_source,
                     session_id=session_id,
                     session_key=session_key,
+                    run_generation=run_generation,
                     _interrupt_depth=_interrupt_depth + 1,
                     event_message_id=next_message_id,
                     channel_prompt=next_channel_prompt,
diff --git a/tests/gateway/test_pending_event_none.py b/tests/gateway/test_pending_event_none.py
index b2e1356fa14..e717c88296e 100644
--- a/tests/gateway/test_pending_event_none.py
+++ b/tests/gateway/test_pending_event_none.py
@@ -1,13 +1,18 @@
-"""Tests for the pending_event None guard in recursive _run_agent calls.
+"""Tests for pending follow-up extraction in recursive _run_agent calls.
 
 When pending_event is None (Path B: pending comes from interrupt_message),
 accessing pending_event.channel_prompt previously raised AttributeError.
 This verifies the fix: channel_prompt is captured inside the
 `if pending_event is not None:` block and falls back to None otherwise.
+
+Also verifies that internal control interrupt reasons like "Stop requested"
+do not get recycled into the pending-user-message follow-up path.
 """
 
 from types import SimpleNamespace
 
+from gateway.run import _is_control_interrupt_message
+
 
 def _extract_channel_prompt(pending_event):
     """Reproduce the fixed logic from gateway/run.py.
@@ -21,6 +26,15 @@ def _extract_channel_prompt(pending_event):
     return next_channel_prompt
 
 
+def _extract_pending_text(interrupted, pending_event, interrupt_message):
+    """Reproduce the fixed pending-text selection from gateway/run.py."""
+    if interrupted and pending_event is None and interrupt_message:
+        if _is_control_interrupt_message(interrupt_message):
+            return None
+        return interrupt_message
+    return None
+
+
 class TestPendingEventNoneChannelPrompt:
     """Guard against AttributeError when pending_event is None."""
 
@@ -40,3 +54,19 @@ class TestPendingEventNoneChannelPrompt:
         event = SimpleNamespace()
         result = _extract_channel_prompt(event)
         assert result is None
+
+
+class TestControlInterruptMessages:
+    """Control interrupt reasons must not become follow-up user input."""
+
+    def test_stop_requested_is_not_treated_as_pending_user_message(self):
+        result = _extract_pending_text(True, None, "Stop requested")
+        assert result is None
+
+    def test_session_reset_requested_is_not_treated_as_pending_user_message(self):
+        result = _extract_pending_text(True, None, "Session reset requested")
+        assert result is None
+
+    def test_real_user_interrupt_message_still_requeues(self):
+        result = _extract_pending_text(True, None, "actually use postgres instead")
+        assert result == "actually use postgres instead"
diff --git a/tests/gateway/test_run_progress_topics.py b/tests/gateway/test_run_progress_topics.py
index 4878f2faec8..59e9fa0408d 100644
--- a/tests/gateway/test_run_progress_topics.py
+++ b/tests/gateway/test_run_progress_topics.py
@@ -51,6 +51,9 @@ class ProgressCaptureAdapter(BasePlatformAdapter):
     async def send_typing(self, chat_id, metadata=None) -> None:
         self.typing.append({"chat_id": chat_id, "metadata": metadata})
 
+    async def stop_typing(self, chat_id) -> None:
+        self.typing.append({"chat_id": chat_id, "metadata": {"stopped": True}})
+
     async def get_chat_info(self, chat_id: str):
         return {"id": chat_id}
 
@@ -90,6 +93,40 @@ class LongPreviewAgent:
         }
 
 
+class DelayedProgressAgent:
+    def __init__(self, **kwargs):
+        self.tool_progress_callback = kwargs.get("tool_progress_callback")
+        self.tools = []
+
+    def run_conversation(self, message, conversation_history=None, task_id=None):
+        self.tool_progress_callback("tool.started", "terminal", "first command", {})
+        time.sleep(0.45)
+        self.tool_progress_callback("tool.started", "terminal", "second command", {})
+        time.sleep(0.1)
+        return {
+            "final_response": "done",
+            "messages": [],
+            "api_calls": 1,
+        }
+
+
+class DelayedInterimAgent:
+    def __init__(self, **kwargs):
+        self.interim_assistant_callback = kwargs.get("interim_assistant_callback")
+        self.tools = []
+
+    def run_conversation(self, message, conversation_history=None, task_id=None):
+        self.interim_assistant_callback("first interim")
+        time.sleep(0.45)
+        self.interim_assistant_callback("second interim")
+        time.sleep(0.1)
+        return {
+            "final_response": "done",
+            "messages": [],
+            "api_calls": 1,
+        }
+
+
 def _make_runner(adapter):
     gateway_run = importlib.import_module("gateway.run")
     GatewayRunner = gateway_run.GatewayRunner
@@ -104,6 +141,7 @@ def _make_runner(adapter):
     runner._fallback_model = None
     runner._session_db = None
     runner._running_agents = {}
+    runner._session_run_generation = {}
     runner.hooks = SimpleNamespace(loaded_hooks=False)
     runner.config = SimpleNamespace(
         thread_sessions_per_user=False,
@@ -744,6 +782,154 @@ async def test_base_processing_releases_post_delivery_callback_after_main_send()
     assert released == [True]
 
 
+@pytest.mark.asyncio
+async def test_run_agent_drops_tool_progress_after_generation_invalidation(monkeypatch, tmp_path):
+    import yaml
+
+    (tmp_path / "config.yaml").write_text(
+        yaml.dump({"display": {"tool_progress": "all"}}),
+        encoding="utf-8",
+    )
+
+    fake_dotenv = types.ModuleType("dotenv")
+    fake_dotenv.load_dotenv = lambda *args, **kwargs: None
+    monkeypatch.setitem(sys.modules, "dotenv", fake_dotenv)
+
+    fake_run_agent = types.ModuleType("run_agent")
+    fake_run_agent.AIAgent = DelayedProgressAgent
+    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
+    import tools.terminal_tool  # noqa: F401 - register terminal tool metadata
+
+    adapter = ProgressCaptureAdapter(platform=Platform.DISCORD)
+    runner = _make_runner(adapter)
+    gateway_run = importlib.import_module("gateway.run")
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"})
+
+    source = SessionSource(
+        platform=Platform.DISCORD,
+        chat_id="dm-1",
+        chat_type="dm",
+        thread_id=None,
+    )
+    session_key = "agent:main:discord:dm:dm-1"
+    runner._session_run_generation[session_key] = 1
+
+    original_send = adapter.send
+    invalidated = {"done": False}
+
+    async def send_and_invalidate(chat_id, content, reply_to=None, metadata=None):
+        result = await original_send(chat_id, content, reply_to=reply_to, metadata=metadata)
+        if "first command" in content and not invalidated["done"]:
+            invalidated["done"] = True
+            runner._invalidate_session_run_generation(session_key, reason="test_stop")
+        return result
+
+    adapter.send = send_and_invalidate
+
+    result = await runner._run_agent(
+        message="hello",
+        context_prompt="",
+        history=[],
+        source=source,
+        session_id="sess-progress-stop",
+        session_key=session_key,
+        run_generation=1,
+    )
+
+    all_progress_text = " ".join(call["content"] for call in adapter.sent)
+    all_progress_text += " ".join(call["content"] for call in adapter.edits)
+    assert result["final_response"] == "done"
+    assert 'first command' in all_progress_text
+    assert 'second command' not in all_progress_text
+
+
+@pytest.mark.asyncio
+async def test_run_agent_drops_interim_commentary_after_generation_invalidation(monkeypatch, tmp_path):
+    import yaml
+
+    (tmp_path / "config.yaml").write_text(
+        yaml.dump({"display": {"tool_progress": "off", "interim_assistant_messages": True}}),
+        encoding="utf-8",
+    )
+
+    fake_dotenv = types.ModuleType("dotenv")
+    fake_dotenv.load_dotenv = lambda *args, **kwargs: None
+    monkeypatch.setitem(sys.modules, "dotenv", fake_dotenv)
+
+    fake_run_agent = types.ModuleType("run_agent")
+    fake_run_agent.AIAgent = DelayedInterimAgent
+    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
+
+    adapter = ProgressCaptureAdapter(platform=Platform.DISCORD)
+    runner = _make_runner(adapter)
+    gateway_run = importlib.import_module("gateway.run")
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"})
+
+    source = SessionSource(
+        platform=Platform.DISCORD,
+        chat_id="dm-2",
+        chat_type="dm",
+        thread_id=None,
+    )
+    session_key = "agent:main:discord:dm:dm-2"
+    runner._session_run_generation[session_key] = 1
+
+    original_send = adapter.send
+    invalidated = {"done": False}
+
+    async def send_and_invalidate(chat_id, content, reply_to=None, metadata=None):
+        result = await original_send(chat_id, content, reply_to=reply_to, metadata=metadata)
+        if content == "first interim" and not invalidated["done"]:
+            invalidated["done"] = True
+            runner._invalidate_session_run_generation(session_key, reason="test_stop")
+        return result
+
+    adapter.send = send_and_invalidate
+
+    result = await runner._run_agent(
+        message="hello",
+        context_prompt="",
+        history=[],
+        source=source,
+        session_id="sess-commentary-stop",
+        session_key=session_key,
+        run_generation=1,
+    )
+
+    sent_texts = [call["content"] for call in adapter.sent]
+    assert result["final_response"] == "done"
+    assert "first interim" in sent_texts
+    assert "second interim" not in sent_texts
+
+
+@pytest.mark.asyncio
+async def test_keep_typing_stops_immediately_when_interrupt_event_is_set():
+    adapter = ProgressCaptureAdapter(platform=Platform.DISCORD)
+    stop_event = asyncio.Event()
+
+    task = asyncio.create_task(
+        adapter._keep_typing(
+            "dm-typing-stop",
+            interval=30.0,
+            stop_event=stop_event,
+        )
+    )
+    await asyncio.sleep(0.05)
+    stop_event.set()
+    await asyncio.wait_for(task, timeout=0.5)
+
+    normal_typing_calls = [
+        call for call in adapter.typing if call.get("metadata") != {"stopped": True}
+    ]
+    stopped_calls = [
+        call for call in adapter.typing if call.get("metadata") == {"stopped": True}
+    ]
+    assert len(normal_typing_calls) == 1
+    assert len(stopped_calls) == 1
+
+
 @pytest.mark.asyncio
 async def test_verbose_mode_does_not_truncate_args_by_default(monkeypatch, tmp_path):
     """Verbose mode with default tool_preview_length (0) should NOT truncate args.
diff --git a/tests/gateway/test_session_race_guard.py b/tests/gateway/test_session_race_guard.py
index 8c26abec590..fe1ef011a37 100644
--- a/tests/gateway/test_session_race_guard.py
+++ b/tests/gateway/test_session_race_guard.py
@@ -24,10 +24,18 @@ class _FakeAdapter:
 
     def __init__(self):
         self._pending_messages = {}
+        self._active_sessions = {}
+        self.interrupted_sessions = []
 
     async def send(self, chat_id, text, **kwargs):
         pass
 
+    async def interrupt_session_activity(self, session_key, chat_id):
+        self.interrupted_sessions.append((session_key, chat_id))
+        event = self._active_sessions.get(session_key)
+        if event is not None:
+            event.set()
+
 
 def _make_runner():
     runner = object.__new__(GatewayRunner)
@@ -37,6 +45,7 @@ def _make_runner():
     runner.adapters = {Platform.TELEGRAM: _FakeAdapter()}
     runner._running_agents = {}
     runner._running_agents_ts = {}
+    runner._session_run_generation = {}
     runner._pending_messages = {}
     runner._pending_approvals = {}
     runner._voice_mode = {}
@@ -81,7 +90,7 @@ async def test_sentinel_placed_before_agent_setup():
     # Patch _handle_message_with_agent to capture state at entry
     sentinel_was_set = False
 
-    async def mock_inner(self_inner, ev, src, qk):
+    async def mock_inner(self_inner, ev, src, qk, generation):
         nonlocal sentinel_was_set
         sentinel_was_set = runner._running_agents.get(qk) is _AGENT_PENDING_SENTINEL
         return "ok"
@@ -105,7 +114,7 @@ async def test_sentinel_cleaned_up_after_handler_returns():
     event = _make_event()
     session_key = build_session_key(event.source)
 
-    async def mock_inner(self_inner, ev, src, qk):
+    async def mock_inner(self_inner, ev, src, qk, generation):
         return "ok"
 
     with patch.object(GatewayRunner, "_handle_message_with_agent", mock_inner):
@@ -127,7 +136,7 @@ async def test_sentinel_cleaned_up_on_exception():
     event = _make_event()
     session_key = build_session_key(event.source)
 
-    async def mock_inner(self_inner, ev, src, qk):
+    async def mock_inner(self_inner, ev, src, qk, generation):
         raise RuntimeError("boom")
 
     with patch.object(GatewayRunner, "_handle_message_with_agent", mock_inner):
@@ -154,7 +163,7 @@ async def test_second_message_during_sentinel_queued_not_duplicate():
 
     barrier = asyncio.Event()
 
-    async def slow_inner(self_inner, ev, src, qk):
+    async def slow_inner(self_inner, ev, src, qk, generation):
         # Simulate slow setup — wait until test tells us to proceed
         await barrier.wait()
         return "ok"
@@ -333,7 +342,7 @@ async def test_stop_during_sentinel_force_cleans_session():
 
     barrier = asyncio.Event()
 
-    async def slow_inner(self_inner, ev, src, qk):
+    async def slow_inner(self_inner, ev, src, qk, generation):
         await barrier.wait()
         return "ok"
 
@@ -381,6 +390,7 @@ async def test_stop_hard_kills_running_agent():
     fake_agent = MagicMock()
     fake_agent.get_activity_summary.return_value = {"seconds_since_activity": 0}
     runner._running_agents[session_key] = fake_agent
+    runner.adapters[Platform.TELEGRAM]._active_sessions[session_key] = asyncio.Event()
 
     # Send /stop
     stop_event = _make_event(text="/stop")
@@ -393,6 +403,10 @@ async def test_stop_hard_kills_running_agent():
     assert session_key not in runner._running_agents, (
         "/stop must remove the agent from _running_agents so the session is unlocked"
     )
+    assert runner.adapters[Platform.TELEGRAM].interrupted_sessions == [
+        (session_key, "12345")
+    ]
+    assert runner.adapters[Platform.TELEGRAM]._active_sessions[session_key].is_set()
 
     # Must return a confirmation
     assert result is not None
diff --git a/tests/gateway/test_status_command.py b/tests/gateway/test_status_command.py
index c4a64f30ab1..3cdf637dd97 100644
--- a/tests/gateway/test_status_command.py
+++ b/tests/gateway/test_status_command.py
@@ -50,6 +50,7 @@ def _make_runner(session_entry: SessionEntry):
     runner.session_store.rewrite_transcript = MagicMock()
     runner.session_store.update_session = MagicMock()
     runner._running_agents = {}
+    runner._session_run_generation = {}
     runner._pending_messages = {}
     runner._pending_approvals = {}
     runner._session_db = MagicMock()
@@ -223,6 +224,52 @@ async def test_handle_message_persists_agent_token_counts(monkeypatch):
     )
 
 
+@pytest.mark.asyncio
+async def test_handle_message_discards_stale_result_after_session_invalidation(monkeypatch):
+    import gateway.run as gateway_run
+
+    session_entry = SessionEntry(
+        session_key=build_session_key(_make_source()),
+        session_id="sess-1",
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        platform=Platform.TELEGRAM,
+        chat_type="dm",
+    )
+    runner = _make_runner(session_entry)
+    runner.session_store.load_transcript.return_value = [{"role": "user", "content": "earlier"}]
+    session_key = session_entry.session_key
+    runner.adapters[Platform.TELEGRAM]._post_delivery_callbacks = {session_key: object()}
+
+    async def _stale_result(**kwargs):
+        runner._invalidate_session_run_generation(kwargs["session_key"], reason="test_stale_result")
+        return {
+            "final_response": "late reply",
+            "messages": [],
+            "tools": [],
+            "history_offset": 0,
+            "last_prompt_tokens": 80,
+            "input_tokens": 120,
+            "output_tokens": 45,
+            "model": "openai/test-model",
+        }
+
+    runner._run_agent = AsyncMock(side_effect=_stale_result)
+
+    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"})
+    monkeypatch.setattr(
+        "agent.model_metadata.get_model_context_length",
+        lambda *_args, **_kwargs: 100000,
+    )
+
+    result = await runner._handle_message(_make_event("hello"))
+
+    assert result is None
+    runner.session_store.append_to_transcript.assert_not_called()
+    runner.session_store.update_session.assert_not_called()
+    assert session_key not in runner.adapters[Platform.TELEGRAM]._post_delivery_callbacks
+
+
 
 @pytest.mark.asyncio
 async def test_status_command_bypasses_active_session_guard():

From 8466268ca58fe1422cadcb6b134b18bc0860a597 Mon Sep 17 00:00:00 2001
From: helix4u <4317663+helix4u@users.noreply.github.com>
Date: Sat, 18 Apr 2026 21:32:49 -0600
Subject: [PATCH 123/143] fix(gateway): keep typing loop overrides
 backward-compatible

---
 gateway/platforms/base.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index 1f26ed854e1..dc0f22d2a3c 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -6,6 +6,7 @@ and implement the required methods.
 """
 
 import asyncio
+import inspect
 import ipaddress
 import logging
 import os
@@ -1743,11 +1744,17 @@ class BasePlatformAdapter(ABC):
         
         # Start continuous typing indicator (refreshes every 2 seconds)
         _thread_metadata = {"thread_id": event.source.thread_id} if event.source.thread_id else None
+        _keep_typing_kwargs = {"metadata": _thread_metadata}
+        try:
+            _keep_typing_sig = inspect.signature(self._keep_typing)
+        except (TypeError, ValueError):
+            _keep_typing_sig = None
+        if _keep_typing_sig is None or "stop_event" in _keep_typing_sig.parameters:
+            _keep_typing_kwargs["stop_event"] = interrupt_event
         typing_task = asyncio.create_task(
             self._keep_typing(
                 event.source.chat_id,
-                metadata=_thread_metadata,
-                stop_event=interrupt_event,
+                **_keep_typing_kwargs,
             )
         )
         

From 4b6ff0eb7fa287695fa147e7c7622dae4ca5dd51 Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Sun, 19 Apr 2026 15:05:14 +0530
Subject: [PATCH 124/143] fix: tighten gateway interrupt salvage follow-ups

Follow-up on top of the helix4u #12388 cherry-picks:
- make deferred post-delivery callbacks generation-aware end-to-end so
  stale runs cannot clear callbacks registered by a fresher run for the
  same session
- bind callback ownership to the active session event at run start and
  snapshot that generation inside base adapter processing so later event
  mutation cannot retarget cleanup
- pass run_generation through proxy mode and drop stale proxy streams /
  final results the same way local runs are dropped
- centralize stop/new interrupt cleanup into one helper and replace the
  open-coded branches with shared logic
- unify internal control interrupt reason strings via shared constants
- remove the return from base.py's finally block so cleanup no longer
  swallows cancellation/exception flow
- add focused regressions for generation forwarding, proxy stale
  suppression, and newer-callback preservation

This addresses all review findings from the initial #12388 review while
keeping the fix scoped to stale-output/typing-loop interrupt handling.
---
 gateway/platforms/base.py            |  69 ++++++++--
 gateway/run.py                       | 198 ++++++++++++++++++++-------
 tests/gateway/test_proxy_mode.py     |  37 +++++
 tests/gateway/test_status_command.py |  69 ++++++++++
 4 files changed, 315 insertions(+), 58 deletions(-)

diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index dc0f22d2a3c..2b8536062c2 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -881,10 +881,11 @@ class BasePlatformAdapter(ABC):
         # working on a task after --replace or manual restarts.
         self._background_tasks: set[asyncio.Task] = set()
         # One-shot callbacks to fire after the main response is delivered.
-        # Keyed by session_key.  GatewayRunner uses this to defer
-        # background-review notifications ("💾 Skill created") until the
-        # primary reply has been sent.
-        self._post_delivery_callbacks: Dict[str, Callable] = {}
+        # Keyed by session_key. Values are either a bare callback (legacy) or
+        # a ``(generation, callback)`` tuple so GatewayRunner can make deferred
+        # deliveries generation-aware and avoid stale runs clearing callbacks
+        # registered by a fresher run for the same session.
+        self._post_delivery_callbacks: Dict[str, Any] = {}
         self._expected_cancelled_tasks: set[asyncio.Task] = set()
         self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
         # Chats where auto-TTS on voice input is disabled (set by /voice off)
@@ -1471,6 +1472,48 @@ class BasePlatformAdapter(ABC):
         except Exception:
             pass
 
+    def register_post_delivery_callback(
+        self,
+        session_key: str,
+        callback: Callable,
+        *,
+        generation: int | None = None,
+    ) -> None:
+        """Register a deferred callback to fire after the main response.
+
+        ``generation`` lets callers tie the callback to a specific gateway run
+        generation so stale runs cannot clear callbacks owned by a fresher run.
+        """
+        if not session_key or not callable(callback):
+            return
+        if generation is None:
+            self._post_delivery_callbacks[session_key] = callback
+        else:
+            self._post_delivery_callbacks[session_key] = (int(generation), callback)
+
+    def pop_post_delivery_callback(
+        self,
+        session_key: str,
+        *,
+        generation: int | None = None,
+    ) -> Callable | None:
+        """Pop a deferred callback, optionally requiring generation ownership."""
+        if not session_key:
+            return None
+        entry = self._post_delivery_callbacks.get(session_key)
+        if entry is None:
+            return None
+        if isinstance(entry, tuple) and len(entry) == 2:
+            entry_generation, callback = entry
+            if generation is not None and int(entry_generation) != int(generation):
+                return None
+            self._post_delivery_callbacks.pop(session_key, None)
+            return callback if callable(callback) else None
+        if generation is not None:
+            return None
+        self._post_delivery_callbacks.pop(session_key, None)
+        return entry if callable(entry) else None
+
     # ── Processing lifecycle hooks ──────────────────────────────────────────
     # Subclasses override these to react to message processing events
     # (e.g. Discord adds 👀/✅/❌ reactions).
@@ -1741,6 +1784,7 @@ class BasePlatformAdapter(ABC):
         # Fall back to a new Event only if the entry was removed externally.
         interrupt_event = self._active_sessions.get(session_key) or asyncio.Event()
         self._active_sessions[session_key] = interrupt_event
+        callback_generation = getattr(interrupt_event, "_hermes_run_generation", None)
         
         # Start continuous typing indicator (refreshes every 2 seconds)
         _thread_metadata = {"thread_id": event.source.thread_id} if event.source.thread_id else None
@@ -2015,7 +2059,14 @@ class BasePlatformAdapter(ABC):
         finally:
             # Fire any one-shot post-delivery callback registered for this
             # session (e.g. deferred background-review notifications).
-            _post_cb = getattr(self, "_post_delivery_callbacks", {}).pop(session_key, None)
+            _callback_generation = callback_generation
+            if hasattr(self, "pop_post_delivery_callback"):
+                _post_cb = self.pop_post_delivery_callback(
+                    session_key,
+                    generation=_callback_generation,
+                )
+            else:
+                _post_cb = getattr(self, "_post_delivery_callbacks", {}).pop(session_key, None)
             if callable(_post_cb):
                 try:
                     _post_cb()
@@ -2061,10 +2112,10 @@ class BasePlatformAdapter(ABC):
                     pass
                 # Leave _active_sessions[session_key] populated — the drain
                 # task's own lifecycle will clean it up.
-                return
-            # Clean up session tracking
-            if session_key in self._active_sessions:
-                del self._active_sessions[session_key]
+            else:
+                # Clean up session tracking
+                if session_key in self._active_sessions:
+                    del self._active_sessions[session_key]
     
     async def cancel_background_tasks(self) -> None:
         """Cancel any in-flight background message-processing tasks.
diff --git a/gateway/run.py b/gateway/run.py
index ed3b6b5ee32..60c57495b44 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -402,14 +402,21 @@ def _dequeue_pending_event(adapter, session_key: str) -> MessageEvent | None:
     return adapter.get_pending_message(session_key)
 
 
+_INTERRUPT_REASON_STOP = "Stop requested"
+_INTERRUPT_REASON_RESET = "Session reset requested"
+_INTERRUPT_REASON_TIMEOUT = "Execution timed out (inactivity)"
+_INTERRUPT_REASON_SSE_DISCONNECT = "SSE client disconnected"
+_INTERRUPT_REASON_GATEWAY_SHUTDOWN = "Gateway shutting down"
+_INTERRUPT_REASON_GATEWAY_RESTART = "Gateway restarting"
+
 _CONTROL_INTERRUPT_MESSAGES = frozenset(
     {
-        "stop requested",
-        "session reset requested",
-        "execution timed out (inactivity)",
-        "sse client disconnected",
-        "gateway shutting down",
-        "gateway restarting",
+        _INTERRUPT_REASON_STOP.lower(),
+        _INTERRUPT_REASON_RESET.lower(),
+        _INTERRUPT_REASON_TIMEOUT.lower(),
+        _INTERRUPT_REASON_SSE_DISCONNECT.lower(),
+        _INTERRUPT_REASON_GATEWAY_SHUTDOWN.lower(),
+        _INTERRUPT_REASON_GATEWAY_RESTART.lower(),
     }
 )
 
@@ -2514,7 +2521,7 @@ class GatewayRunner:
                             _sk[:20], _e,
                         )
                 self._interrupt_running_agents(
-                    "Gateway restarting" if self._restart_requested else "Gateway shutting down"
+                    _INTERRUPT_REASON_GATEWAY_RESTART if self._restart_requested else _INTERRUPT_REASON_GATEWAY_SHUTDOWN
                 )
                 interrupt_deadline = asyncio.get_running_loop().time() + 5.0
                 while self._running_agents and asyncio.get_running_loop().time() < interrupt_deadline:
@@ -3112,21 +3119,12 @@ class GatewayRunner:
             # _interrupt_requested.  Force-clean _running_agents so the session
             # is unlocked and subsequent messages are processed normally.
             if _cmd_def_inner and _cmd_def_inner.name == "stop":
-                running_agent = self._running_agents.get(_quick_key)
-                if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
-                    running_agent.interrupt("Stop requested")
-                # Force-clean: remove the session lock regardless of agent state
-                self._invalidate_session_run_generation(
+                await self._interrupt_and_clear_session(
                     _quick_key,
-                    reason="stop_command",
+                    source,
+                    interrupt_reason=_INTERRUPT_REASON_STOP,
+                    invalidation_reason="stop_command",
                 )
-                adapter = self.adapters.get(source.platform)
-                if adapter and hasattr(adapter, "interrupt_session_activity"):
-                    await adapter.interrupt_session_activity(_quick_key, source.chat_id)
-                if adapter and hasattr(adapter, 'get_pending_message'):
-                    adapter.get_pending_message(_quick_key)  # consume and discard
-                self._pending_messages.pop(_quick_key, None)
-                self._release_running_agent_state(_quick_key)
                 logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key[:20])
                 return "⚡ Stopped. You can continue this session."
 
@@ -3138,23 +3136,15 @@ class GatewayRunner:
             # doesn't get re-processed as a user message after the
             # interrupt completes.
             if _cmd_def_inner and _cmd_def_inner.name == "new":
-                running_agent = self._running_agents.get(_quick_key)
-                if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
-                    running_agent.interrupt("Session reset requested")
                 # Clear any pending messages so the old text doesn't replay
-                self._invalidate_session_run_generation(
+                await self._interrupt_and_clear_session(
                     _quick_key,
-                    reason="new_command",
+                    source,
+                    interrupt_reason=_INTERRUPT_REASON_RESET,
+                    invalidation_reason="new_command",
                 )
-                adapter = self.adapters.get(source.platform)
-                if adapter and hasattr(adapter, "interrupt_session_activity"):
-                    await adapter.interrupt_session_activity(_quick_key, source.chat_id)
-                if adapter and hasattr(adapter, 'get_pending_message'):
-                    adapter.get_pending_message(_quick_key)  # consume and discard
-                self._pending_messages.pop(_quick_key, None)
                 # Clean up the running agent entry so the reset handler
                 # doesn't think an agent is still active.
-                self._release_running_agent_state(_quick_key)
                 return await self._handle_reset_command(event)
 
             # /queue <prompt> — queue without interrupting
@@ -4266,6 +4256,15 @@ class GatewayRunner:
         if message_text is None:
             return
 
+        # Bind this gateway run generation to the adapter's active-session
+        # event so deferred post-delivery callbacks can be released by the
+        # same run that registered them.
+        self._bind_adapter_run_generation(
+            self.adapters.get(source.platform),
+            session_key,
+            run_generation,
+        )
+
         try:
             # Emit agent:start hook
             hook_ctx = {
@@ -4304,7 +4303,12 @@ class GatewayRunner:
                     run_generation,
                 )
                 _stale_adapter = self.adapters.get(source.platform)
-                if _stale_adapter and hasattr(_stale_adapter, "_post_delivery_callbacks"):
+                if getattr(type(_stale_adapter), "pop_post_delivery_callback", None) is not None:
+                    _stale_adapter.pop_post_delivery_callback(
+                        _quick_key,
+                        generation=run_generation,
+                    )
+                elif _stale_adapter and hasattr(_stale_adapter, "_post_delivery_callbacks"):
                     _stale_adapter._post_delivery_callbacks.pop(_quick_key, None)
                 return None
 
@@ -4982,22 +4986,23 @@ class GatewayRunner:
         agent = self._running_agents.get(session_key)
         if agent is _AGENT_PENDING_SENTINEL:
             # Force-clean the sentinel so the session is unlocked.
-            self._invalidate_session_run_generation(session_key, reason="stop_command_pending")
-            adapter = self.adapters.get(source.platform)
-            if adapter and hasattr(adapter, "interrupt_session_activity"):
-                await adapter.interrupt_session_activity(session_key, source.chat_id)
-            self._release_running_agent_state(session_key)
+            await self._interrupt_and_clear_session(
+                session_key,
+                source,
+                interrupt_reason=_INTERRUPT_REASON_STOP,
+                invalidation_reason="stop_command_pending",
+            )
             logger.info("STOP (pending) for session %s — sentinel cleared", session_key[:20])
             return "⚡ Stopped. The agent hadn't started yet — you can continue this session."
         if agent:
-            agent.interrupt("Stop requested")
             # Force-clean the session lock so a truly hung agent doesn't
             # keep it locked forever.
-            self._invalidate_session_run_generation(session_key, reason="stop_command_handler")
-            adapter = self.adapters.get(source.platform)
-            if adapter and hasattr(adapter, "interrupt_session_activity"):
-                await adapter.interrupt_session_activity(session_key, source.chat_id)
-            self._release_running_agent_state(session_key)
+            await self._interrupt_and_clear_session(
+                session_key,
+                source,
+                interrupt_reason=_INTERRUPT_REASON_STOP,
+                invalidation_reason="stop_command_handler",
+            )
             return "⚡ Stopped. You can continue this session."
         else:
             return "No active task to stop."
@@ -8481,6 +8486,47 @@ class GatewayRunner:
         generations = self.__dict__.get("_session_run_generation") or {}
         return int(generations.get(session_key, 0)) == int(generation)
 
+    def _bind_adapter_run_generation(
+        self,
+        adapter: Any,
+        session_key: str,
+        generation: int | None,
+    ) -> None:
+        """Bind a gateway run generation to the adapter's active-session event."""
+        if not adapter or not session_key or generation is None:
+            return
+        try:
+            interrupt_event = getattr(adapter, "_active_sessions", {}).get(session_key)
+            if interrupt_event is not None:
+                setattr(interrupt_event, "_hermes_run_generation", int(generation))
+        except Exception:
+            pass
+
+    async def _interrupt_and_clear_session(
+        self,
+        session_key: str,
+        source: SessionSource,
+        *,
+        interrupt_reason: str,
+        invalidation_reason: str,
+        release_running_state: bool = True,
+    ) -> None:
+        """Interrupt the current run and clear queued session state consistently."""
+        if not session_key:
+            return
+        running_agent = self._running_agents.get(session_key)
+        if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
+            running_agent.interrupt(interrupt_reason)
+        self._invalidate_session_run_generation(session_key, reason=invalidation_reason)
+        adapter = self.adapters.get(source.platform)
+        if adapter and hasattr(adapter, "interrupt_session_activity"):
+            await adapter.interrupt_session_activity(session_key, source.chat_id)
+        if adapter and hasattr(adapter, "get_pending_message"):
+            adapter.get_pending_message(session_key)  # consume and discard
+        self._pending_messages.pop(session_key, None)
+        if release_running_state:
+            self._release_running_agent_state(session_key)
+
     def _evict_cached_agent(self, session_key: str) -> None:
         """Remove a cached agent for a session (called on /new, /model, etc)."""
         _lock = getattr(self, "_agent_cache_lock", None)
@@ -8662,6 +8708,7 @@ class GatewayRunner:
         source: "SessionSource",
         session_id: str,
         session_key: str = None,
+        run_generation: Optional[int] = None,
         event_message_id: Optional[str] = None,
     ) -> Dict[str, Any]:
         """Forward the message to a remote Hermes API server instead of
@@ -8697,6 +8744,11 @@ class GatewayRunner:
 
         proxy_key = os.getenv("GATEWAY_PROXY_KEY", "").strip()
 
+        def _run_still_current() -> bool:
+            if run_generation is None or not session_key:
+                return True
+            return self._is_session_run_current(session_key, run_generation)
+
         # Build messages in OpenAI chat format --------------------------
         #
         # The remote api_server can maintain session continuity via
@@ -8826,6 +8878,21 @@ class GatewayRunner:
                     # Parse SSE stream
                     buffer = ""
                     async for chunk in resp.content.iter_any():
+                        if not _run_still_current():
+                            logger.info(
+                                "Discarding stale proxy stream for %s — generation %d is no longer current",
+                                session_key[:20] if session_key else "?",
+                                run_generation or 0,
+                            )
+                            return {
+                                "final_response": "",
+                                "messages": [],
+                                "api_calls": 0,
+                                "tools": [],
+                                "history_offset": len(history),
+                                "session_id": session_id,
+                                "response_previewed": False,
+                            }
                         text = chunk.decode("utf-8", errors="replace")
                         buffer += text
 
@@ -8875,6 +8942,21 @@ class GatewayRunner:
                     stream_task.cancel()
 
         _elapsed = time.time() - _start
+        if not _run_still_current():
+            logger.info(
+                "Discarding stale proxy result for %s — generation %d is no longer current",
+                session_key[:20] if session_key else "?",
+                run_generation or 0,
+            )
+            return {
+                "final_response": "",
+                "messages": [],
+                "api_calls": 0,
+                "tools": [],
+                "history_offset": len(history),
+                "session_id": session_id,
+                "response_previewed": False,
+            }
         logger.info(
             "proxy response: url=%s session=%s time=%.1fs response=%d chars",
             proxy_url, (session_id or "")[:20], _elapsed, len(full_response),
@@ -8929,6 +9011,7 @@ class GatewayRunner:
                 source=source,
                 session_id=session_id,
                 session_key=session_key,
+                run_generation=run_generation,
                 event_message_id=event_message_id,
             )
 
@@ -9527,9 +9610,16 @@ class GatewayRunner:
             # Register the release hook on the adapter so base.py's finally
             # block can fire it after delivering the main response.
             if _status_adapter and session_key:
-                _pdc = getattr(_status_adapter, "_post_delivery_callbacks", None)
-                if _pdc is not None:
-                    _pdc[session_key] = _release_bg_review_messages
+                if getattr(type(_status_adapter), "register_post_delivery_callback", None) is not None:
+                    _status_adapter.register_post_delivery_callback(
+                        session_key,
+                        _release_bg_review_messages,
+                        generation=run_generation,
+                    )
+                else:
+                    _pdc = getattr(_status_adapter, "_post_delivery_callbacks", None)
+                    if _pdc is not None:
+                        _pdc[session_key] = _release_bg_review_messages
 
             # Store agent reference for interrupt support
             agent_holder[0] = agent
@@ -10131,7 +10221,7 @@ class GatewayRunner:
                 # Interrupt the agent if it's still running so the thread
                 # pool worker is freed.
                 if _timed_out_agent and hasattr(_timed_out_agent, "interrupt"):
-                    _timed_out_agent.interrupt("Execution timed out (inactivity)")
+                    _timed_out_agent.interrupt(_INTERRUPT_REASON_TIMEOUT)
 
                 _timeout_mins = int(_agent_timeout // 60) or 1
 
@@ -10309,7 +10399,17 @@ class GatewayRunner:
                     # first response has been delivered.  Pop from the
                     # adapter's callback dict (prevents double-fire in
                     # base.py's finally block) and call it.
-                    if adapter and hasattr(adapter, "_post_delivery_callbacks"):
+                    if getattr(type(adapter), "pop_post_delivery_callback", None) is not None:
+                        _bg_cb = adapter.pop_post_delivery_callback(
+                            session_key,
+                            generation=run_generation,
+                        )
+                        if callable(_bg_cb):
+                            try:
+                                _bg_cb()
+                            except Exception:
+                                pass
+                    elif adapter and hasattr(adapter, "_post_delivery_callbacks"):
                         _bg_cb = adapter._post_delivery_callbacks.pop(session_key, None)
                         if callable(_bg_cb):
                             try:
diff --git a/tests/gateway/test_proxy_mode.py b/tests/gateway/test_proxy_mode.py
index f3024cb09f1..11180639e8d 100644
--- a/tests/gateway/test_proxy_mode.py
+++ b/tests/gateway/test_proxy_mode.py
@@ -19,6 +19,7 @@ def _make_runner(proxy_url=None):
     runner.config = MagicMock()
     runner.config.streaming = StreamingConfig()
     runner._running_agents = {}
+    runner._session_run_generation = {}
     runner._session_model_overrides = {}
     runner._agent_cache = {}
     runner._agent_cache_lock = None
@@ -160,10 +161,12 @@ class TestRunAgentProxyDispatch:
             source=source,
             session_id="test-session-123",
             session_key="test-key",
+            run_generation=7,
         )
 
         assert result["final_response"] == "Hello from remote!"
         runner._run_agent_via_proxy.assert_called_once()
+        assert runner._run_agent_via_proxy.call_args.kwargs["run_generation"] == 7
 
     @pytest.mark.asyncio
     async def test_run_agent_skips_proxy_when_not_configured(self, monkeypatch):
@@ -370,6 +373,40 @@ class TestRunAgentViaProxy:
         assert "session_id" in result
         assert result["session_id"] == "sess-123"
 
+    @pytest.mark.asyncio
+    async def test_proxy_stale_generation_returns_empty_result(self, monkeypatch):
+        monkeypatch.setenv("GATEWAY_PROXY_URL", "http://host:8642")
+        monkeypatch.delenv("GATEWAY_PROXY_KEY", raising=False)
+        runner = _make_runner()
+        source = _make_source()
+        runner._session_run_generation["test-key"] = 2
+
+        resp = _FakeSSEResponse(
+            status=200,
+            sse_chunks=[
+                'data: {"choices":[{"delta":{"content":"stale"}}]}\n\n',
+                "data: [DONE]\n\n",
+            ],
+        )
+        session = _FakeSession(resp)
+
+        with patch("gateway.run._load_gateway_config", return_value={}):
+            with _patch_aiohttp(session):
+                with patch("aiohttp.ClientTimeout"):
+                    result = await runner._run_agent_via_proxy(
+                        message="hi",
+                        context_prompt="",
+                        history=[],
+                        source=source,
+                        session_id="sess-123",
+                        session_key="test-key",
+                        run_generation=1,
+                    )
+
+        assert result["final_response"] == ""
+        assert result["messages"] == []
+        assert result["api_calls"] == 0
+
     @pytest.mark.asyncio
     async def test_no_auth_header_without_key(self, monkeypatch):
         monkeypatch.setenv("GATEWAY_PROXY_URL", "http://host:8642")
diff --git a/tests/gateway/test_status_command.py b/tests/gateway/test_status_command.py
index 3cdf637dd97..50e1c52cc29 100644
--- a/tests/gateway/test_status_command.py
+++ b/tests/gateway/test_status_command.py
@@ -270,6 +270,75 @@ async def test_handle_message_discards_stale_result_after_session_invalidation(m
     assert session_key not in runner.adapters[Platform.TELEGRAM]._post_delivery_callbacks
 
 
+@pytest.mark.asyncio
+async def test_handle_message_stale_result_keeps_newer_generation_callback(monkeypatch):
+    import gateway.run as gateway_run
+
+    class _Adapter:
+        def __init__(self):
+            self._post_delivery_callbacks = {}
+
+        async def send(self, *args, **kwargs):
+            return None
+
+        def pop_post_delivery_callback(self, session_key, *, generation=None):
+            entry = self._post_delivery_callbacks.get(session_key)
+            if entry is None:
+                return None
+            if isinstance(entry, tuple):
+                entry_generation, callback = entry
+                if generation is not None and entry_generation != generation:
+                    return None
+                self._post_delivery_callbacks.pop(session_key, None)
+                return callback
+            if generation is not None:
+                return None
+            return self._post_delivery_callbacks.pop(session_key, None)
+
+    session_entry = SessionEntry(
+        session_key=build_session_key(_make_source()),
+        session_id="sess-1",
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        platform=Platform.TELEGRAM,
+        chat_type="dm",
+    )
+    runner = _make_runner(session_entry)
+    runner.session_store.load_transcript.return_value = [{"role": "user", "content": "earlier"}]
+    session_key = session_entry.session_key
+    adapter = _Adapter()
+    runner.adapters[Platform.TELEGRAM] = adapter
+
+    async def _stale_result(**kwargs):
+        # Simulate a newer run claiming the callback slot before the stale run unwinds.
+        runner._session_run_generation[session_key] = 2
+        adapter._post_delivery_callbacks[session_key] = (2, lambda: None)
+        return {
+            "final_response": "late reply",
+            "messages": [],
+            "tools": [],
+            "history_offset": 0,
+            "last_prompt_tokens": 80,
+            "input_tokens": 120,
+            "output_tokens": 45,
+            "model": "openai/test-model",
+        }
+
+    runner._run_agent = AsyncMock(side_effect=_stale_result)
+
+    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"})
+    monkeypatch.setattr(
+        "agent.model_metadata.get_model_context_length",
+        lambda *_args, **_kwargs: 100000,
+    )
+
+    result = await runner._handle_message(_make_event("hello"))
+
+    assert result is None
+    assert session_key in adapter._post_delivery_callbacks
+    assert adapter._post_delivery_callbacks[session_key][0] == 2
+
+
 
 @pytest.mark.asyncio
 async def test_status_command_bypasses_active_session_guard():

From 4f0e49dc7bd059fada5c6110b7bb14a6fb3b5037 Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Sun, 19 Apr 2026 15:34:02 +0530
Subject: [PATCH 125/143] chore: add sgaofen to AUTHOR_MAP

---
 scripts/release.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/release.py b/scripts/release.py
index b153140057b..9c04c1c6b36 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -77,6 +77,7 @@ AUTHOR_MAP = {
     "Asunfly@users.noreply.github.com": "Asunfly",
     "2500400+honghua@users.noreply.github.com": "honghua",
     "nish3451@users.noreply.github.com": "nish3451",
+    "135070653+sgaofen@users.noreply.github.com": "sgaofen",
     # contributors (manual mapping from git names)
     "ahmedsherif95@gmail.com": "asheriif",
     "liujinkun@bytedance.com": "liujinkun2025",

From cc59d133dc52197a0388f2f3b33911fc15c6c74e Mon Sep 17 00:00:00 2001
From: sgaofen <135070653+sgaofen@users.noreply.github.com>
Date: Sun, 12 Apr 2026 15:30:16 -0700
Subject: [PATCH 126/143] fix(feishu): split fenced code blocks in post payload

---
 gateway/platforms/feishu.py  | 64 +++++++++++++++++++++++++++++++-----
 tests/gateway/test_feishu.py | 63 +++++++++++++++++++++++++++++++++++
 2 files changed, 119 insertions(+), 8 deletions(-)

diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py
index 351337e8275..6e27d33e094 100644
--- a/gateway/platforms/feishu.py
+++ b/gateway/platforms/feishu.py
@@ -430,23 +430,71 @@ def _coerce_required_int(value: Any, default: int, min_value: int = 0) -> int:
 
 
 def _build_markdown_post_payload(content: str) -> str:
+    rows = _build_markdown_post_rows(content)
     return json.dumps(
         {
             "zh_cn": {
-                "content": [
-                    [
-                        {
-                            "tag": "md",
-                            "text": content,
-                        }
-                    ]
-                ],
+                "content": rows,
             }
         },
         ensure_ascii=False,
     )
 
 
+def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]:
+    """Build Feishu post rows while isolating fenced code blocks.
+
+    Feishu's `md` renderer can swallow trailing content when a fenced code block
+    appears inside one large markdown element. Splitting the reply at code
+    fences preserves the surrounding markdown while keeping the code block in a
+    dedicated row.
+    """
+    if not content:
+        return [[{"tag": "md", "text": ""}]]
+    if "```" not in content:
+        return [[{"tag": "md", "text": content}]]
+
+    rows: List[List[Dict[str, str]]] = []
+    current: List[str] = []
+    in_code_block = False
+
+    for raw_line in content.splitlines():
+        line = raw_line.rstrip()
+        is_fence = line.strip().startswith("```")
+
+        if is_fence:
+            if not in_code_block and current:
+                segment = "\n".join(current).strip()
+                if segment:
+                    rows.append([{"tag": "md", "text": segment}])
+                current = []
+            current.append(line)
+            in_code_block = not in_code_block
+            if not in_code_block:
+                segment = "\n".join(current).strip()
+                if segment:
+                    rows.append([{"tag": "md", "text": segment}])
+                current = []
+            continue
+
+        current.append(line)
+
+    if current:
+        segment = "\n".join(current).strip()
+        if segment:
+            rows.append([{"tag": "md", "text": segment}])
+
+    return rows or [[{"tag": "md", "text": content}]]
+
+
+def parse_feishu_post_content(raw_content: str) -> FeishuPostParseResult:
+    try:
+        parsed = json.loads(raw_content) if raw_content else {}
+    except json.JSONDecodeError:
+        return FeishuPostParseResult(text_content=FALLBACK_POST_TEXT)
+    return parse_feishu_post_payload(parsed)
+
+
 def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
     resolved = _resolve_post_payload(payload)
     if not resolved:
diff --git a/tests/gateway/test_feishu.py b/tests/gateway/test_feishu.py
index 661e37ec1a2..47e5a949668 100644
--- a/tests/gateway/test_feishu.py
+++ b/tests/gateway/test_feishu.py
@@ -2370,6 +2370,69 @@ class TestAdapterBehavior(unittest.TestCase):
         elements = payload["zh_cn"]["content"][0]
         self.assertEqual(elements, [{"tag": "md", "text": "可以用 **粗体** 和 *斜体*。"}])
 
+    @patch.dict(os.environ, {}, clear=True)
+    def test_send_splits_fenced_code_blocks_into_separate_post_rows(self):
+        from gateway.config import PlatformConfig
+        from gateway.platforms.feishu import FeishuAdapter
+
+        adapter = FeishuAdapter(PlatformConfig())
+        captured = {}
+
+        class _MessageAPI:
+            def create(self, request):
+                captured["request"] = request
+                return SimpleNamespace(
+                    success=lambda: True,
+                    data=SimpleNamespace(message_id="om_codeblock"),
+                )
+
+        adapter._client = SimpleNamespace(
+            im=SimpleNamespace(
+                v1=SimpleNamespace(
+                    message=_MessageAPI(),
+                )
+            )
+        )
+
+        async def _direct(func, *args, **kwargs):
+            return func(*args, **kwargs)
+
+        content = (
+            "确认已入库 ✓\n"
+            "文件路径：`/root/.hermes/profiles/agent_cto/cron/jobs.json`\n"
+            "**解码后的内容：**\n"
+            "```json\n"
+            '{"cron": "list"}\n'
+            "```\n"
+            "后续说明仍应保留。"
+        )
+
+        with patch("gateway.platforms.feishu.asyncio.to_thread", side_effect=_direct):
+            result = asyncio.run(
+                adapter.send(
+                    chat_id="oc_chat",
+                    content=content,
+                )
+            )
+
+        self.assertTrue(result.success)
+        self.assertEqual(captured["request"].request_body.msg_type, "post")
+        payload = json.loads(captured["request"].request_body.content)
+        rows = payload["zh_cn"]["content"]
+        self.assertEqual(
+            rows,
+            [
+                [
+                    {
+                        "tag": "md",
+                        "text": "确认已入库 ✓\n文件路径：`/root/.hermes/profiles/agent_cto/cron/jobs.json`\n**解码后的内容：**",
+                    }
+                ],
+                [{"tag": "md", "text": "```json\n{\"cron\": \"list\"}\n```"}],
+                [{"tag": "md", "text": "后续说明仍应保留。"}],
+            ],
+        )
+
     @patch.dict(os.environ, {}, clear=True)
     def test_send_falls_back_to_text_when_post_payload_is_rejected(self):
         from gateway.config import PlatformConfig

From a9debf10ffd61e9e502a25b203987335671a805d Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Sun, 19 Apr 2026 15:40:53 +0530
Subject: [PATCH 127/143] fix(feishu): harden fenced post row splitting

---
 gateway/platforms/feishu.py  | 47 ++++++++++++++++++++----------------
 tests/gateway/test_feishu.py | 42 ++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+), 21 deletions(-)

diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py
index 6e27d33e094..dc3d799c93d 100644
--- a/gateway/platforms/feishu.py
+++ b/gateway/platforms/feishu.py
@@ -119,6 +119,8 @@ _MARKDOWN_HINT_RE = re.compile(
     re.MULTILINE,
 )
 _MARKDOWN_LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
+_MARKDOWN_FENCE_OPEN_RE = re.compile(r"^```([^\n`]*)\s*$")
+_MARKDOWN_FENCE_CLOSE_RE = re.compile(r"^```\s*$")
 _MENTION_RE = re.compile(r"@_user_\d+")
 _MULTISPACE_RE = re.compile(r"[ \t]{2,}")
 _POST_CONTENT_INVALID_RE = re.compile(r"content format of the post type is incorrect", re.IGNORECASE)
@@ -445,9 +447,9 @@ def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]:
     """Build Feishu post rows while isolating fenced code blocks.
 
     Feishu's `md` renderer can swallow trailing content when a fenced code block
-    appears inside one large markdown element. Splitting the reply at code
-    fences preserves the surrounding markdown while keeping the code block in a
-    dedicated row.
+    appears inside one large markdown element. Split the reply at real fence
+    lines so prose before/after the code block remains visible while code stays
+    in a dedicated row.
     """
     if not content:
         return [[{"tag": "md", "text": ""}]]
@@ -458,32 +460,35 @@ def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]:
     current: List[str] = []
     in_code_block = False
 
+    def _flush_current() -> None:
+        nonlocal current
+        if not current:
+            return
+        segment = "\n".join(current)
+        if segment.strip():
+            rows.append([{"tag": "md", "text": segment}])
+        current = []
+
     for raw_line in content.splitlines():
-        line = raw_line.rstrip()
-        is_fence = line.strip().startswith("```")
+        stripped_line = raw_line.strip()
+        is_fence = bool(
+            _MARKDOWN_FENCE_CLOSE_RE.match(stripped_line)
+            if in_code_block
+            else _MARKDOWN_FENCE_OPEN_RE.match(stripped_line)
+        )
 
         if is_fence:
-            if not in_code_block and current:
-                segment = "\n".join(current).strip()
-                if segment:
-                    rows.append([{"tag": "md", "text": segment}])
-                current = []
-            current.append(line)
+            if not in_code_block:
+                _flush_current()
+            current.append(raw_line)
             in_code_block = not in_code_block
             if not in_code_block:
-                segment = "\n".join(current).strip()
-                if segment:
-                    rows.append([{"tag": "md", "text": segment}])
-                current = []
+                _flush_current()
             continue
 
-        current.append(line)
-
-    if current:
-        segment = "\n".join(current).strip()
-        if segment:
-            rows.append([{"tag": "md", "text": segment}])
+        current.append(raw_line)
 
+    _flush_current()
     return rows or [[{"tag": "md", "text": content}]]
 
 
diff --git a/tests/gateway/test_feishu.py b/tests/gateway/test_feishu.py
index 47e5a949668..d5511c064ef 100644
--- a/tests/gateway/test_feishu.py
+++ b/tests/gateway/test_feishu.py
@@ -2433,6 +2433,48 @@ class TestAdapterBehavior(unittest.TestCase):
             ],
         )
 
+    @patch.dict(os.environ, {}, clear=True)
+    def test_build_post_payload_keeps_fence_like_code_lines_inside_code_block(self):
+        from gateway.config import PlatformConfig
+        from gateway.platforms.feishu import FeishuAdapter
+
+        adapter = FeishuAdapter(PlatformConfig())
+        payload = json.loads(
+            adapter._build_post_payload(
+                "before\n```python\n```oops\n```\nafter"
+            )
+        )
+
+        self.assertEqual(
+            payload["zh_cn"]["content"],
+            [
+                [{"tag": "md", "text": "before"}],
+                [{"tag": "md", "text": "```python\n```oops\n```"}],
+                [{"tag": "md", "text": "after"}],
+            ],
+        )
+
+    @patch.dict(os.environ, {}, clear=True)
+    def test_build_post_payload_preserves_trailing_spaces_in_code_block(self):
+        from gateway.config import PlatformConfig
+        from gateway.platforms.feishu import FeishuAdapter
+
+        adapter = FeishuAdapter(PlatformConfig())
+        payload = json.loads(
+            adapter._build_post_payload(
+                "before\n```python\nline with two spaces  \n```\nafter"
+            )
+        )
+
+        self.assertEqual(
+            payload["zh_cn"]["content"],
+            [
+                [{"tag": "md", "text": "before"}],
+                [{"tag": "md", "text": "```python\nline with two spaces  \n```"}],
+                [{"tag": "md", "text": "after"}],
+            ],
+        )
+
     @patch.dict(os.environ, {}, clear=True)
     def test_send_falls_back_to_text_when_post_payload_is_rejected(self):
         from gateway.config import PlatformConfig

From 957ca79e8ed2fd1377553d70b9a79232f84b122e Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Sun, 19 Apr 2026 15:51:43 +0530
Subject: [PATCH 128/143] fix(feishu): drop dead helper and cover repeated
 fenced blocks

---
 gateway/platforms/feishu.py  |  8 --------
 tests/gateway/test_feishu.py | 23 +++++++++++++++++++++++
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py
index dc3d799c93d..3b57db46d3c 100644
--- a/gateway/platforms/feishu.py
+++ b/gateway/platforms/feishu.py
@@ -492,14 +492,6 @@ def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]:
     return rows or [[{"tag": "md", "text": content}]]
 
 
-def parse_feishu_post_content(raw_content: str) -> FeishuPostParseResult:
-    try:
-        parsed = json.loads(raw_content) if raw_content else {}
-    except json.JSONDecodeError:
-        return FeishuPostParseResult(text_content=FALLBACK_POST_TEXT)
-    return parse_feishu_post_payload(parsed)
-
-
 def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
     resolved = _resolve_post_payload(payload)
     if not resolved:
diff --git a/tests/gateway/test_feishu.py b/tests/gateway/test_feishu.py
index d5511c064ef..14ed9e1715d 100644
--- a/tests/gateway/test_feishu.py
+++ b/tests/gateway/test_feishu.py
@@ -2475,6 +2475,29 @@ class TestAdapterBehavior(unittest.TestCase):
             ],
         )
 
+    @patch.dict(os.environ, {}, clear=True)
+    def test_build_post_payload_splits_multiple_fenced_code_blocks(self):
+        from gateway.config import PlatformConfig
+        from gateway.platforms.feishu import FeishuAdapter
+
+        adapter = FeishuAdapter(PlatformConfig())
+        payload = json.loads(
+            adapter._build_post_payload(
+                "before\n```python\nprint(1)\n```\nmiddle\n```json\n{}\n```\nafter"
+            )
+        )
+
+        self.assertEqual(
+            payload["zh_cn"]["content"],
+            [
+                [{"tag": "md", "text": "before"}],
+                [{"tag": "md", "text": "```python\nprint(1)\n```"}],
+                [{"tag": "md", "text": "middle"}],
+                [{"tag": "md", "text": "```json\n{}\n```"}],
+                [{"tag": "md", "text": "after"}],
+            ],
+        )
+
     @patch.dict(os.environ, {}, clear=True)
     def test_send_falls_back_to_text_when_post_payload_is_rejected(self):
         from gateway.config import PlatformConfig

From 66ee081dc181fc731994f50bb99b0a52a2761310 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 19 Apr 2026 05:14:17 -0700
Subject: [PATCH 129/143] skills: move 7 niche mlops/mcp skills to optional
 (#12474)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Built-in → optional-skills/:
  mlops/training/peft         → optional-skills/mlops/peft
  mlops/training/pytorch-fsdp → optional-skills/mlops/pytorch-fsdp
  mlops/models/clip           → optional-skills/mlops/clip
  mlops/models/stable-diffusion → optional-skills/mlops/stable-diffusion
  mlops/models/whisper        → optional-skills/mlops/whisper
  mlops/cloud/modal           → optional-skills/mlops/modal
  mcp/mcporter                → optional-skills/mcp/mcporter

Built-in mlops training kept: axolotl, trl-fine-tuning, unsloth.
Built-in mlops models kept: audiocraft, segment-anything.
Built-in mlops evaluation/research/huggingface-hub/inference all kept.
native-mcp stays built-in (documents the native MCP tool); mcporter was a
redundant alternative CLI.

Also: removed now-empty skills/mlops/cloud/ dir, refreshed
skills/mlops/models/DESCRIPTION.md and skills/mcp/DESCRIPTION.md to match
what's left, and synchronized both catalog pages (skills-catalog.md,
optional-skills-catalog.md).
---
 .../mcp/mcporter/SKILL.md                        |  0
 .../mlops}/clip/SKILL.md                         |  0
 .../mlops}/clip/references/applications.md       |  0
 .../mlops}/modal/SKILL.md                        |  0
 .../mlops}/modal/references/advanced-usage.md    |  0
 .../mlops}/modal/references/troubleshooting.md   |  0
 .../mlops}/peft/SKILL.md                         |  0
 .../mlops}/peft/references/advanced-usage.md     |  0
 .../mlops}/peft/references/troubleshooting.md    |  0
 .../mlops}/pytorch-fsdp/SKILL.md                 |  0
 .../mlops}/pytorch-fsdp/references/index.md      |  0
 .../mlops}/pytorch-fsdp/references/other.md      |  0
 .../mlops}/stable-diffusion/SKILL.md             |  0
 .../references/advanced-usage.md                 |  0
 .../references/troubleshooting.md                |  0
 .../mlops}/whisper/SKILL.md                      |  0
 .../mlops}/whisper/references/languages.md       |  0
 skills/mcp/DESCRIPTION.md                        |  2 +-
 skills/mlops/cloud/DESCRIPTION.md                |  3 ---
 skills/mlops/models/DESCRIPTION.md               |  2 +-
 .../docs/reference/optional-skills-catalog.md    |  7 +++++++
 website/docs/reference/skills-catalog.md         | 16 +---------------
 22 files changed, 10 insertions(+), 20 deletions(-)
 rename {skills => optional-skills}/mcp/mcporter/SKILL.md (100%)
 rename {skills/mlops/models => optional-skills/mlops}/clip/SKILL.md (100%)
 rename {skills/mlops/models => optional-skills/mlops}/clip/references/applications.md (100%)
 rename {skills/mlops/cloud => optional-skills/mlops}/modal/SKILL.md (100%)
 rename {skills/mlops/cloud => optional-skills/mlops}/modal/references/advanced-usage.md (100%)
 rename {skills/mlops/cloud => optional-skills/mlops}/modal/references/troubleshooting.md (100%)
 rename {skills/mlops/training => optional-skills/mlops}/peft/SKILL.md (100%)
 rename {skills/mlops/training => optional-skills/mlops}/peft/references/advanced-usage.md (100%)
 rename {skills/mlops/training => optional-skills/mlops}/peft/references/troubleshooting.md (100%)
 rename {skills/mlops/training => optional-skills/mlops}/pytorch-fsdp/SKILL.md (100%)
 rename {skills/mlops/training => optional-skills/mlops}/pytorch-fsdp/references/index.md (100%)
 rename {skills/mlops/training => optional-skills/mlops}/pytorch-fsdp/references/other.md (100%)
 rename {skills/mlops/models => optional-skills/mlops}/stable-diffusion/SKILL.md (100%)
 rename {skills/mlops/models => optional-skills/mlops}/stable-diffusion/references/advanced-usage.md (100%)
 rename {skills/mlops/models => optional-skills/mlops}/stable-diffusion/references/troubleshooting.md (100%)
 rename {skills/mlops/models => optional-skills/mlops}/whisper/SKILL.md (100%)
 rename {skills/mlops/models => optional-skills/mlops}/whisper/references/languages.md (100%)
 delete mode 100644 skills/mlops/cloud/DESCRIPTION.md

diff --git a/skills/mcp/mcporter/SKILL.md b/optional-skills/mcp/mcporter/SKILL.md
similarity index 100%
rename from skills/mcp/mcporter/SKILL.md
rename to optional-skills/mcp/mcporter/SKILL.md
diff --git a/skills/mlops/models/clip/SKILL.md b/optional-skills/mlops/clip/SKILL.md
similarity index 100%
rename from skills/mlops/models/clip/SKILL.md
rename to optional-skills/mlops/clip/SKILL.md
diff --git a/skills/mlops/models/clip/references/applications.md b/optional-skills/mlops/clip/references/applications.md
similarity index 100%
rename from skills/mlops/models/clip/references/applications.md
rename to optional-skills/mlops/clip/references/applications.md
diff --git a/skills/mlops/cloud/modal/SKILL.md b/optional-skills/mlops/modal/SKILL.md
similarity index 100%
rename from skills/mlops/cloud/modal/SKILL.md
rename to optional-skills/mlops/modal/SKILL.md
diff --git a/skills/mlops/cloud/modal/references/advanced-usage.md b/optional-skills/mlops/modal/references/advanced-usage.md
similarity index 100%
rename from skills/mlops/cloud/modal/references/advanced-usage.md
rename to optional-skills/mlops/modal/references/advanced-usage.md
diff --git a/skills/mlops/cloud/modal/references/troubleshooting.md b/optional-skills/mlops/modal/references/troubleshooting.md
similarity index 100%
rename from skills/mlops/cloud/modal/references/troubleshooting.md
rename to optional-skills/mlops/modal/references/troubleshooting.md
diff --git a/skills/mlops/training/peft/SKILL.md b/optional-skills/mlops/peft/SKILL.md
similarity index 100%
rename from skills/mlops/training/peft/SKILL.md
rename to optional-skills/mlops/peft/SKILL.md
diff --git a/skills/mlops/training/peft/references/advanced-usage.md b/optional-skills/mlops/peft/references/advanced-usage.md
similarity index 100%
rename from skills/mlops/training/peft/references/advanced-usage.md
rename to optional-skills/mlops/peft/references/advanced-usage.md
diff --git a/skills/mlops/training/peft/references/troubleshooting.md b/optional-skills/mlops/peft/references/troubleshooting.md
similarity index 100%
rename from skills/mlops/training/peft/references/troubleshooting.md
rename to optional-skills/mlops/peft/references/troubleshooting.md
diff --git a/skills/mlops/training/pytorch-fsdp/SKILL.md b/optional-skills/mlops/pytorch-fsdp/SKILL.md
similarity index 100%
rename from skills/mlops/training/pytorch-fsdp/SKILL.md
rename to optional-skills/mlops/pytorch-fsdp/SKILL.md
diff --git a/skills/mlops/training/pytorch-fsdp/references/index.md b/optional-skills/mlops/pytorch-fsdp/references/index.md
similarity index 100%
rename from skills/mlops/training/pytorch-fsdp/references/index.md
rename to optional-skills/mlops/pytorch-fsdp/references/index.md
diff --git a/skills/mlops/training/pytorch-fsdp/references/other.md b/optional-skills/mlops/pytorch-fsdp/references/other.md
similarity index 100%
rename from skills/mlops/training/pytorch-fsdp/references/other.md
rename to optional-skills/mlops/pytorch-fsdp/references/other.md
diff --git a/skills/mlops/models/stable-diffusion/SKILL.md b/optional-skills/mlops/stable-diffusion/SKILL.md
similarity index 100%
rename from skills/mlops/models/stable-diffusion/SKILL.md
rename to optional-skills/mlops/stable-diffusion/SKILL.md
diff --git a/skills/mlops/models/stable-diffusion/references/advanced-usage.md b/optional-skills/mlops/stable-diffusion/references/advanced-usage.md
similarity index 100%
rename from skills/mlops/models/stable-diffusion/references/advanced-usage.md
rename to optional-skills/mlops/stable-diffusion/references/advanced-usage.md
diff --git a/skills/mlops/models/stable-diffusion/references/troubleshooting.md b/optional-skills/mlops/stable-diffusion/references/troubleshooting.md
similarity index 100%
rename from skills/mlops/models/stable-diffusion/references/troubleshooting.md
rename to optional-skills/mlops/stable-diffusion/references/troubleshooting.md
diff --git a/skills/mlops/models/whisper/SKILL.md b/optional-skills/mlops/whisper/SKILL.md
similarity index 100%
rename from skills/mlops/models/whisper/SKILL.md
rename to optional-skills/mlops/whisper/SKILL.md
diff --git a/skills/mlops/models/whisper/references/languages.md b/optional-skills/mlops/whisper/references/languages.md
similarity index 100%
rename from skills/mlops/models/whisper/references/languages.md
rename to optional-skills/mlops/whisper/references/languages.md
diff --git a/skills/mcp/DESCRIPTION.md b/skills/mcp/DESCRIPTION.md
index 627c20ea1b5..30a0660333b 100644
--- a/skills/mcp/DESCRIPTION.md
+++ b/skills/mcp/DESCRIPTION.md
@@ -1,3 +1,3 @@
 ---
-description: Skills for working with MCP (Model Context Protocol) servers, tools, and integrations. Includes the built-in native MCP client (configure servers in config.yaml for automatic tool discovery) and the mcporter CLI bridge for ad-hoc server interaction.
+description: Skills for working with MCP (Model Context Protocol) servers, tools, and integrations. Documents the built-in native MCP client — configure servers in config.yaml for automatic tool discovery.
 ---
diff --git a/skills/mlops/cloud/DESCRIPTION.md b/skills/mlops/cloud/DESCRIPTION.md
deleted file mode 100644
index 32675823e04..00000000000
--- a/skills/mlops/cloud/DESCRIPTION.md
+++ /dev/null
@@ -1,3 +0,0 @@
----
-description: GPU cloud providers and serverless compute platforms for ML workloads.
----
diff --git a/skills/mlops/models/DESCRIPTION.md b/skills/mlops/models/DESCRIPTION.md
index 8170b517f51..8f7e669562c 100644
--- a/skills/mlops/models/DESCRIPTION.md
+++ b/skills/mlops/models/DESCRIPTION.md
@@ -1,3 +1,3 @@
 ---
-description: Specific model architectures and tools — computer vision (CLIP, SAM, Stable Diffusion), speech (Whisper), audio generation (AudioCraft), and multimodal models (LLaVA).
+description: Specific model architectures and tools — image segmentation (Segment Anything / SAM) and audio generation (AudioCraft / MusicGen). Additional model skills (CLIP, Stable Diffusion, Whisper, LLaVA) are available as optional skills.
 ---
diff --git a/website/docs/reference/optional-skills-catalog.md b/website/docs/reference/optional-skills-catalog.md
index 044060e9dd7..f5dd2ac5bfe 100644
--- a/website/docs/reference/optional-skills-catalog.md
+++ b/website/docs/reference/optional-skills-catalog.md
@@ -83,6 +83,7 @@ hermes skills uninstall <skill-name>
 | Skill | Description |
 |-------|-------------|
 | **fastmcp** | Build, test, inspect, install, and deploy MCP servers with FastMCP in Python. Covers wrapping APIs or databases as MCP tools, exposing resources or prompts, and deployment. |
+| **mcporter** | The `mcporter` CLI — list, configure, auth, and call MCP servers/tools directly (HTTP or stdio) from the terminal. Useful for ad-hoc MCP interactions; for always-on tool discovery use the built-in `native-mcp` client instead. |
 
 ## Migration
 
@@ -98,6 +99,7 @@ The largest optional category — covers the full ML pipeline from data curation
 |-------|-------------|
 | **accelerate** | Simplest distributed training API. 4 lines to add distributed support to any PyTorch script. Unified API for DeepSpeed/FSDP/Megatron/DDP. |
 | **chroma** | Open-source embedding database. Store embeddings and metadata, perform vector and full-text search. Simple 4-function API for RAG and semantic search. |
+| **clip** | OpenAI's vision-language model connecting images and text. Zero-shot image classification, image-text matching, and cross-modal retrieval. Trained on 400M image-text pairs. Use for image search, content moderation, or vision-language tasks without fine-tuning. |
 | **faiss** | Facebook's library for efficient similarity search and clustering of dense vectors. Supports billions of vectors, GPU acceleration, and various index types (Flat, IVF, HNSW). |
 | **flash-attention** | Optimize transformer attention with Flash Attention for 2-4x speedup and 10-20x memory reduction. Supports PyTorch SDPA, flash-attn library, H100 FP8, and sliding window. |
 | **guidance** | Control LLM output with regex and grammars, guarantee valid JSON/XML/code generation, enforce structured formats, and build multi-step workflows with Guidance — Microsoft Research's constrained generation framework. |
@@ -106,15 +108,20 @@ The largest optional category — covers the full ML pipeline from data curation
 | **instructor** | Extract structured data from LLM responses with Pydantic validation, retry failed extractions automatically, and stream partial results. |
 | **lambda-labs** | Reserved and on-demand GPU cloud instances for ML training and inference. SSH access, persistent filesystems, and multi-node clusters. |
 | **llava** | Large Language and Vision Assistant — visual instruction tuning and image-based conversations combining CLIP vision with LLaMA language models. |
+| **modal** | Serverless GPU cloud platform for running ML workloads. On-demand GPU access without infrastructure management, ML model deployment as APIs, or batch jobs with automatic scaling. |
 | **nemo-curator** | GPU-accelerated data curation for LLM training. Fuzzy deduplication (16x faster), quality filtering (30+ heuristics), semantic dedup, PII redaction. Scales with RAPIDS. |
+| **peft-fine-tuning** | Parameter-efficient fine-tuning for LLMs using LoRA, QLoRA, and 25+ methods. Train &lt;1% of parameters with minimal accuracy loss for 7B–70B models on limited GPU memory. HuggingFace's official PEFT library. |
 | **pinecone** | Managed vector database for production AI. Auto-scaling, hybrid search (dense + sparse), metadata filtering, and low latency (under 100ms p95). |
+| **pytorch-fsdp** | Expert guidance for Fully Sharded Data Parallel training with PyTorch FSDP — parameter sharding, mixed precision, CPU offloading, FSDP2. |
 | **pytorch-lightning** | High-level PyTorch framework with Trainer class, automatic distributed training (DDP/FSDP/DeepSpeed), callbacks, and minimal boilerplate. |
 | **qdrant** | High-performance vector similarity search engine. Rust-powered with fast nearest neighbor search, hybrid search with filtering, and scalable vector storage. |
 | **saelens** | Train and analyze Sparse Autoencoders (SAEs) using SAELens to decompose neural network activations into interpretable features. |
 | **simpo** | Simple Preference Optimization — reference-free alternative to DPO with better performance (+6.4 pts on AlpacaEval 2.0). No reference model needed. |
 | **slime** | LLM post-training with RL using Megatron+SGLang framework. Custom data generation workflows and tight Megatron-LM integration for RL scaling. |
+| **stable-diffusion-image-generation** | State-of-the-art text-to-image generation with Stable Diffusion via HuggingFace Diffusers. Text-to-image, image-to-image translation, inpainting, and custom diffusion pipelines. |
 | **tensorrt-llm** | Optimize LLM inference with NVIDIA TensorRT for maximum throughput. 10-100x faster than PyTorch on A100/H100 with quantization (FP8/INT4) and in-flight batching. |
 | **torchtitan** | PyTorch-native distributed LLM pretraining with 4D parallelism (FSDP2, TP, PP, CP). Scale from 8 to 512+ GPUs with Float8 and torch.compile. |
+| **whisper** | OpenAI's general-purpose speech recognition. 99 languages, transcription, translation to English, and language ID. Six model sizes from tiny (39M) to large (1550M). Best for robust multilingual ASR. |
 
 ## Productivity
 
diff --git a/website/docs/reference/skills-catalog.md b/website/docs/reference/skills-catalog.md
index 16be6a6581c..ffe489d3602 100644
--- a/website/docs/reference/skills-catalog.md
+++ b/website/docs/reference/skills-catalog.md
@@ -114,7 +114,6 @@ Skills for working with MCP (Model Context Protocol) servers, tools, and integra
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| `mcporter` | Use the mcporter CLI to list, configure, auth, and call MCP servers/tools directly (HTTP or stdio), including ad-hoc servers, config edits, and CLI/type generation. | `mcp/mcporter` |
 | `native-mcp` | Built-in MCP (Model Context Protocol) client that connects to external MCP servers, discovers their tools, and registers them as native Hermes Agent tools. Supports stdio and HTTP transports with automatic reconnection, security filtering, and zero-config tool injection. | `mcp/native-mcp` |
 
 ## media
@@ -136,14 +135,6 @@ General-purpose ML operations tools — model hub management, dataset operations
 |-------|-------------|------|
 | `huggingface-hub` | Hugging Face Hub CLI (hf) — search, download, and upload models and datasets, manage repos, query datasets with SQL, deploy inference endpoints, manage Spaces and buckets. | `mlops/huggingface-hub` |
 
-## mlops/cloud
-
-GPU cloud providers and serverless compute platforms for ML workloads.
-
-| Skill | Description | Path |
-|-------|-------------|------|
-| `modal-serverless-gpu` | Serverless GPU cloud platform for running ML workloads. Use when you need on-demand GPU access without infrastructure management, deploying ML models as APIs, or running batch jobs with automatic scaling. | `mlops/cloud/modal` |
-
 ## mlops/evaluation
 
 Model evaluation benchmarks, experiment tracking, and interpretability tools.
@@ -166,15 +157,12 @@ Model serving, quantization (GGUF/GPTQ), structured output, inference optimizati
 
 ## mlops/models
 
-Specific model architectures — computer vision (CLIP, SAM, Stable Diffusion), speech (Whisper), and audio generation (AudioCraft).
+Specific model architectures — image segmentation (SAM) and audio generation (AudioCraft / MusicGen). Additional model skills (CLIP, Stable Diffusion, Whisper, LLaVA) are available as optional skills.
 
 | Skill | Description | Path |
 |-------|-------------|------|
 | `audiocraft-audio-generation` | PyTorch library for audio generation including text-to-music (MusicGen) and text-to-sound (AudioGen). Use when you need to generate music from text descriptions, create sound effects, or perform melody-conditioned music generation. | `mlops/models/audiocraft` |
-| `clip` | OpenAI's model connecting vision and language. Enables zero-shot image classification, image-text matching, and cross-modal retrieval. Trained on 400M image-text pairs. Use for image search, content moderation, or vision-language tasks without fine-tuning. Best for general-pur… | `mlops/models/clip` |
 | `segment-anything-model` | Foundation model for image segmentation with zero-shot transfer. Use when you need to segment any object in images using points, boxes, or masks as prompts, or automatically generate all object masks in an image. | `mlops/models/segment-anything` |
-| `stable-diffusion-image-generation` | State-of-the-art text-to-image generation with Stable Diffusion models via HuggingFace Diffusers. Use when generating images from text prompts, performing image-to-image translation, inpainting, or building custom diffusion pipelines. | `mlops/models/stable-diffusion` |
-| `whisper` | OpenAI's general-purpose speech recognition model. Supports 99 languages, transcription, translation to English, and language identification. Six model sizes from tiny (39M params) to large (1550M params). Use for speech-to-text, podcast transcription, or multilingual audio pr… | `mlops/models/whisper` |
 
 ## mlops/research
 
@@ -192,8 +180,6 @@ Fine-tuning, RLHF/DPO/GRPO training, distributed training frameworks, and optimi
 |-------|-------------|------|
 | `axolotl` | Expert guidance for fine-tuning LLMs with Axolotl - YAML configs, 100+ models, LoRA/QLoRA, DPO/KTO/ORPO/GRPO, multimodal support | `mlops/training/axolotl` |
 | `fine-tuning-with-trl` | Fine-tune LLMs using reinforcement learning with TRL - SFT for instruction tuning, DPO for preference alignment, PPO/GRPO for reward optimization, and reward model training. Use when need RLHF, align model with preferences, or train from human feedback. Works with HuggingFace … | `mlops/training/trl-fine-tuning` |
-| `peft-fine-tuning` | Parameter-efficient fine-tuning for LLMs using LoRA, QLoRA, and 25+ methods. Use when fine-tuning large models (7B-70B) with limited GPU memory, when you need to train &lt;1% of parameters with minimal accuracy loss, or for multi-adapter serving. HuggingFace's official library… | `mlops/training/peft` |
-| `pytorch-fsdp` | Expert guidance for Fully Sharded Data Parallel training with PyTorch FSDP - parameter sharding, mixed precision, CPU offloading, FSDP2 | `mlops/training/pytorch-fsdp` |
 | `unsloth` | Expert guidance for fast fine-tuning with Unsloth - 2-5x faster training, 50-80% less memory, LoRA/QLoRA optimization | `mlops/training/unsloth` |
 
 ## note-taking

From 206a449b2991bd9e2b943483ae785a96ec5ce6a2 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 19 Apr 2026 05:18:19 -0700
Subject: [PATCH 130/143] feat(webhook): direct delivery mode for zero-LLM push
 notifications (#12473)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

External services can now push plain-text notifications to a user's chat
via the webhook adapter without invoking the agent. Set deliver_only=true
on a route and the rendered prompt template becomes the literal message
body — dispatched directly to the configured target (Telegram, Discord,
Slack, GitHub PR comment, etc.).

Reuses all existing webhook infrastructure: HMAC-SHA256 signature
validation, per-route rate limiting, idempotency cache, body-size limits,
template rendering with dot-notation, home-channel fallback. No new HTTP
server, no new auth scheme, no new port.

Use cases: Supabase/Firebase webhooks → user notifications, monitoring
alert forwarding, inter-agent pings, background job completion alerts.

Changes:
- gateway/platforms/webhook.py: new _direct_deliver() helper + early
  dispatch branch in _handle_webhook when deliver_only=true. Startup
  validation rejects deliver_only with deliver=log.
- hermes_cli/main.py + hermes_cli/webhook.py: --deliver-only flag on
  subscribe; list/show output marks direct-delivery routes.
- website/docs/user-guide/messaging/webhooks.md: new Direct Delivery
  Mode section with config example, CLI example, response codes.
- skills/devops/webhook-subscriptions/SKILL.md: document --deliver-only
  with use cases (bumped to v1.1.0).
- tests/gateway/test_webhook_deliver_only.py: 14 new tests covering
  agent bypass, template rendering, status codes, HMAC still enforced,
  idempotency still applies, rate limit still applies, startup
  validation, and direct-deliver dispatch.

Validation: 78 webhook tests pass (64 existing + 14 new). E2E verified
with real aiohttp server + real urllib POST — agent not invoked, target
adapter.send() called with rendered template, duplicate delivery_id
suppressed.

Closes the gap identified in PR #12117 (thanks to @H1an1 / Antenna team)
without adding a second HTTP ingress server.
---
 gateway/platforms/webhook.py                  | 103 ++++
 hermes_cli/main.py                            |   7 +
 hermes_cli/webhook.py                         |  16 +-
 skills/devops/webhook-subscriptions/SKILL.md  |  29 +-
 tests/gateway/test_webhook_deliver_only.py    | 473 ++++++++++++++++++
 website/docs/user-guide/messaging/webhooks.md |  75 +++
 6 files changed, 699 insertions(+), 4 deletions(-)
 create mode 100644 tests/gateway/test_webhook_deliver_only.py

diff --git a/gateway/platforms/webhook.py b/gateway/platforms/webhook.py
index c37445b17e8..9995ac38709 100644
--- a/gateway/platforms/webhook.py
+++ b/gateway/platforms/webhook.py
@@ -13,6 +13,10 @@ Each route defines:
   - skills: optional list of skills to load for the agent
   - deliver: where to send the response (github_comment, telegram, etc.)
   - deliver_extra: additional delivery config (repo, pr_number, chat_id)
+  - deliver_only: if true, skip the agent — the rendered prompt IS the
+    message that gets delivered.  Use for external push notifications
+    (Supabase, monitoring alerts, inter-agent pings) where zero LLM cost
+    and sub-second delivery matter more than agent reasoning.
 
 Security:
   - HMAC secret is required per route (validated at startup)
@@ -122,6 +126,19 @@ class WebhookAdapter(BasePlatformAdapter):
                     f"For testing without auth, set secret to '{_INSECURE_NO_AUTH}'."
                 )
 
+            # deliver_only routes bypass the agent — the rendered prompt is sent
+            # as a direct push notification via the configured delivery target.
+            # Validate up-front so misconfiguration surfaces at startup rather
+            # than on the first webhook POST.
+            if route.get("deliver_only"):
+                deliver = route.get("deliver", "log")
+                if not deliver or deliver == "log":
+                    raise ValueError(
+                        f"[webhook] Route '{name}' has deliver_only=true but "
+                        f"deliver is '{deliver}'. Direct delivery requires a "
+                        f"real target (telegram, discord, slack, github_comment, etc.)."
+                    )
+
         app = web.Application()
         app.router.add_get("/health", self._handle_health)
         app.router.add_post("/webhooks/{route_name}", self._handle_webhook)
@@ -419,6 +436,64 @@ class WebhookAdapter(BasePlatformAdapter):
             )
         self._seen_deliveries[delivery_id] = now
 
+        # ── Direct delivery mode (deliver_only) ─────────────────
+        # Skip the agent entirely — the rendered prompt IS the message we
+        # deliver.  Use case: external services (Supabase, monitoring,
+        # cron jobs, other agents) that need to push a plain notification
+        # to a user's chat with zero LLM cost.  Reuses the same HMAC auth,
+        # rate limiting, idempotency, and template rendering as agent mode.
+        if route_config.get("deliver_only"):
+            delivery = {
+                "deliver": route_config.get("deliver", "log"),
+                "deliver_extra": self._render_delivery_extra(
+                    route_config.get("deliver_extra", {}), payload
+                ),
+                "payload": payload,
+            }
+            logger.info(
+                "[webhook] direct-deliver event=%s route=%s target=%s msg_len=%d delivery=%s",
+                event_type,
+                route_name,
+                delivery["deliver"],
+                len(prompt),
+                delivery_id,
+            )
+            try:
+                result = await self._direct_deliver(prompt, delivery)
+            except Exception:
+                logger.exception(
+                    "[webhook] direct-deliver failed route=%s delivery=%s",
+                    route_name,
+                    delivery_id,
+                )
+                return web.json_response(
+                    {"status": "error", "error": "Delivery failed", "delivery_id": delivery_id},
+                    status=502,
+                )
+
+            if result.success:
+                return web.json_response(
+                    {
+                        "status": "delivered",
+                        "route": route_name,
+                        "target": delivery["deliver"],
+                        "delivery_id": delivery_id,
+                    },
+                    status=200,
+                )
+            # Delivery attempted but target rejected it — surface as 502
+            # with a generic error (don't leak adapter-level detail).
+            logger.warning(
+                "[webhook] direct-deliver target rejected route=%s target=%s error=%s",
+                route_name,
+                delivery["deliver"],
+                result.error,
+            )
+            return web.json_response(
+                {"status": "error", "error": "Delivery failed", "delivery_id": delivery_id},
+                status=502,
+            )
+
         # Use delivery_id in session key so concurrent webhooks on the
         # same route get independent agent runs (not queued/interrupted).
         session_chat_id = f"webhook:{route_name}:{delivery_id}"
@@ -572,6 +647,34 @@ class WebhookAdapter(BasePlatformAdapter):
     # Response delivery
     # ------------------------------------------------------------------
 
+    async def _direct_deliver(
+        self, content: str, delivery: dict
+    ) -> SendResult:
+        """Deliver *content* directly without invoking the agent.
+
+        Used by ``deliver_only`` routes: the rendered template becomes the
+        literal message body, and we dispatch to the same delivery helpers
+        that the agent-mode ``send()`` flow uses.  All target types that
+        work in agent mode work here — Telegram, Discord, Slack, GitHub
+        PR comments, etc.
+        """
+        deliver_type = delivery.get("deliver", "log")
+
+        if deliver_type == "log":
+            # Shouldn't reach here — startup validation rejects deliver_only
+            # with deliver=log — but guard defensively.
+            logger.info("[webhook] direct-deliver log-only: %s", content[:200])
+            return SendResult(success=True)
+
+        if deliver_type == "github_comment":
+            return await self._deliver_github_comment(content, delivery)
+
+        # Fall through to the cross-platform dispatcher, which validates the
+        # target name and routes via the gateway runner.
+        return await self._deliver_cross_platform(
+            deliver_type, content, delivery
+        )
+
     async def _deliver_github_comment(
         self, content: str, delivery: dict
     ) -> SendResult:
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 7e0220d9186..71fc6ae3810 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -7002,6 +7002,13 @@ For more help on a command:
     wh_sub.add_argument(
         "--secret", default="", help="HMAC secret (auto-generated if omitted)"
     )
+    wh_sub.add_argument(
+        "--deliver-only",
+        action="store_true",
+        help="Skip the agent — deliver the rendered prompt directly as the "
+        "message. Zero LLM cost. Requires --deliver to be a real target "
+        "(not 'log').",
+    )
 
     webhook_subparsers.add_parser(
         "list", aliases=["ls"], help="List all dynamic subscriptions"
diff --git a/hermes_cli/webhook.py b/hermes_cli/webhook.py
index 8ff135e29e5..378f11b4a7e 100644
--- a/hermes_cli/webhook.py
+++ b/hermes_cli/webhook.py
@@ -155,6 +155,15 @@ def _cmd_subscribe(args):
         "created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
     }
 
+    if getattr(args, "deliver_only", False):
+        if route["deliver"] == "log":
+            print(
+                "Error: --deliver-only requires --deliver to be a real target "
+                "(telegram, discord, slack, github_comment, etc.) — not 'log'."
+            )
+            return
+        route["deliver_only"] = True
+
     if args.deliver_chat_id:
         route["deliver_extra"] = {"chat_id": args.deliver_chat_id}
 
@@ -172,9 +181,12 @@ def _cmd_subscribe(args):
     else:
         print("  Events: (all)")
     print(f"  Deliver: {route['deliver']}")
+    if route.get("deliver_only"):
+        print("  Mode: direct delivery (no agent, zero LLM cost)")
     if route.get("prompt"):
         prompt_preview = route["prompt"][:80] + ("..." if len(route["prompt"]) > 80 else "")
-        print(f"  Prompt: {prompt_preview}")
+        label = "Message" if route.get("deliver_only") else "Prompt"
+        print(f"  {label}: {prompt_preview}")
     print(f"\n  Configure your service to POST to the URL above.")
     print(f"  Use the secret for HMAC-SHA256 signature validation.")
     print(f"  The gateway must be running to receive events (hermes gateway run).\n")
@@ -192,6 +204,8 @@ def _cmd_list(args):
     for name, route in subs.items():
         events = ", ".join(route.get("events", [])) or "(all)"
         deliver = route.get("deliver", "log")
+        if route.get("deliver_only"):
+            deliver = f"{deliver} (direct — no agent)"
         desc = route.get("description", "")
         print(f"  ◆ {name}")
         if desc:
diff --git a/skills/devops/webhook-subscriptions/SKILL.md b/skills/devops/webhook-subscriptions/SKILL.md
index e5ab6d5880d..dd20a19b415 100644
--- a/skills/devops/webhook-subscriptions/SKILL.md
+++ b/skills/devops/webhook-subscriptions/SKILL.md
@@ -1,10 +1,10 @@
 ---
 name: webhook-subscriptions
-description: Create and manage webhook subscriptions for event-driven agent activation. Use when the user wants external services to trigger agent runs automatically.
-version: 1.0.0
+description: Create and manage webhook subscriptions for event-driven agent activation, or for direct push notifications (zero LLM cost). Use when the user wants external services to trigger agent runs OR push notifications to chats.
+version: 1.1.0
 metadata:
   hermes:
-    tags: [webhook, events, automation, integrations]
+    tags: [webhook, events, automation, integrations, notifications, push]
 ---
 
 # Webhook Subscriptions
@@ -154,6 +154,29 @@ hermes webhook subscribe alerts \
   --deliver origin
 ```
 
+### Direct delivery (no agent, zero LLM cost)
+
+For use cases where you just want to push a notification through to a user's chat — no reasoning, no agent loop — add `--deliver-only`. The rendered `--prompt` template becomes the literal message body and is dispatched directly to the target adapter.
+
+Use this for:
+- External service push notifications (Supabase/Firebase webhooks → Telegram)
+- Monitoring alerts that should forward verbatim
+- Inter-agent pings where one agent is telling another agent's user something
+- Any webhook where an LLM round trip would be wasted effort
+
+```bash
+hermes webhook subscribe antenna-matches \
+  --deliver telegram \
+  --deliver-chat-id "123456789" \
+  --deliver-only \
+  --prompt "🎉 New match: {match.user_name} matched with you!" \
+  --description "Antenna match notifications"
+```
+
+The POST returns `200 OK` on successful delivery, `502` on target failure — so upstream services can retry intelligently. HMAC auth, rate limits, and idempotency still apply.
+
+Requires `--deliver` to be a real target (telegram, discord, slack, github_comment, etc.) — `--deliver log` is rejected because log-only direct delivery is pointless.
+
 ## Security
 
 - Each subscription gets an auto-generated HMAC-SHA256 secret (or provide your own with `--secret`)
diff --git a/tests/gateway/test_webhook_deliver_only.py b/tests/gateway/test_webhook_deliver_only.py
new file mode 100644
index 00000000000..d73a1520159
--- /dev/null
+++ b/tests/gateway/test_webhook_deliver_only.py
@@ -0,0 +1,473 @@
+"""Tests for the webhook adapter's ``deliver_only`` route mode.
+
+``deliver_only`` lets external services (Supabase webhooks, monitoring
+alerts, background jobs, other agents) push plain-text notifications to
+a user's chat via the webhook adapter WITHOUT invoking the agent.  The
+rendered prompt template becomes the literal message body.
+
+Covers:
+- Agent is NOT invoked (``handle_message`` never called)
+- Rendered content is delivered to the target platform adapter
+- HTTP returns 200 OK on success, 502 on delivery failure
+- Startup validation rejects ``deliver_only`` without a real delivery target
+- HMAC auth, rate limiting, and idempotency still apply
+"""
+
+import asyncio
+import hashlib
+import hmac
+import json
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from aiohttp import web
+from aiohttp.test_utils import TestClient, TestServer
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import MessageEvent, SendResult
+from gateway.platforms.webhook import WebhookAdapter, _INSECURE_NO_AUTH
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_adapter(routes, **extra_kw) -> WebhookAdapter:
+    extra = {"host": "0.0.0.0", "port": 0, "routes": routes}
+    extra.update(extra_kw)
+    config = PlatformConfig(enabled=True, extra=extra)
+    return WebhookAdapter(config)
+
+
+def _create_app(adapter: WebhookAdapter) -> web.Application:
+    app = web.Application()
+    app.router.add_get("/health", adapter._handle_health)
+    app.router.add_post("/webhooks/{route_name}", adapter._handle_webhook)
+    return app
+
+
+def _wire_mock_target(adapter: WebhookAdapter, platform_name: str = "telegram"):
+    """Attach a gateway_runner with a mocked target adapter."""
+    mock_target = AsyncMock()
+    mock_target.send = AsyncMock(return_value=SendResult(success=True))
+
+    mock_runner = MagicMock()
+    mock_runner.adapters = {Platform(platform_name): mock_target}
+    mock_runner.config.get_home_channel.return_value = None
+
+    adapter.gateway_runner = mock_runner
+    return mock_target
+
+
+# ===================================================================
+# Core behaviour: agent bypass
+# ===================================================================
+
+class TestDeliverOnlyBypassesAgent:
+    """The whole point of the feature — handle_message must not be called."""
+
+    @pytest.mark.asyncio
+    async def test_post_delivers_directly_without_agent(self):
+        routes = {
+            "match-alert": {
+                "secret": _INSECURE_NO_AUTH,
+                "deliver": "telegram",
+                "deliver_only": True,
+                "deliver_extra": {"chat_id": "12345"},
+                "prompt": "{payload.user} matched with {payload.other}!",
+            }
+        }
+        adapter = _make_adapter(routes)
+        mock_target = _wire_mock_target(adapter)
+
+        # Guard: handle_message must NOT be called in deliver_only mode
+        handle_message_calls: list[MessageEvent] = []
+
+        async def _capture(event):
+            handle_message_calls.append(event)
+
+        adapter.handle_message = _capture
+
+        app = _create_app(adapter)
+        body = json.dumps(
+            {"payload": {"user": "alice", "other": "bob"}}
+        ).encode()
+
+        async with TestClient(TestServer(app)) as cli:
+            resp = await cli.post(
+                "/webhooks/match-alert",
+                data=body,
+                headers={
+                    "Content-Type": "application/json",
+                    "X-GitHub-Delivery": "delivery-1",
+                },
+            )
+            assert resp.status == 200
+            data = await resp.json()
+            assert data["status"] == "delivered"
+            assert data["route"] == "match-alert"
+            assert data["target"] == "telegram"
+
+        # Let any background tasks settle before asserting no agent call
+        await asyncio.sleep(0.05)
+
+        # Agent was NOT invoked
+        assert handle_message_calls == []
+
+        # Target adapter.send() WAS called with the rendered template
+        mock_target.send.assert_awaited_once()
+        call_args = mock_target.send.await_args
+        chat_id_arg, content_arg = call_args.args[0], call_args.args[1]
+        assert chat_id_arg == "12345"
+        assert content_arg == "alice matched with bob!"
+
+    @pytest.mark.asyncio
+    async def test_template_rendering_works(self):
+        """Dot-notation template variables resolve in deliver_only mode."""
+        routes = {
+            "alert": {
+                "secret": _INSECURE_NO_AUTH,
+                "deliver": "telegram",
+                "deliver_only": True,
+                "deliver_extra": {"chat_id": "chat-1"},
+                "prompt": "Build {build.number} status: {build.status}",
+            }
+        }
+        adapter = _make_adapter(routes)
+        mock_target = _wire_mock_target(adapter)
+        app = _create_app(adapter)
+
+        async with TestClient(TestServer(app)) as cli:
+            resp = await cli.post(
+                "/webhooks/alert",
+                json={"build": {"number": 77, "status": "FAILED"}},
+                headers={"X-GitHub-Delivery": "d-render-1"},
+            )
+            assert resp.status == 200
+
+        mock_target.send.assert_awaited_once()
+        content_arg = mock_target.send.await_args.args[1]
+        assert content_arg == "Build 77 status: FAILED"
+
+    @pytest.mark.asyncio
+    async def test_thread_id_passed_through(self):
+        """deliver_extra.thread_id flows through to the target adapter."""
+        routes = {
+            "r": {
+                "secret": _INSECURE_NO_AUTH,
+                "deliver": "telegram",
+                "deliver_only": True,
+                "deliver_extra": {"chat_id": "c-1", "thread_id": "topic-42"},
+                "prompt": "hi",
+            }
+        }
+        adapter = _make_adapter(routes)
+        mock_target = _wire_mock_target(adapter)
+
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            resp = await cli.post(
+                "/webhooks/r",
+                json={},
+                headers={"X-GitHub-Delivery": "d-thread-1"},
+            )
+            assert resp.status == 200
+
+        assert mock_target.send.await_args.kwargs["metadata"] == {
+            "thread_id": "topic-42"
+        }
+
+
+# ===================================================================
+# HTTP status codes
+# ===================================================================
+
+class TestDeliverOnlyStatusCodes:
+
+    @pytest.mark.asyncio
+    async def test_delivery_failure_returns_502(self):
+        """If the target adapter returns SendResult(success=False), 502."""
+        routes = {
+            "r": {
+                "secret": _INSECURE_NO_AUTH,
+                "deliver": "telegram",
+                "deliver_only": True,
+                "deliver_extra": {"chat_id": "c-1"},
+                "prompt": "hi",
+            }
+        }
+        adapter = _make_adapter(routes)
+        mock_target = _wire_mock_target(adapter)
+        mock_target.send = AsyncMock(
+            return_value=SendResult(success=False, error="rate limited by tg")
+        )
+
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            resp = await cli.post(
+                "/webhooks/r",
+                json={},
+                headers={"X-GitHub-Delivery": "d-fail-1"},
+            )
+            assert resp.status == 502
+            data = await resp.json()
+            # Generic error — no adapter-level detail leaks
+            assert data["error"] == "Delivery failed"
+            assert "rate limited" not in json.dumps(data)
+
+    @pytest.mark.asyncio
+    async def test_delivery_exception_returns_502(self):
+        """If adapter.send() raises, we return 502 (not 500)."""
+        routes = {
+            "r": {
+                "secret": _INSECURE_NO_AUTH,
+                "deliver": "telegram",
+                "deliver_only": True,
+                "deliver_extra": {"chat_id": "c-1"},
+                "prompt": "hi",
+            }
+        }
+        adapter = _make_adapter(routes)
+        mock_target = _wire_mock_target(adapter)
+        mock_target.send = AsyncMock(side_effect=RuntimeError("tg exploded"))
+
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            resp = await cli.post(
+                "/webhooks/r",
+                json={},
+                headers={"X-GitHub-Delivery": "d-exc-1"},
+            )
+            assert resp.status == 502
+            data = await resp.json()
+            assert data["error"] == "Delivery failed"
+            # Exception message must not leak
+            assert "exploded" not in json.dumps(data)
+
+    @pytest.mark.asyncio
+    async def test_target_platform_not_connected_returns_502(self):
+        """deliver_only to a platform the gateway doesn't have → 502."""
+        routes = {
+            "r": {
+                "secret": _INSECURE_NO_AUTH,
+                "deliver": "discord",  # not configured in mock runner
+                "deliver_only": True,
+                "deliver_extra": {"chat_id": "c-1"},
+                "prompt": "hi",
+            }
+        }
+        adapter = _make_adapter(routes)
+        _wire_mock_target(adapter, platform_name="telegram")  # only TG wired
+
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            resp = await cli.post(
+                "/webhooks/r",
+                json={},
+                headers={"X-GitHub-Delivery": "d-no-platform-1"},
+            )
+            assert resp.status == 502
+
+
+# ===================================================================
+# Startup validation
+# ===================================================================
+
+class TestDeliverOnlyStartupValidation:
+
+    @pytest.mark.asyncio
+    async def test_deliver_only_with_log_deliver_rejected(self):
+        """deliver_only=true + deliver=log is nonsense — reject at connect()."""
+        routes = {
+            "bad": {
+                "secret": _INSECURE_NO_AUTH,
+                "deliver": "log",
+                "deliver_only": True,
+                "prompt": "hi",
+            }
+        }
+        adapter = _make_adapter(routes)
+        with pytest.raises(ValueError, match="deliver_only=true but deliver is 'log'"):
+            await adapter.connect()
+
+    @pytest.mark.asyncio
+    async def test_deliver_only_with_missing_deliver_rejected(self):
+        """deliver_only=true with no deliver field defaults to 'log' → reject."""
+        routes = {
+            "bad": {
+                "secret": _INSECURE_NO_AUTH,
+                # no deliver field
+                "deliver_only": True,
+                "prompt": "hi",
+            }
+        }
+        adapter = _make_adapter(routes)
+        with pytest.raises(ValueError, match="deliver_only=true"):
+            await adapter.connect()
+
+    @pytest.mark.asyncio
+    async def test_deliver_only_with_real_target_accepted(self):
+        """Sanity check — a valid deliver_only config passes validation."""
+        routes = {
+            "good": {
+                "secret": _INSECURE_NO_AUTH,
+                "deliver": "telegram",
+                "deliver_only": True,
+                "deliver_extra": {"chat_id": "c-1"},
+                "prompt": "hi",
+            }
+        }
+        adapter = _make_adapter(routes)
+        # connect() does more than validation (binds a socket) — we just
+        # want to verify the validation doesn't raise.  Call it and tear
+        # down immediately.
+        try:
+            started = await adapter.connect()
+            if started:
+                await adapter.disconnect()
+        except ValueError:
+            pytest.fail("valid deliver_only config should not raise ValueError")
+
+
+# ===================================================================
+# Security + reliability invariants still hold
+# ===================================================================
+
+class TestDeliverOnlySecurityInvariants:
+
+    @pytest.mark.asyncio
+    async def test_hmac_still_enforced(self):
+        """deliver_only does NOT bypass HMAC validation."""
+        secret = "real-secret-123"
+        routes = {
+            "r": {
+                "secret": secret,
+                "deliver": "telegram",
+                "deliver_only": True,
+                "deliver_extra": {"chat_id": "c-1"},
+                "prompt": "hi",
+            }
+        }
+        adapter = _make_adapter(routes)
+        mock_target = _wire_mock_target(adapter)
+
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            # No signature header → reject
+            resp = await cli.post(
+                "/webhooks/r",
+                json={},
+                headers={"X-GitHub-Delivery": "d-noauth-1"},
+            )
+            assert resp.status == 401
+
+        # Target never called
+        mock_target.send.assert_not_awaited()
+
+    @pytest.mark.asyncio
+    async def test_idempotency_still_applies(self):
+        """Same delivery_id posted twice → second is suppressed."""
+        routes = {
+            "r": {
+                "secret": _INSECURE_NO_AUTH,
+                "deliver": "telegram",
+                "deliver_only": True,
+                "deliver_extra": {"chat_id": "c-1"},
+                "prompt": "hi",
+            }
+        }
+        adapter = _make_adapter(routes)
+        mock_target = _wire_mock_target(adapter)
+
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            r1 = await cli.post(
+                "/webhooks/r",
+                json={},
+                headers={"X-GitHub-Delivery": "dup-1"},
+            )
+            assert r1.status == 200
+
+            r2 = await cli.post(
+                "/webhooks/r",
+                json={},
+                headers={"X-GitHub-Delivery": "dup-1"},
+            )
+            # Existing webhook adapter treats duplicates as 200 + status=duplicate
+            assert r2.status == 200
+            data = await r2.json()
+            assert data["status"] == "duplicate"
+
+        # Target was called exactly once
+        assert mock_target.send.await_count == 1
+
+    @pytest.mark.asyncio
+    async def test_rate_limit_still_applies(self):
+        """Route-level rate limit caps deliver_only POSTs too."""
+        routes = {
+            "r": {
+                "secret": _INSECURE_NO_AUTH,
+                "deliver": "telegram",
+                "deliver_only": True,
+                "deliver_extra": {"chat_id": "c-1"},
+                "prompt": "hi",
+            }
+        }
+        adapter = _make_adapter(routes, rate_limit=2)
+        _wire_mock_target(adapter)
+
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            for i in range(2):
+                r = await cli.post(
+                    "/webhooks/r",
+                    json={},
+                    headers={"X-GitHub-Delivery": f"rl-{i}"},
+                )
+                assert r.status == 200
+
+            # Third within the window → 429
+            r3 = await cli.post(
+                "/webhooks/r",
+                json={},
+                headers={"X-GitHub-Delivery": "rl-3"},
+            )
+            assert r3.status == 429
+
+
+# ===================================================================
+# Unit: _direct_deliver dispatch
+# ===================================================================
+
+class TestDirectDeliverUnit:
+
+    @pytest.mark.asyncio
+    async def test_dispatches_to_cross_platform_for_messaging_targets(self):
+        adapter = _make_adapter({})
+        mock_target = _wire_mock_target(adapter, "telegram")
+
+        result = await adapter._direct_deliver(
+            "hello",
+            {"deliver": "telegram", "deliver_extra": {"chat_id": "c-1"}},
+        )
+        assert result.success is True
+        mock_target.send.assert_awaited_once_with(
+            "c-1", "hello", metadata=None
+        )
+
+    @pytest.mark.asyncio
+    async def test_dispatches_to_github_comment(self):
+        adapter = _make_adapter({})
+        with patch.object(
+            adapter, "_deliver_github_comment",
+            new=AsyncMock(return_value=SendResult(success=True)),
+        ) as mock_gh:
+            result = await adapter._direct_deliver(
+                "review body",
+                {
+                    "deliver": "github_comment",
+                    "deliver_extra": {"repo": "org/r", "pr_number": "1"},
+                },
+            )
+            assert result.success is True
+            mock_gh.assert_awaited_once()
diff --git a/website/docs/user-guide/messaging/webhooks.md b/website/docs/user-guide/messaging/webhooks.md
index bbf04bcb4f8..2c60624fb64 100644
--- a/website/docs/user-guide/messaging/webhooks.md
+++ b/website/docs/user-guide/messaging/webhooks.md
@@ -72,6 +72,7 @@ Routes define how different webhook sources are handled. Each route is a named e
 | `skills` | No | List of skill names to load for the agent run. |
 | `deliver` | No | Where to send the response: `github_comment`, `telegram`, `discord`, `slack`, `signal`, `sms`, `whatsapp`, `matrix`, `mattermost`, `homeassistant`, `email`, `dingtalk`, `feishu`, `wecom`, `weixin`, `bluebubbles`, `qqbot`, or `log` (default). |
 | `deliver_extra` | No | Additional delivery config — keys depend on `deliver` type (e.g. `repo`, `pr_number`, `chat_id`). Values support the same `{dot.notation}` templates as `prompt`. |
+| `deliver_only` | No | If `true`, skip the agent entirely — the rendered `prompt` template becomes the literal message that gets delivered. Zero LLM cost, sub-second delivery. See [Direct Delivery Mode](#direct-delivery-mode) for use cases. Requires `deliver` to be a real target (not `log`). |
 
 ### Full example
 
@@ -240,6 +241,80 @@ For cross-platform delivery, the target platform must also be enabled and connec
 
 ---
 
+## Direct Delivery Mode {#direct-delivery-mode}
+
+By default, every webhook POST triggers an agent run — the payload becomes a prompt, the agent processes it, and the agent's response is delivered. This costs LLM tokens on every event.
+
+For use cases where you just want to **push a plain notification** — no reasoning, no agent loop, just deliver the message — set `deliver_only: true` on the route. The rendered `prompt` template becomes the literal message body, and the adapter dispatches it directly to the configured delivery target.
+
+### When to use direct delivery
+
+- **External service push** — Supabase/Firebase webhook fires on a database change → notify a user in Telegram instantly
+- **Monitoring alerts** — Datadog/Grafana alert webhook → push to a Discord channel
+- **Inter-agent pings** — Agent A notifies Agent B's user that a long-running task finished
+- **Background job completion** — Cron job finishes → post result to Slack
+
+Benefits:
+
+- **Zero LLM tokens** — the agent is never invoked
+- **Sub-second delivery** — a single adapter call, no reasoning loop
+- **Same security as agent mode** — HMAC auth, rate limits, idempotency, and body-size limits all still apply
+- **Synchronous response** — the POST returns `200 OK` once delivery succeeds, or `502` if the target rejects it, so your upstream service can retry intelligently
+
+### Example: Telegram push from Supabase
+
+```yaml
+platforms:
+  webhook:
+    enabled: true
+    extra:
+      port: 8644
+      secret: "global-secret"
+      routes:
+        antenna-matches:
+          secret: "antenna-webhook-secret"
+          deliver: "telegram"
+          deliver_only: true
+          prompt: "🎉 New match: {match.user_name} matched with you!"
+          deliver_extra:
+            chat_id: "{match.telegram_chat_id}"
+```
+
+Your Supabase edge function signs the payload with HMAC-SHA256 and POSTs to `https://your-server:8644/webhooks/antenna-matches`. The webhook adapter validates the signature, renders the template from the payload, delivers to Telegram, and returns `200 OK`.
+
+### Example: Dynamic subscription via CLI
+
+```bash
+hermes webhook subscribe antenna-matches \
+  --deliver telegram \
+  --deliver-chat-id "123456789" \
+  --deliver-only \
+  --prompt "🎉 New match: {match.user_name} matched with you!" \
+  --description "Antenna match notifications"
+```
+
+### Response codes
+
+| Status | Meaning |
+|--------|---------|
+| `200 OK` | Delivered successfully. Body: `{"status": "delivered", "route": "...", "target": "...", "delivery_id": "..."}` |
+| `200 OK` (status=duplicate) | Duplicate delivery ID (`X-GitHub-Delivery` / `X-Request-ID` header) within the idempotency TTL (1 hour). Not re-delivered. |
+| `401 Unauthorized` | HMAC signature invalid or missing. |
+| `400 Bad Request` | Malformed JSON body. |
+| `404 Not Found` | Unknown route name. |
+| `413 Payload Too Large` | Body exceeded `max_body_bytes`. |
+| `429 Too Many Requests` | Route rate limit exceeded. |
+| `502 Bad Gateway` | Target adapter rejected the message or raised. The error is logged server-side; the response body is a generic `Delivery failed` to avoid leaking adapter internals. |
+
+### Configuration gotchas
+
+- `deliver_only: true` requires `deliver` to be a real target. `deliver: log` (or omitting `deliver`) is rejected at startup — the adapter refuses to start if it finds a misconfigured route.
+- The `skills` field is ignored in direct delivery mode (no agent runs, so there's nothing to inject skills into).
+- Template rendering uses the same `{dot.notation}` syntax as agent mode, including the `{__raw__}` token.
+- Idempotency uses the same `X-GitHub-Delivery` / `X-Request-ID` headers as agent mode — retries with the same ID return `status=duplicate` and do NOT re-deliver.
+
+---
+
 ## Dynamic Subscriptions (CLI) {#dynamic-subscriptions}
 
 In addition to static routes in `config.yaml`, you can create webhook subscriptions dynamically using the `hermes webhook` CLI command. This is especially useful when the agent itself needs to set up event-driven triggers.

From 7fa01fafa557f4cba59eb95a61a7343559bc2b44 Mon Sep 17 00:00:00 2001
From: Mibayy <Mibayy@users.noreply.github.com>
Date: Sun, 29 Mar 2026 22:48:28 -0700
Subject: [PATCH 131/143] feat: add maps skill (OpenStreetMap + Overpass +
 OSRM, no API key)

Adds a maps optional skill with 8 commands, 44 POI categories, and
zero external dependencies. Uses free open data: Nominatim, Overpass
API, OSRM, and TimeAPI.io.

Commands: search, reverse, nearby, distance, directions, timezone,
area, bbox.

Improvements over original PR #2015:
- Fixed directory structure (optional-skills/productivity/maps/)
- Fixed distance argparse (--to flag instead of broken dual nargs=+)
- Fixed timezone (TimeAPI.io instead of broken worldtimeapi heuristic)
- Expanded POI categories from 12 to 44
- Added directions command with turn-by-turn OSRM steps
- Added area command (bounding box + dimensions for a named place)
- Added bbox command (POI search within a geographic rectangle)
- Added 23 unit tests
- Improved haversine (atan2 for numerical stability)
- Comprehensive SKILL.md with workflow examples

Co-authored-by: Mibayy <Mibayy@users.noreply.github.com>
---
 optional-skills/productivity/maps/SKILL.md    |  153 +++
 .../productivity/maps/scripts/maps_client.py  | 1143 +++++++++++++++++
 .../maps/tests/test_maps_client.py            |  177 +++
 3 files changed, 1473 insertions(+)
 create mode 100644 optional-skills/productivity/maps/SKILL.md
 create mode 100644 optional-skills/productivity/maps/scripts/maps_client.py
 create mode 100644 optional-skills/productivity/maps/tests/test_maps_client.py

diff --git a/optional-skills/productivity/maps/SKILL.md b/optional-skills/productivity/maps/SKILL.md
new file mode 100644
index 00000000000..59e0359d56f
--- /dev/null
+++ b/optional-skills/productivity/maps/SKILL.md
@@ -0,0 +1,153 @@
+---
+name: maps
+description: >
+  Geocoding, reverse geocoding, nearby POI search (44 categories),
+  distance/routing, turn-by-turn directions, timezone lookup, bounding box
+  search, and area info. Uses OpenStreetMap + Overpass + OSRM. Free, no API key.
+version: 1.1.0
+author: Mibayy
+license: MIT
+metadata:
+  hermes:
+    tags: [maps, geocoding, places, routing, distance, directions, openstreetmap, nominatim, overpass, osrm]
+    category: productivity
+    requires_toolsets: [terminal]
+---
+
+# Maps Skill
+
+Location intelligence using free, open data sources. 8 commands, 44 POI
+categories, zero dependencies (Python stdlib only), no API key required.
+
+Data sources: OpenStreetMap/Nominatim, Overpass API, OSRM, TimeAPI.io.
+
+## When to Use
+
+- User wants coordinates for a place name
+- User has coordinates and wants the address
+- User asks for nearby restaurants, hospitals, pharmacies, hotels, etc.
+- User wants driving/walking/cycling distance or travel time
+- User wants turn-by-turn directions between two places
+- User wants timezone information for a location
+- User wants to search for POIs within a geographic area
+
+## Prerequisites
+
+Python 3.8+ (stdlib only — no pip installs needed).
+
+Script path after install: `~/.hermes/skills/maps/scripts/maps_client.py`
+
+## Commands
+
+```bash
+MAPS=~/.hermes/skills/maps/scripts/maps_client.py
+```
+
+### search — Geocode a place name
+
+```bash
+python3 $MAPS search "Eiffel Tower"
+python3 $MAPS search "1600 Pennsylvania Ave, Washington DC"
+```
+
+Returns: lat, lon, display name, type, bounding box, importance score.
+
+### reverse — Coordinates to address
+
+```bash
+python3 $MAPS reverse 48.8584 2.2945
+```
+
+Returns: full address breakdown (street, city, state, country, postcode).
+
+### nearby — Find places by category
+
+```bash
+python3 $MAPS nearby 48.8584 2.2945 restaurant --limit 10
+python3 $MAPS nearby 40.7128 -74.0060 hospital --radius 2000
+python3 $MAPS nearby 51.5074 -0.1278 cafe --limit 5 --radius 300
+```
+
+44 categories: restaurant, cafe, bar, hospital, pharmacy, hotel, supermarket,
+atm, gas_station, parking, museum, park, school, university, bank, police,
+fire_station, library, airport, train_station, bus_stop, church, mosque,
+synagogue, dentist, doctor, cinema, theatre, gym, swimming_pool, post_office,
+convenience_store, bakery, bookshop, laundry, car_wash, car_rental,
+bicycle_rental, taxi, veterinary, zoo, playground, stadium, nightclub.
+
+### distance — Travel distance and time
+
+```bash
+python3 $MAPS distance "Paris" --to "Lyon"
+python3 $MAPS distance "New York" --to "Boston" --mode driving
+python3 $MAPS distance "Big Ben" --to "Tower Bridge" --mode walking
+```
+
+Modes: driving (default), walking, cycling. Returns road distance, duration,
+and straight-line distance for comparison.
+
+### directions — Turn-by-turn navigation
+
+```bash
+python3 $MAPS directions "Eiffel Tower" --to "Louvre Museum" --mode walking
+python3 $MAPS directions "JFK Airport" --to "Times Square" --mode driving
+```
+
+Returns numbered steps with instruction, distance, duration, road name, and
+maneuver type (turn, depart, arrive, etc.).
+
+### timezone — Timezone for coordinates
+
+```bash
+python3 $MAPS timezone 48.8584 2.2945
+python3 $MAPS timezone 35.6762 139.6503
+```
+
+Returns timezone name, UTC offset, and current local time.
+
+### area — Bounding box and area for a place
+
+```bash
+python3 $MAPS area "Manhattan, New York"
+python3 $MAPS area "London"
+```
+
+Returns bounding box coordinates, width/height in km, and approximate area.
+Useful as input for the bbox command.
+
+### bbox — Search within a bounding box
+
+```bash
+python3 $MAPS bbox 40.75 -74.00 40.77 -73.98 restaurant --limit 20
+```
+
+Finds POIs within a geographic rectangle. Use `area` first to get the
+bounding box coordinates for a named place.
+
+## Workflow Examples
+
+**"Find Italian restaurants near the Colosseum":**
+1. `search "Colosseum Rome"` → get lat/lon
+2. `nearby LAT LON restaurant --radius 500`
+
+**"How do I walk from hotel to conference center?":**
+1. `directions "Hotel Name" --to "Conference Center" --mode walking`
+
+**"What restaurants are in downtown Seattle?":**
+1. `area "Downtown Seattle"` → get bounding box
+2. `bbox S W N E restaurant --limit 30`
+
+## Pitfalls
+
+- Nominatim ToS: max 1 req/s (handled automatically by the script)
+- `nearby` requires lat/lon — use `search` first to get coordinates
+- OSRM routing coverage is best for Europe and North America
+- Overpass API can be slow during peak hours (script retries automatically)
+- `distance` and `directions` use `--to` flag for the destination (not positional)
+
+## Verification
+
+```bash
+python3 ~/.hermes/skills/maps/scripts/maps_client.py search "Statue of Liberty"
+# Should return lat ~40.689, lon ~-74.044
+```
diff --git a/optional-skills/productivity/maps/scripts/maps_client.py b/optional-skills/productivity/maps/scripts/maps_client.py
new file mode 100644
index 00000000000..c271570f995
--- /dev/null
+++ b/optional-skills/productivity/maps/scripts/maps_client.py
@@ -0,0 +1,1143 @@
+#!/usr/bin/env python3
+"""
+maps_client.py - CLI tool for maps, geocoding, routing, POI search, and more.
+Uses only Python stdlib. Data from OpenStreetMap/Nominatim, Overpass API, OSRM,
+and TimeAPI.io.
+
+Commands:
+  search     - Geocode a place name to coordinates
+  reverse    - Reverse geocode coordinates to an address
+  nearby     - Find nearby POIs by category
+  distance   - Road distance and travel time between two places
+  directions - Turn-by-turn directions between two places
+  timezone   - Timezone info for coordinates
+  bbox       - Find POIs within a bounding box
+  area       - Get bounding box and area info for a named place
+"""
+
+import argparse
+import json
+import math
+import os
+import sys
+import time
+import urllib.error
+import urllib.parse
+import urllib.request
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+USER_AGENT = "HermesAgent/1.0 (contact: hermes@agent.ai)"
+DATA_SOURCE = "OpenStreetMap/Nominatim"
+
+NOMINATIM_SEARCH  = "https://nominatim.openstreetmap.org/search"
+NOMINATIM_REVERSE = "https://nominatim.openstreetmap.org/reverse"
+OVERPASS_API      = "https://overpass-api.de/api/interpreter"
+OSRM_BASE         = "https://router.project-osrm.org/route/v1"
+TIMEAPI_BASE      = "https://timeapi.io/api/timezone/coordinate"
+
+# Seconds to sleep between Nominatim requests (ToS requirement)
+NOMINATIM_RATE_LIMIT = 1.0
+
+# Maximum retries for HTTP errors
+MAX_RETRIES = 3
+RETRY_DELAY = 2.0  # seconds
+
+# Category -> (OSM tag key, OSM tag value)
+CATEGORY_TAGS = {
+    # Food & Drink
+    "restaurant":        ("amenity", "restaurant"),
+    "cafe":              ("amenity", "cafe"),
+    "bar":               ("amenity", "bar"),
+    "bakery":            ("shop",    "bakery"),
+    "convenience_store": ("shop",    "convenience"),
+    # Health
+    "hospital":          ("amenity", "hospital"),
+    "pharmacy":          ("amenity", "pharmacy"),
+    "dentist":           ("amenity", "dentist"),
+    "doctor":            ("amenity", "doctors"),
+    "veterinary":        ("amenity", "veterinary"),
+    # Accommodation
+    "hotel":             ("tourism", "hotel"),
+    # Shopping & Services
+    "supermarket":       ("shop",    "supermarket"),
+    "bookshop":          ("shop",    "books"),
+    "laundry":           ("shop",    "laundry"),
+    # Finance
+    "atm":               ("amenity", "atm"),
+    "bank":              ("amenity", "bank"),
+    # Transport
+    "gas_station":       ("amenity", "fuel"),
+    "parking":           ("amenity", "parking"),
+    "airport":           ("aeroway", "aerodrome"),
+    "train_station":     ("railway", "station"),
+    "bus_stop":          ("highway", "bus_stop"),
+    "taxi":              ("amenity", "taxi"),
+    "car_wash":          ("amenity", "car_wash"),
+    "car_rental":        ("amenity", "car_rental"),
+    "bicycle_rental":    ("amenity", "bicycle_rental"),
+    # Culture & Entertainment
+    "museum":            ("tourism", "museum"),
+    "cinema":            ("amenity", "cinema"),
+    "theatre":           ("amenity", "theatre"),
+    "nightclub":         ("amenity", "nightclub"),
+    "zoo":               ("tourism", "zoo"),
+    # Education
+    "school":            ("amenity", "school"),
+    "university":        ("amenity", "university"),
+    "library":           ("amenity", "library"),
+    # Public Services
+    "police":            ("amenity", "police"),
+    "fire_station":      ("amenity", "fire_station"),
+    "post_office":       ("amenity", "post_office"),
+    # Religion
+    "church":            ("amenity", "place_of_worship"),  # refined by religion tag
+    "mosque":            ("amenity", "place_of_worship"),
+    "synagogue":         ("amenity", "place_of_worship"),
+    # Recreation
+    "park":              ("leisure", "park"),
+    "gym":               ("leisure", "fitness_centre"),
+    "swimming_pool":     ("leisure", "swimming_pool"),
+    "playground":        ("leisure", "playground"),
+    "stadium":           ("leisure", "stadium"),
+}
+
+# Religion-specific overrides for place_of_worship categories
+RELIGION_FILTER = {
+    "church":    "christian",
+    "mosque":    "muslim",
+    "synagogue": "jewish",
+}
+
+VALID_CATEGORIES = sorted(CATEGORY_TAGS.keys())
+
+OSRM_PROFILES = {
+    "driving": "driving",
+    "walking": "foot",
+    "cycling": "bike",
+}
+
+# ---------------------------------------------------------------------------
+# Output helpers
+# ---------------------------------------------------------------------------
+
+def print_json(data):
+    """Print data to stdout as 2-space-indented JSON, leaving non-ASCII characters unescaped."""
+    print(json.dumps(data, indent=2, ensure_ascii=False))
+
+
+def error_exit(message, code=1):
+    """Print {"error": message, "status": "error"} as JSON to stdout, then exit with status `code`."""
+    print_json({"error": message, "status": "error"})
+    sys.exit(code)
+
+
+# ---------------------------------------------------------------------------
+# HTTP helpers
+# ---------------------------------------------------------------------------
+
+def http_get(url, params=None, retries=MAX_RETRIES, silent=False):
+    """
+    Perform an HTTP GET request, returning parsed JSON.
+    Adds the required User-Agent header. Retries HTTP 429/502/503/504, URL
+    errors and JSON parse errors with a growing delay between attempts.
+    If silent=True, raises RuntimeError instead of calling error_exit.
+    """
+    if params:
+        url = url + "?" + urllib.parse.urlencode(params)
+
+    req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
+
+    last_error = None
+    for attempt in range(1, retries + 1):
+        try:
+            with urllib.request.urlopen(req, timeout=15) as resp:
+                return json.loads(resp.read().decode("utf-8"))
+        except urllib.error.HTTPError as exc:
+            last_error = f"HTTP {exc.code}: {exc.reason} for {url}"
+            if exc.code not in (429, 503, 502, 504):
+                # Non-transient status: retrying cannot help, fail right away.
+                if silent:
+                    raise RuntimeError(last_error)
+                error_exit(last_error)
+        except urllib.error.URLError as exc:
+            last_error = f"URL error: {exc.reason}"
+        except json.JSONDecodeError as exc:
+            last_error = f"JSON parse error: {exc}"
+        if attempt < retries:
+            # No sleep after the final attempt: it would only delay the error.
+            time.sleep(RETRY_DELAY * attempt)
+
+    msg = f"Request failed after {retries} attempts. Last error: {last_error}"
+    if silent:
+        raise RuntimeError(msg)
+    error_exit(msg)
+
+
+def http_get_text(url, params=None, retries=MAX_RETRIES, silent=False):
+    """Like http_get but returns raw text instead of parsed JSON (for APIs
+    that may return non-JSON responses). Retries HTTP 429/502/503/504 and URL
+    errors; silent=True raises RuntimeError instead of calling error_exit."""
+    if params:
+        url = url + "?" + urllib.parse.urlencode(params)
+
+    req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
+
+    last_error = None
+    for attempt in range(1, retries + 1):
+        try:
+            with urllib.request.urlopen(req, timeout=15) as resp:
+                return resp.read().decode("utf-8")
+        except urllib.error.HTTPError as exc:
+            last_error = f"HTTP {exc.code}: {exc.reason} for {url}"
+            if exc.code not in (429, 503, 502, 504):
+                # Non-transient status: retrying cannot help, fail right away.
+                if silent:
+                    raise RuntimeError(last_error)
+                error_exit(last_error)
+        except urllib.error.URLError as exc:
+            last_error = f"URL error: {exc.reason}"
+        if attempt < retries:
+            # No sleep after the final attempt: it would only delay the error.
+            time.sleep(RETRY_DELAY * attempt)
+
+    msg = f"Request failed after {retries} attempts. Last error: {last_error}"
+    if silent:
+        raise RuntimeError(msg)
+    error_exit(msg)
+
+
+def http_post(url, data_str, retries=MAX_RETRIES):
+    """
+    Perform an HTTP POST with a plain-text body (for Overpass QL).
+    Returns parsed JSON. Retries HTTP 429/502/503/504, URL errors and JSON
+    parse errors; calls error_exit on any other status or when retries run out.
+    """
+    encoded = data_str.encode("utf-8")
+    req = urllib.request.Request(
+        url,
+        data=encoded,
+        headers={
+            "User-Agent": USER_AGENT,
+            "Content-Type": "application/x-www-form-urlencoded",
+        },
+    )
+
+    last_error = None
+    for attempt in range(1, retries + 1):
+        try:
+            with urllib.request.urlopen(req, timeout=30) as resp:
+                return json.loads(resp.read().decode("utf-8"))
+        except urllib.error.HTTPError as exc:
+            last_error = f"HTTP {exc.code}: {exc.reason}"
+            if exc.code not in (429, 503, 502, 504):
+                # Non-transient status: retrying cannot help, fail right away.
+                error_exit(last_error)
+        except urllib.error.URLError as exc:
+            last_error = f"URL error: {exc.reason}"
+        except json.JSONDecodeError as exc:
+            last_error = f"JSON parse error: {exc}"
+        if attempt < retries:
+            # No sleep after the final attempt: it would only delay the error.
+            time.sleep(RETRY_DELAY * attempt)
+
+    error_exit(f"POST failed after {retries} attempts. Last error: {last_error}")
+
+
+# ---------------------------------------------------------------------------
+# Geo math
+# ---------------------------------------------------------------------------
+
+def haversine_m(lat1, lon1, lat2, lon2):
+    """Return great-circle distance in metres between two lat/lon points in degrees (Haversine)."""
+    R = 6_371_000  # Earth mean radius in metres
+    phi1, phi2 = math.radians(lat1), math.radians(lat2)
+    dphi = math.radians(lat2 - lat1)
+    dlam = math.radians(lon2 - lon1)
+    a = (math.sin(dphi / 2) ** 2
+         + math.cos(phi1) * math.cos(phi2) * math.sin(dlam / 2) ** 2)
+    a = min(1.0, max(0.0, a))  # rounding can push a outside [0, 1]; sqrt would then raise
+    return 2 * R * math.atan2(math.sqrt(a), math.sqrt(1 - a))
+
+
+# ---------------------------------------------------------------------------
+# Nominatim helpers
+# ---------------------------------------------------------------------------
+
+def nominatim_search(query, limit=5):
+    """Look up a free-text place query on Nominatim.
+
+    Pauses NOMINATIM_RATE_LIMIT seconds before the request and returns the
+    parsed JSON response (a list of result dicts).
+    """
+    time.sleep(NOMINATIM_RATE_LIMIT)
+    search_args = dict(q=query, format="json", limit=limit, addressdetails=1)
+    response = http_get(NOMINATIM_SEARCH, params=search_args)
+    return response
+
+
+def nominatim_reverse(lat, lon):
+    """Look up the address for a lat/lon pair on Nominatim.
+
+    Pauses NOMINATIM_RATE_LIMIT seconds before the request and returns the
+    parsed JSON response (a single result dict).
+    """
+    time.sleep(NOMINATIM_RATE_LIMIT)
+    reverse_args = dict(lat=lat, lon=lon, format="json", addressdetails=1)
+    response = http_get(NOMINATIM_REVERSE, params=reverse_args)
+    return response
+
+
+def geocode_single(query):
+    """
+    Resolve `query` to its top Nominatim hit as (lat, lon, display_name).
+    Calls error_exit when the search returns no results.
+    """
+    hits = nominatim_search(query, limit=1)
+    if not hits:
+        error_exit(f"Could not geocode: {query}")
+    top = hits[0]
+    return float(top["lat"]), float(top["lon"]), top.get("display_name", query)
+
+
+# ---------------------------------------------------------------------------
+# Overpass helpers
+# ---------------------------------------------------------------------------
+
+def build_overpass_nearby(tag_key, tag_val, lat, lon, radius, limit,
+                          religion=None):
+    """Return an Overpass QL query for nodes and ways tagged tag_key=tag_val
+    around (lat, lon); a truthy `religion` adds a ["religion"=...] filter."""
+    # Tag selector shared by the node and way statements.
+    selector = f'["{tag_key}"="{tag_val}"]'
+    if religion:
+        selector += f'["religion"="{religion}"]'
+    # Spatial filter: everything within `radius` of the reference point.
+    around = f'(around:{radius},{lat},{lon})'
+    return (
+        '[out:json][timeout:25];\n(\n'
+        f'  node{selector}{around};\n'
+        f'  way{selector}{around};\n'
+        f');\nout center {limit};\n'
+    )
+
+
+def build_overpass_bbox(tag_key, tag_val, south, west, north, east, limit,
+                        religion=None):
+    """Return an Overpass QL query for nodes and ways tagged tag_key=tag_val
+    inside a bounding box; a truthy `religion` adds a ["religion"=...] filter."""
+    # Tag selector shared by the node and way statements.
+    selector = f'["{tag_key}"="{tag_val}"]'
+    if religion:
+        selector += f'["religion"="{religion}"]'
+    # Bounding-box filter in (south, west, north, east) order.
+    box = f'({south},{west},{north},{east})'
+    return (
+        '[out:json][timeout:25];\n(\n'
+        f'  node{selector}{box};\n'
+        f'  way{selector}{box};\n'
+        f');\nout center {limit};\n'
+    )
+
+
+def parse_overpass_elements(elements, ref_lat=None, ref_lon=None):
+    """
+    Convert raw Overpass elements into a list of POI dicts.
+
+    Elements with no usable coordinates are skipped. When both ref_lat and
+    ref_lon are given, every POI gets a "distance_m" entry (metres from the
+    reference point, one decimal) and the list is sorted nearest-first.
+
+    Returns the list of POI dicts (possibly empty).
+    """
+    name_keys = ("name", "name:en")
+    addr_keys = ("addr:housenumber", "addr:street", "addr:city")
+    places = []
+    for el in elements:
+        # Read the type with .get(): an element lacking "type" is handled as
+        # a node rather than raising KeyError, matching how "osm_type" below
+        # already tolerated a missing type.
+        el_type = el.get("type", "")
+
+        # Ways have a "center" sub-dict; nodes have lat/lon directly
+        position = el.get("center", {}) if el_type == "way" else el
+        el_lat = position.get("lat")
+        el_lon = position.get("lon")
+        if el_lat is None or el_lon is None:
+            continue
+
+        tags = el.get("tags", {})
+
+        place = {
+            "name":     tags.get("name") or tags.get("name:en") or "",
+            # Short address built from whichever addr:* tags are present.
+            "address":  ", ".join(tags[k] for k in addr_keys if tags.get(k)),
+            "lat":      el_lat,
+            "lon":      el_lon,
+            "osm_type": el_type,
+            "osm_id":   el.get("id", ""),
+            # Remaining tags, minus the ones already surfaced above.
+            "tags": {
+                k: v for k, v in tags.items()
+                if k not in name_keys and k not in addr_keys
+            },
+        }
+
+        if ref_lat is not None and ref_lon is not None:
+            dist_m = haversine_m(ref_lat, ref_lon, el_lat, el_lon)
+            place["distance_m"] = round(dist_m, 1)
+
+        places.append(place)
+
+    # distance_m is set for every place or for none, so inspecting the first
+    # entry is enough to decide whether to sort.
+    if places and "distance_m" in places[0]:
+        places.sort(key=lambda p: p["distance_m"])
+
+    return places
+
+
+# ---------------------------------------------------------------------------
+# Command: search
+# ---------------------------------------------------------------------------
+
+def cmd_search(args):
+    """
+    Geocode the joined query words and print the matches as JSON.
+    Asks for at most five results; no matches prints an empty list.
+    """
+    query = " ".join(args.query)
+    hits  = nominatim_search(query, limit=5)
+
+    def corner(box, idx):
+        # A short or missing bounding box reports None for absent corners.
+        return float(box[idx]) if len(box) > idx else None
+
+    results = []
+    # A falsy response simply produces an empty result list.
+    for hit in hits or []:
+        box = hit.get("boundingbox", [])
+        display = hit.get("display_name", "")
+        results.append({
+            "name":         hit.get("name") or display,
+            "display_name": display,
+            "lat":          float(hit["lat"]),
+            "lon":          float(hit["lon"]),
+            "type":         hit.get("type", ""),
+            "category":     hit.get("category", ""),
+            "osm_type":     hit.get("osm_type", ""),
+            "osm_id":       hit.get("osm_id", ""),
+            "bounding_box": {
+                "min_lat": corner(box, 0),
+                "max_lat": corner(box, 1),
+                "min_lon": corner(box, 2),
+                "max_lon": corner(box, 3),
+            },
+            "importance":   hit.get("importance"),
+        })
+
+    print_json({
+        "query":       query,
+        "results":     results,
+        "count":       len(results),
+        "data_source": DATA_SOURCE,
+    })
+
+
+# ---------------------------------------------------------------------------
+# Command: reverse
+# ---------------------------------------------------------------------------
+
+def cmd_reverse(args):
+    """Reverse geocode validated coordinates and print the address as JSON."""
+    try:
+        lat = float(args.lat)
+        lon = float(args.lon)
+    except ValueError:
+        error_exit("LAT and LON must be numeric values.")
+
+    # Written as "not (a <= x <= b)" so that NaN is rejected as well.
+    if not (-90 <= lat <= 90):
+        error_exit("Latitude must be between -90 and 90.")
+    if not (-180 <= lon <= 180):
+        error_exit("Longitude must be between -180 and 180.")
+
+    data = nominatim_reverse(lat, lon)
+    if "error" in data:
+        error_exit(f"Reverse geocode failed: {data['error']}")
+
+    address = data.get("address", {})
+
+    def field(key):
+        return address.get(key, "")
+
+    # Keys are inserted in the order they should appear in the output.
+    address_out = {k: field(k) for k in
+                   ("house_number", "road", "neighbourhood", "suburb")}
+    # First non-empty of city / town / village.
+    address_out["city"] = (address.get("city")
+                           or address.get("town")
+                           or address.get("village", ""))
+    for k in ("county", "state", "postcode", "country", "country_code"):
+        address_out[k] = field(k)
+
+    print_json({
+        "lat":          lat,
+        "lon":          lon,
+        "display_name": data.get("display_name", ""),
+        "address":      address_out,
+        "osm_type":     data.get("osm_type", ""),
+        "osm_id":       data.get("osm_id", ""),
+        "data_source":  DATA_SOURCE,
+    })
+
+
+# ---------------------------------------------------------------------------
+# Command: nearby
+# ---------------------------------------------------------------------------
+
+def cmd_nearby(args):
+    """Find POIs of one category around a point via Overpass; prints JSON."""
+    try:
+        lat = float(args.lat)
+        lon = float(args.lon)
+    except ValueError:
+        error_exit("LAT and LON must be numeric values.")
+
+    # Same range check as reverse/timezone: rejects NaN and out-of-range input.
+    if not (-90 <= lat <= 90):
+        error_exit("Latitude must be between -90 and 90.")
+    if not (-180 <= lon <= 180):
+        error_exit("Longitude must be between -180 and 180.")
+
+    category = args.category.lower()
+    if category not in CATEGORY_TAGS:
+        error_exit(
+            f"Unknown category '{category}'. "
+            f"Valid categories: {', '.join(VALID_CATEGORIES)}"
+        )
+
+    radius = int(args.radius)
+    limit  = int(args.limit)
+    if radius <= 0:
+        error_exit("Radius must be a positive integer (metres).")
+    if limit <= 0:
+        error_exit("Limit must be a positive integer.")
+
+    tag_key, tag_val = CATEGORY_TAGS[category]
+    query = build_overpass_nearby(tag_key, tag_val, lat, lon, radius, limit,
+                                  religion=RELIGION_FILTER.get(category))
+    raw = http_post(OVERPASS_API, "data=" + urllib.parse.quote(query))
+
+    places = parse_overpass_elements(raw.get("elements", []),
+                                     ref_lat=lat, ref_lon=lon)
+    for p in places:
+        p["category"] = category
+
+    print_json({
+        "center_lat":  lat,
+        "center_lon":  lon,
+        "category":    category,
+        "radius_m":    radius,
+        "count":       len(places),
+        "results":     places,
+        "data_source": DATA_SOURCE,
+    })
+
+
+# ---------------------------------------------------------------------------
+# Command: distance
+# ---------------------------------------------------------------------------
+
+def cmd_distance(args):
+    """
+    Print road distance and travel time between two places as JSON.
+
+    Both places are geocoded (origin first), routed through OSRM with the
+    profile for the chosen mode, and reported together with the
+    straight-line distance for reference.
+    """
+    origin_query      = " ".join(args.origin)
+    destination_query = " ".join(args.to)
+    mode              = args.mode.lower()
+
+    if mode not in OSRM_PROFILES:
+        error_exit(f"Invalid mode '{mode}'. Choose from: {', '.join(OSRM_PROFILES)}")
+
+    # Each lookup yields (lat, lon, display_name).
+    o_lat, o_lon, o_name = geocode_single(origin_query)
+    d_lat, d_lon, d_name = geocode_single(destination_query)
+
+    # Coordinates go into the URL as lon,lat pairs joined by ";".
+    waypoints = f"{o_lon},{o_lat};{d_lon},{d_lat}"
+    url = (f"{OSRM_BASE}/{OSRM_PROFILES[mode]}/{waypoints}"
+           f"?overview=false&steps=false")
+    osrm_data = http_get(url)
+
+    # Anything but code "Ok" is a routing failure; prefer OSRM's message.
+    if osrm_data.get("code") != "Ok":
+        reason = osrm_data.get('message', osrm_data.get('code', 'unknown error'))
+        error_exit(f"OSRM routing failed: {reason}")
+
+    routes = osrm_data.get("routes", [])
+    if not routes:
+        error_exit("No route found between the two locations.")
+
+    # Only the first route is reported.
+    best       = routes[0]
+    distance_m = best.get("distance", 0)
+    duration_s = best.get("duration", 0)
+
+    # Straight-line distance for reference
+    straight_m = haversine_m(o_lat, o_lon, d_lat, d_lon)
+
+    # Origin and destination share one output shape.
+    def endpoint(query, name, lat, lon):
+        return {
+            "query":        query,
+            "display_name": name,
+            "lat":          lat,
+            "lon":          lon,
+        }
+
+    print_json({
+        "origin":           endpoint(origin_query, o_name, o_lat, o_lon),
+        "destination":      endpoint(destination_query, d_name, d_lat, d_lon),
+        "mode":             mode,
+        "distance_km":      round(distance_m / 1000, 3),
+        "distance_m":       round(distance_m, 1),
+        "duration_minutes": round(duration_s / 60, 2),
+        "duration_seconds": round(duration_s, 1),
+        "straight_line_km": round(straight_m / 1000, 3),
+        "data_source":      DATA_SOURCE,
+    })
+
+
+# ---------------------------------------------------------------------------
+# Command: directions
+# ---------------------------------------------------------------------------
+
+def _format_duration(seconds):
+    """Format seconds as "Ns", "M.m min" or "Hh Mmin" without unit overflow."""
+    # Limits test the rounded value so "60s"/"60.0 min"/"1h 60min" never appear.
+    if round(seconds) < 60:
+        return f"{round(seconds)}s"
+    minutes = seconds / 60
+    if round(minutes, 1) < 60:
+        return f"{round(minutes, 1)} min"
+    hours, remaining = divmod(round(minutes), 60)
+    return f"{hours}h {remaining}min"
+
+
+def _format_distance(metres):
+    """Format metres as "N m", or "X km" once the rounded value reaches 1000."""
+    if round(metres) < 1000:  # 999.6 must read "1.0 km", never "1000 m"
+        return f"{round(metres)} m"
+    return f"{round(metres / 1000, 2)} km"
+
+
+def cmd_directions(args):
+    """
+    Print turn-by-turn directions between two places as JSON.
+
+    Both places are geocoded (origin first) and routed through OSRM with
+    steps enabled; each step of each leg becomes one numbered instruction.
+    The output also carries the route's total distance and duration.
+    """
+    origin_query      = " ".join(args.origin)
+    destination_query = " ".join(args.to)
+    mode              = args.mode.lower()
+
+    if mode not in OSRM_PROFILES:
+        error_exit(f"Invalid mode '{mode}'. Choose from: {', '.join(OSRM_PROFILES)}")
+
+    def describe(m_type, modifier, step_name):
+        # English wording for one maneuver. Known types have a variant with
+        # and without the road name; unknown types echo the raw fields.
+        if m_type == "depart":
+            return f"Depart on {step_name}" if step_name else "Depart"
+        if m_type == "arrive":
+            return "Arrive at destination"
+        if m_type == "turn":
+            return f"Turn {modifier} onto {step_name}" if step_name else f"Turn {modifier}"
+        if m_type == "new name":
+            return f"Continue onto {step_name}" if step_name else "Continue"
+        if m_type == "merge":
+            return f"Merge {modifier} onto {step_name}" if step_name else f"Merge {modifier}"
+        if m_type == "fork":
+            return f"Take the {modifier} fork onto {step_name}" if step_name else f"Take the {modifier} fork"
+        if m_type == "roundabout":
+            return f"Enter roundabout, exit onto {step_name}" if step_name else "Enter roundabout"
+        if m_type == "rotary":
+            return f"Enter rotary, exit onto {step_name}" if step_name else "Enter rotary"
+        if m_type == "end of road":
+            return f"At end of road, turn {modifier} onto {step_name}" if step_name else f"At end of road, turn {modifier}"
+        if m_type == "continue":
+            return f"Continue {modifier} on {step_name}" if step_name else f"Continue {modifier}"
+        if m_type == "on ramp":
+            return f"Take ramp onto {step_name}" if step_name else "Take ramp"
+        if m_type == "off ramp":
+            return f"Take exit onto {step_name}" if step_name else "Take exit"
+        return f"{m_type} {modifier} {step_name}".strip()
+
+    # Each lookup yields (lat, lon, display_name).
+    o_lat, o_lon, o_name = geocode_single(origin_query)
+    d_lat, d_lon, d_name = geocode_single(destination_query)
+
+    # Coordinates go into the URL as lon,lat pairs joined by ";".
+    waypoints = f"{o_lon},{o_lat};{d_lon},{d_lat}"
+    url = (f"{OSRM_BASE}/{OSRM_PROFILES[mode]}/{waypoints}"
+           f"?overview=false&steps=true")
+    osrm_data = http_get(url)
+
+    # Anything but code "Ok" is a routing failure; prefer OSRM's message.
+    if osrm_data.get("code") != "Ok":
+        reason = osrm_data.get('message', osrm_data.get('code', 'unknown error'))
+        error_exit(f"OSRM routing failed: {reason}")
+
+    routes = osrm_data.get("routes", [])
+    if not routes:
+        error_exit("No route found between the two locations.")
+
+    # Only the first route is reported.
+    best       = routes[0]
+    distance_m = best.get("distance", 0)
+    duration_s = best.get("duration", 0)
+
+    # Flatten the steps of all legs into one list numbered from 1.
+    steps = []
+    for leg in best.get("legs", []):
+        for step in leg.get("steps", []):
+            maneuver  = step.get("maneuver", {})
+            step_dist = step.get("distance", 0)
+            step_dur  = step.get("duration", 0)
+            step_name = step.get("name", "")
+            m_type    = maneuver.get("type", "")
+            modifier  = maneuver.get("modifier", "")
+            steps.append({
+                "step":        len(steps) + 1,
+                "instruction": describe(m_type, modifier, step_name),
+                "distance":    _format_distance(step_dist),
+                "distance_m":  round(step_dist, 1),
+                "duration":    _format_duration(step_dur),
+                "duration_s":  round(step_dur, 1),
+                "road_name":   step_name,
+                "maneuver":    m_type,
+            })
+
+    # Origin and destination share one output shape.
+    def endpoint(query, name, lat, lon):
+        return {
+            "query":        query,
+            "display_name": name,
+            "lat":          lat,
+            "lon":          lon,
+        }
+
+    print_json({
+        "origin":             endpoint(origin_query, o_name, o_lat, o_lon),
+        "destination":        endpoint(destination_query, d_name, d_lat, d_lon),
+        "mode":               mode,
+        "total_distance":     _format_distance(distance_m),
+        "total_distance_m":   round(distance_m, 1),
+        "total_duration":     _format_duration(duration_s),
+        "total_duration_s":   round(duration_s, 1),
+        "steps":              steps,
+        "step_count":         len(steps),
+        "data_source":        DATA_SOURCE,
+    })
+
+
+# ---------------------------------------------------------------------------
+# Command: timezone
+# ---------------------------------------------------------------------------
+
+def cmd_timezone(args):
+    """
+    Print timezone information for a lat/lon coordinate as JSON.
+
+    Strategy:
+      1. Try TimeAPI.io (free, no key, supports coordinate-based lookup).
+      2. Fallback: derive UTC offset approximation from longitude.
+    """
+    try:
+        lat = float(args.lat)
+        lon = float(args.lon)
+    except ValueError:
+        error_exit("LAT and LON must be numeric values.")
+
+    if not (-90 <= lat <= 90):
+        error_exit("Latitude must be between -90 and 90.")
+    if not (-180 <= lon <= 180):
+        error_exit("Longitude must be between -180 and 180.")
+
+    def format_offset(info):
+        # Render an {"hours": h, "minutes": m} dict as "+HH:MM" / "-HH:MM".
+        # Minutes decide the sign when hours == 0 (e.g. -00:30 stays negative).
+        hours = info.get("hours", 0)
+        minutes = info.get("minutes", 0)
+        negative = hours < 0 or (hours == 0 and minutes < 0)
+        sign = "-" if negative else "+"
+        return f"{sign}{abs(hours):02d}:{abs(minutes):02d}"
+
+    timezone_str = None
+    timezone_src = None
+    current_time = None
+    utc_offset   = None
+
+    # --- Strategy 1: TimeAPI.io coordinate lookup ---
+    try:
+        params = {"latitude": lat, "longitude": lon}
+        tz_data = http_get(TIMEAPI_BASE, params=params, silent=True)
+        if isinstance(tz_data, dict):
+            timezone_str = tz_data.get("timeZone")
+            current_time = tz_data.get("currentLocalTime")
+            timezone_src = "timeapi.io"
+            # Prefer the current offset, else the standard one. Only a non-empty
+            # dict counts, so a missing key is never rendered as "+00:00".
+            for key in ("currentUtcOffset", "standardUtcOffset"):
+                offset_info = tz_data.get(key)
+                if isinstance(offset_info, dict) and offset_info:
+                    utc_offset = format_offset(offset_info)
+                    break
+    except (RuntimeError, KeyError, TypeError, ValueError):
+        # Best effort: API down or odd value types (ValueError: "02d" on a float).
+        pass
+
+    # --- Strategy 2: longitude-based UTC offset approximation ---
+    if not timezone_str:
+        approx_offset_h = round(lon / 15)
+        sign = "+" if approx_offset_h >= 0 else "-"
+        utc_offset = f"{sign}{abs(approx_offset_h):02d}:00"
+        timezone_str = f"UTC{utc_offset}"
+        timezone_src = "longitude approximation (longitude/15)"
+
+    print_json({
+        "lat":          lat,
+        "lon":          lon,
+        "timezone":     timezone_str,
+        "utc_offset":   utc_offset,
+        "current_time": current_time,
+        "source":       timezone_src,
+        "data_source":  DATA_SOURCE,
+    })
+
+
+# ---------------------------------------------------------------------------
+# Command: bbox
+# ---------------------------------------------------------------------------
+
+def cmd_bbox(args):
+    """Find POIs of one category inside a bounding box via Overpass; prints JSON."""
+    try:
+        lat1 = float(args.lat1)
+        lon1 = float(args.lon1)
+        lat2 = float(args.lat2)
+        lon2 = float(args.lon2)
+    except ValueError:
+        error_exit("All coordinate arguments must be numeric values.")
+
+    # Same range check as reverse/timezone: rejects out-of-range corners and
+    # NaN, which would otherwise make the min()/max() below order-dependent.
+    if not (-90 <= lat1 <= 90 and -90 <= lat2 <= 90):
+        error_exit("Latitude must be between -90 and 90.")
+    if not (-180 <= lon1 <= 180 and -180 <= lon2 <= 180):
+        error_exit("Longitude must be between -180 and 180.")
+
+    # Normalize: south/west < north/east
+    south, north = min(lat1, lat2), max(lat1, lat2)
+    west, east   = min(lon1, lon2), max(lon1, lon2)
+
+    category = args.category.lower()
+    if category not in CATEGORY_TAGS:
+        error_exit(
+            f"Unknown category '{category}'. "
+            f"Valid categories: {', '.join(VALID_CATEGORIES)}"
+        )
+
+    limit = int(args.limit)
+    if limit <= 0:
+        error_exit("Limit must be a positive integer.")
+
+    tag_key, tag_val = CATEGORY_TAGS[category]
+    religion = RELIGION_FILTER.get(category)
+    query = build_overpass_bbox(tag_key, tag_val, south, west, north, east,
+                                limit, religion=religion)
+
+    raw = http_post(OVERPASS_API, "data=" + urllib.parse.quote(query))
+
+    # Use center of bbox as reference for distance sorting
+    places = parse_overpass_elements(raw.get("elements", []),
+                                     ref_lat=(south + north) / 2,
+                                     ref_lon=(west + east) / 2)
+    for p in places:
+        p["category"] = category
+
+    print_json({
+        "bounding_box": {
+            "south": south,
+            "west":  west,
+            "north": north,
+            "east":  east,
+        },
+        "category":    category,
+        "count":       len(places),
+        "results":     places,
+        "data_source": DATA_SOURCE,
+    })
+
+
+# ---------------------------------------------------------------------------
+# Command: area
+# ---------------------------------------------------------------------------
+
+def cmd_area(args):
+    """
+    Print the bounding box, dimensions and approximate area of a place.
+
+    The place name is geocoded and only the top match is used; the area is
+    the bounding rectangle's, so it overstates irregularly shaped places.
+    """
+    query = " ".join(args.place)
+    matches = nominatim_search(query, limit=1)
+    if not matches:
+        error_exit(f"Could not find place: {query}")
+
+    item = matches[0]
+    bb = item.get("boundingbox", [])
+    if len(bb) < 4:
+        error_exit(f"No bounding box data available for: {query}")
+
+    # Entries are read in the order south, north, west, east.
+    min_lat, max_lat, min_lon, max_lon = (float(v) for v in bb[:4])
+
+    # Approximate area in km² using the bounding box: the height runs along
+    # the western edge and the width is measured at the mid latitude.
+    mid_lat   = (min_lat + max_lat) / 2
+    height_km = haversine_m(min_lat, min_lon, max_lat, min_lon) / 1000
+    width_km  = haversine_m(mid_lat, min_lon, mid_lat, max_lon) / 1000
+
+    print_json({
+        "query":        query,
+        "display_name": item.get("display_name", ""),
+        "lat":          float(item["lat"]),
+        "lon":          float(item["lon"]),
+        "type":         item.get("type", ""),
+        "category":     item.get("category", ""),
+        "bounding_box": {
+            "south": min_lat,
+            "north": max_lat,
+            "west":  min_lon,
+            "east":  max_lon,
+        },
+        "dimensions": {
+            "width_km":  round(width_km, 3),
+            "height_km": round(height_km, 3),
+        },
+        "approx_area_km2": round(height_km * width_km, 3),
+        "osm_type":        item.get("osm_type", ""),
+        "osm_id":          item.get("osm_id", ""),
+        "data_source":     DATA_SOURCE,
+    })
+
+
+# ---------------------------------------------------------------------------
+# CLI setup
+# ---------------------------------------------------------------------------
+
+def build_parser():
+    """Build the argparse parser: one subcommand per cmd_* handler in main()."""
+    parser = argparse.ArgumentParser(
+        prog="maps_client.py",
+        description=(
+            "CLI maps tool: geocoding, reverse geocoding, POI search, "
+            "routing, directions, timezone, and area lookup. "
+            "Powered by OpenStreetMap, OSRM, Overpass, and TimeAPI.io. "
+            "No API keys required."
+        ),
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=(
+            "Examples:\n"
+            "  maps_client.py search Times Square\n"
+            "  maps_client.py reverse 40.758 -73.985\n"
+            "  maps_client.py nearby 40.758 -73.985 restaurant --radius 800\n"
+            "  maps_client.py distance New York --to Los Angeles --mode driving\n"
+            "  maps_client.py directions Paris --to Berlin --mode driving\n"
+            "  maps_client.py timezone 48.8566 2.3522\n"
+            "  maps_client.py bbox 40.70 -74.02 40.78 -73.95 restaurant\n"
+            "  maps_client.py area Manhattan"
+        ),
+    )
+    sub = parser.add_subparsers(dest="command", required=True,
+                                 metavar="COMMAND")
+    # -- search: free-text query (one or more words) --
+    p_search = sub.add_parser(
+        "search",
+        help="Geocode a place name to coordinates.",
+        description="Search for a place by name and return coordinates and details.",
+    )
+    p_search.add_argument(
+        "query", nargs="+",
+        help="Place name or address to search.",
+    )
+
+    # -- reverse: lat lon, kept as strings (parsed by the handler) --
+    p_reverse = sub.add_parser(
+        "reverse",
+        help="Reverse geocode coordinates to an address.",
+        description="Convert latitude/longitude coordinates to a human-readable address.",
+    )
+    p_reverse.add_argument("lat", help="Latitude (decimal degrees).")
+    p_reverse.add_argument("lon", help="Longitude (decimal degrees).")
+
+    # -- nearby: lat lon category, with --radius/--limit parsed as ints --
+    p_nearby = sub.add_parser(
+        "nearby",
+        help="Find nearby places of a given category.",
+        description=(
+            "Find points of interest near a location using the Overpass API.\n"
+            f"Categories: {', '.join(VALID_CATEGORIES)}"
+        ),
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    p_nearby.add_argument("lat", help="Center latitude (decimal degrees).")
+    p_nearby.add_argument("lon", help="Center longitude (decimal degrees).")
+    p_nearby.add_argument(
+        "category",
+        help="POI category (use --help to see full list).",
+    )
+    p_nearby.add_argument(
+        "--radius", "-r",
+        default=500, type=int, metavar="METRES",
+        help="Search radius in metres (default: 500).",
+    )
+    p_nearby.add_argument(
+        "--limit", "-n",
+        default=10, type=int, metavar="N",
+        help="Maximum number of results (default: 10).",
+    )
+
+    # -- distance: origin words, required --to, optional --mode --
+    p_dist = sub.add_parser(
+        "distance",
+        help="Calculate road distance and travel time.",
+        description=(
+            "Calculate road distance and estimated travel time between two places.\n"
+            "Example: maps_client.py distance New York --to Los Angeles"
+        ),
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    p_dist.add_argument(
+        "origin", nargs="+",
+        help="Origin address or place name.",
+    )
+    p_dist.add_argument(
+        "--to", nargs="+", required=True, metavar="DEST",
+        help="Destination address or place name (required).",
+    )
+    p_dist.add_argument(
+        "--mode", "-m",
+        default="driving",
+        choices=list(OSRM_PROFILES.keys()),
+        help="Travel mode (default: driving).",
+    )
+
+    # -- directions: same arguments as distance --
+    p_dir = sub.add_parser(
+        "directions",
+        help="Get turn-by-turn directions between two places.",
+        description=(
+            "Get step-by-step navigation directions between two places.\n"
+            "Example: maps_client.py directions Paris --to Berlin --mode driving"
+        ),
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    p_dir.add_argument(
+        "origin", nargs="+",
+        help="Origin address or place name.",
+    )
+    p_dir.add_argument(
+        "--to", nargs="+", required=True, metavar="DEST",
+        help="Destination address or place name (required).",
+    )
+    p_dir.add_argument(
+        "--mode", "-m",
+        default="driving",
+        choices=list(OSRM_PROFILES.keys()),
+        help="Travel mode (default: driving).",
+    )
+
+    # -- timezone: lat lon, kept as strings (parsed by the handler) --
+    p_tz = sub.add_parser(
+        "timezone",
+        help="Get timezone information for coordinates.",
+        description="Look up timezone and current local time for a lat/lon coordinate.",
+    )
+    p_tz.add_argument("lat", help="Latitude (decimal degrees).")
+    p_tz.add_argument("lon", help="Longitude (decimal degrees).")
+
+    # -- bbox: two corner coordinates, category, --limit parsed as int --
+    p_bbox = sub.add_parser(
+        "bbox",
+        help="Find POIs within a bounding box.",
+        description=(
+            "Search for points of interest within a geographic bounding box.\n"
+            "Tip: use the 'area' command to find bounding boxes for named places.\n"
+            f"Categories: {', '.join(VALID_CATEGORIES)}"
+        ),
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    p_bbox.add_argument("lat1", help="First corner latitude.")
+    p_bbox.add_argument("lon1", help="First corner longitude.")
+    p_bbox.add_argument("lat2", help="Second corner latitude.")
+    p_bbox.add_argument("lon2", help="Second corner longitude.")
+    p_bbox.add_argument("category", help="POI category to search for.")
+    p_bbox.add_argument(
+        "--limit", "-n",
+        default=20, type=int, metavar="N",
+        help="Maximum number of results (default: 20).",
+    )
+
+    # -- area: place name (one or more words) --
+    p_area = sub.add_parser(
+        "area",
+        help="Get bounding box and area info for a named place.",
+        description=(
+            "Look up a place by name and return its bounding box, dimensions, "
+            "and approximate area. Useful as input to the 'bbox' command."
+        ),
+    )
+    p_area.add_argument(
+        "place", nargs="+",
+        help="Place name to look up (e.g., 'Manhattan' or 'downtown Seattle').",
+    )
+
+    return parser
+
+
+def main():
+    """Parse the command line and run the handler for the chosen command."""
+    args = build_parser().parse_args()
+
+    # Command name -> handler; keys mirror the subparsers in build_parser().
+    handlers = {
+        "search":     cmd_search,
+        "reverse":    cmd_reverse,
+        "nearby":     cmd_nearby,
+        "distance":   cmd_distance,
+        "directions": cmd_directions,
+        "timezone":   cmd_timezone,
+        "bbox":       cmd_bbox,
+        "area":       cmd_area,
+    }
+
+    run = handlers.get(args.command)
+    if run is None:
+        error_exit(f"Unknown command: {args.command}")
+    run(args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/optional-skills/productivity/maps/tests/test_maps_client.py b/optional-skills/productivity/maps/tests/test_maps_client.py
new file mode 100644
index 00000000000..0400d51b7d6
--- /dev/null
+++ b/optional-skills/productivity/maps/tests/test_maps_client.py
@@ -0,0 +1,177 @@
+"""Unit tests for maps_client.py pure functions."""
+
+import json
+import math
+import sys
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+# Add the scripts directory to the path so we can import maps_client
+SCRIPTS_DIR = str(Path(__file__).resolve().parent.parent / "scripts")
+sys.path.insert(0, SCRIPTS_DIR)
+
+import maps_client as mc
+
+
+# ── Haversine ────────────────────────────────────────────────────────────
+
+
+class TestHaversine:
+    def test_same_point_is_zero(self):
+        assert mc.haversine_m(48.8584, 2.2945, 48.8584, 2.2945) == 0.0
+
+    def test_known_distance_paris_lyon(self):
+        # Paris to Lyon is ~393 km straight line
+        dist = mc.haversine_m(48.8566, 2.3522, 45.7640, 4.8357)
+        assert 390_000 < dist < 400_000
+
+    def test_antipodal_points(self):
+        # North pole to south pole ~20,000 km
+        dist = mc.haversine_m(90, 0, -90, 0)
+        assert 20_000_000 < dist < 20_100_000
+
+    def test_equator_quarter(self):
+        # 0,0 to 0,90 is ~10,000 km
+        dist = mc.haversine_m(0, 0, 0, 90)
+        assert 10_000_000 < dist < 10_100_000
+
+    def test_symmetry(self):
+        d1 = mc.haversine_m(40.7128, -74.0060, 51.5074, -0.1278)
+        d2 = mc.haversine_m(51.5074, -0.1278, 40.7128, -74.0060)
+        assert d1 == pytest.approx(d2)
+
+
+# ── Overpass query builder ───────────────────────────────────────────────
+
+
+class TestBuildOverpassQuery:
+    def test_basic_query_structure(self):
+        q = mc.build_overpass_nearby("amenity", "restaurant", 48.85, 2.29, 500, 10)
+        assert "[out:json]" in q
+        assert '"amenity"="restaurant"' in q
+        assert "around:500,48.85,2.29" in q
+        assert "out center 10" in q
+
+    def test_contains_node_and_way(self):
+        q = mc.build_overpass_nearby("tourism", "hotel", 40.0, -74.0, 1000, 5)
+        assert "node[" in q
+        assert "way[" in q
+
+    def test_bbox_query_structure(self):
+        q = mc.build_overpass_bbox("amenity", "cafe", 40.75, -74.00, 40.77, -73.98, 20)
+        assert "[out:json]" in q
+        assert '"amenity"="cafe"' in q
+        assert "40.75,-74.0,40.77,-73.98" in q
+
+
+# ── Category validation ──────────────────────────────────────────────────
+
+
+class TestCategories:
+    def test_original_12_categories_exist(self):
+        original = [
+            "restaurant", "cafe", "bar", "hospital", "pharmacy", "hotel",
+            "supermarket", "atm", "gas_station", "parking", "museum", "park",
+        ]
+        for cat in original:
+            assert cat in mc.CATEGORY_TAGS, f"Missing original category: {cat}"
+
+    def test_new_categories_exist(self):
+        new_cats = [
+            "school", "university", "bank", "police", "fire_station",
+            "library", "airport", "train_station", "bus_stop", "dentist",
+            "doctor", "cinema", "theatre", "gym", "post_office",
+            "convenience_store", "bakery", "nightclub", "zoo", "playground",
+        ]
+        for cat in new_cats:
+            assert cat in mc.CATEGORY_TAGS, f"Missing new category: {cat}"
+
+    def test_all_categories_have_valid_tags(self):
+        for cat, tag in mc.CATEGORY_TAGS.items():
+            assert isinstance(tag, tuple), f"{cat}: tag should be tuple"
+            assert len(tag) == 2, f"{cat}: tag should be (key, value)"
+            assert isinstance(tag[0], str) and isinstance(tag[1], str)
+
+    def test_at_least_40_categories(self):
+        assert len(mc.CATEGORY_TAGS) >= 40
+
+
+# ── OSRM profiles ────────────────────────────────────────────────────────
+
+
+class TestOSRMProfiles:
+    def test_driving_walking_cycling(self):
+        assert "driving" in mc.OSRM_PROFILES
+        assert "walking" in mc.OSRM_PROFILES
+        assert "cycling" in mc.OSRM_PROFILES
+
+    def test_profile_mappings(self):
+        assert mc.OSRM_PROFILES["driving"] == "driving"
+        assert mc.OSRM_PROFILES["walking"] == "foot"
+        assert mc.OSRM_PROFILES["cycling"] == "bike"
+
+
+# ── Argparse ─────────────────────────────────────────────────────────────
+
+
+class TestArgparse:
+    def test_distance_uses_to_flag(self):
+        """The distance command should use --to, not two positional nargs='+'."""
+        parser = mc.build_parser()
+        args = parser.parse_args(["distance", "Paris", "--to", "Lyon"])
+        assert args.command == "distance"
+        assert args.origin == ["Paris"]
+        assert args.to == ["Lyon"]
+
+    def test_distance_multiword_origin(self):
+        parser = mc.build_parser()
+        args = parser.parse_args(["distance", "New", "York", "--to", "Boston"])
+        assert args.origin == ["New", "York"]
+        assert args.to == ["Boston"]
+
+    def test_directions_uses_to_flag(self):
+        parser = mc.build_parser()
+        args = parser.parse_args(["directions", "Big Ben", "--to", "Tower Bridge"])
+        assert args.command == "directions"
+
+    def test_search_accepts_query(self):
+        parser = mc.build_parser()
+        args = parser.parse_args(["search", "Eiffel", "Tower"])
+        assert args.command == "search"
+        assert args.query == ["Eiffel", "Tower"]
+
+    def test_nearby_accepts_category(self):
+        parser = mc.build_parser()
+        args = parser.parse_args(["nearby", "48.85", "2.29", "restaurant"])
+        assert args.command == "nearby"
+        assert args.category == "restaurant"
+
+    def test_bbox_accepts_coordinates(self):
+        parser = mc.build_parser()
+        args = parser.parse_args(["bbox", "40.75", "-74.00", "40.77", "-73.98", "cafe"])
+        assert args.command == "bbox"
+        assert args.category == "cafe"
+
+    def test_area_accepts_query(self):
+        parser = mc.build_parser()
+        args = parser.parse_args(["area", "Manhattan"])
+        assert args.command == "area"
+
+
+# ── Output helpers ───────────────────────────────────────────────────────
+
+
+class TestOutputHelpers:
+    def test_print_json_outputs_valid_json(self, capsys):
+        mc.print_json({"key": "value", "num": 42})
+        captured = capsys.readouterr()
+        data = json.loads(captured.out)
+        assert data["key"] == "value"
+        assert data["num"] == 42
+
+    def test_error_exit_outputs_error_json(self):
+        with pytest.raises(SystemExit) as exc_info:
+            mc.error_exit("something went wrong")
+        assert exc_info.value.code == 1

From de491fdf0e4a35a91b447f8f077af4961a59b7b3 Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Mon, 30 Mar 2026 00:10:04 -0700
Subject: [PATCH 132/143] chore: remove unit tests from maps skill
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Skills are self-contained scripts — they don't need test suites in
the repo.
---
 .../maps/tests/test_maps_client.py            | 177 ------------------
 1 file changed, 177 deletions(-)
 delete mode 100644 optional-skills/productivity/maps/tests/test_maps_client.py

diff --git a/optional-skills/productivity/maps/tests/test_maps_client.py b/optional-skills/productivity/maps/tests/test_maps_client.py
deleted file mode 100644
index 0400d51b7d6..00000000000
--- a/optional-skills/productivity/maps/tests/test_maps_client.py
+++ /dev/null
@@ -1,177 +0,0 @@
-"""Unit tests for maps_client.py pure functions."""
-
-import json
-import math
-import sys
-from pathlib import Path
-from unittest.mock import patch, MagicMock
-
-import pytest
-
-# Add the scripts directory to the path so we can import maps_client
-SCRIPTS_DIR = str(Path(__file__).resolve().parent.parent / "scripts")
-sys.path.insert(0, SCRIPTS_DIR)
-
-import maps_client as mc
-
-
-# ── Haversine ────────────────────────────────────────────────────────────
-
-
-class TestHaversine:
-    def test_same_point_is_zero(self):
-        assert mc.haversine_m(48.8584, 2.2945, 48.8584, 2.2945) == 0.0
-
-    def test_known_distance_paris_lyon(self):
-        # Paris to Lyon is ~393 km straight line
-        dist = mc.haversine_m(48.8566, 2.3522, 45.7640, 4.8357)
-        assert 390_000 < dist < 400_000
-
-    def test_antipodal_points(self):
-        # North pole to south pole ~20,000 km
-        dist = mc.haversine_m(90, 0, -90, 0)
-        assert 20_000_000 < dist < 20_100_000
-
-    def test_equator_quarter(self):
-        # 0,0 to 0,90 is ~10,000 km
-        dist = mc.haversine_m(0, 0, 0, 90)
-        assert 10_000_000 < dist < 10_100_000
-
-    def test_symmetry(self):
-        d1 = mc.haversine_m(40.7128, -74.0060, 51.5074, -0.1278)
-        d2 = mc.haversine_m(51.5074, -0.1278, 40.7128, -74.0060)
-        assert d1 == pytest.approx(d2)
-
-
-# ── Overpass query builder ───────────────────────────────────────────────
-
-
-class TestBuildOverpassQuery:
-    def test_basic_query_structure(self):
-        q = mc.build_overpass_nearby("amenity", "restaurant", 48.85, 2.29, 500, 10)
-        assert "[out:json]" in q
-        assert '"amenity"="restaurant"' in q
-        assert "around:500,48.85,2.29" in q
-        assert "out center 10" in q
-
-    def test_contains_node_and_way(self):
-        q = mc.build_overpass_nearby("tourism", "hotel", 40.0, -74.0, 1000, 5)
-        assert "node[" in q
-        assert "way[" in q
-
-    def test_bbox_query_structure(self):
-        q = mc.build_overpass_bbox("amenity", "cafe", 40.75, -74.00, 40.77, -73.98, 20)
-        assert "[out:json]" in q
-        assert '"amenity"="cafe"' in q
-        assert "40.75,-74.0,40.77,-73.98" in q
-
-
-# ── Category validation ──────────────────────────────────────────────────
-
-
-class TestCategories:
-    def test_original_12_categories_exist(self):
-        original = [
-            "restaurant", "cafe", "bar", "hospital", "pharmacy", "hotel",
-            "supermarket", "atm", "gas_station", "parking", "museum", "park",
-        ]
-        for cat in original:
-            assert cat in mc.CATEGORY_TAGS, f"Missing original category: {cat}"
-
-    def test_new_categories_exist(self):
-        new_cats = [
-            "school", "university", "bank", "police", "fire_station",
-            "library", "airport", "train_station", "bus_stop", "dentist",
-            "doctor", "cinema", "theatre", "gym", "post_office",
-            "convenience_store", "bakery", "nightclub", "zoo", "playground",
-        ]
-        for cat in new_cats:
-            assert cat in mc.CATEGORY_TAGS, f"Missing new category: {cat}"
-
-    def test_all_categories_have_valid_tags(self):
-        for cat, tag in mc.CATEGORY_TAGS.items():
-            assert isinstance(tag, tuple), f"{cat}: tag should be tuple"
-            assert len(tag) == 2, f"{cat}: tag should be (key, value)"
-            assert isinstance(tag[0], str) and isinstance(tag[1], str)
-
-    def test_at_least_40_categories(self):
-        assert len(mc.CATEGORY_TAGS) >= 40
-
-
-# ── OSRM profiles ────────────────────────────────────────────────────────
-
-
-class TestOSRMProfiles:
-    def test_driving_walking_cycling(self):
-        assert "driving" in mc.OSRM_PROFILES
-        assert "walking" in mc.OSRM_PROFILES
-        assert "cycling" in mc.OSRM_PROFILES
-
-    def test_profile_mappings(self):
-        assert mc.OSRM_PROFILES["driving"] == "driving"
-        assert mc.OSRM_PROFILES["walking"] == "foot"
-        assert mc.OSRM_PROFILES["cycling"] == "bike"
-
-
-# ── Argparse ─────────────────────────────────────────────────────────────
-
-
-class TestArgparse:
-    def test_distance_uses_to_flag(self):
-        """The distance command should use --to, not two positional nargs='+'."""
-        parser = mc.build_parser()
-        args = parser.parse_args(["distance", "Paris", "--to", "Lyon"])
-        assert args.command == "distance"
-        assert args.origin == ["Paris"]
-        assert args.to == ["Lyon"]
-
-    def test_distance_multiword_origin(self):
-        parser = mc.build_parser()
-        args = parser.parse_args(["distance", "New", "York", "--to", "Boston"])
-        assert args.origin == ["New", "York"]
-        assert args.to == ["Boston"]
-
-    def test_directions_uses_to_flag(self):
-        parser = mc.build_parser()
-        args = parser.parse_args(["directions", "Big Ben", "--to", "Tower Bridge"])
-        assert args.command == "directions"
-
-    def test_search_accepts_query(self):
-        parser = mc.build_parser()
-        args = parser.parse_args(["search", "Eiffel", "Tower"])
-        assert args.command == "search"
-        assert args.query == ["Eiffel", "Tower"]
-
-    def test_nearby_accepts_category(self):
-        parser = mc.build_parser()
-        args = parser.parse_args(["nearby", "48.85", "2.29", "restaurant"])
-        assert args.command == "nearby"
-        assert args.category == "restaurant"
-
-    def test_bbox_accepts_coordinates(self):
-        parser = mc.build_parser()
-        args = parser.parse_args(["bbox", "40.75", "-74.00", "40.77", "-73.98", "cafe"])
-        assert args.command == "bbox"
-        assert args.category == "cafe"
-
-    def test_area_accepts_query(self):
-        parser = mc.build_parser()
-        args = parser.parse_args(["area", "Manhattan"])
-        assert args.command == "area"
-
-
-# ── Output helpers ───────────────────────────────────────────────────────
-
-
-class TestOutputHelpers:
-    def test_print_json_outputs_valid_json(self, capsys):
-        mc.print_json({"key": "value", "num": 42})
-        captured = capsys.readouterr()
-        data = json.loads(captured.out)
-        assert data["key"] == "value"
-        assert data["num"] == 42
-
-    def test_error_exit_outputs_error_json(self):
-        with pytest.raises(SystemExit) as exc_info:
-            mc.error_exit("something went wrong")
-        assert exc_info.value.code == 1

From ea0bd81b84e460368c35432472ef6e8cbdf6c541 Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Sun, 19 Apr 2026 05:17:39 -0700
Subject: [PATCH 133/143] feat(skills): consolidate find-nearby into maps as a
 single location skill
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

find-nearby and the (new) maps optional skill both used OpenStreetMap's
Overpass + Nominatim to answer the same question — 'what's near this
location?' — so shipping both would be duplicate code for overlapping
capability. Consolidate into one active-by-default skill at
skills/productivity/maps/ that is a strict superset of find-nearby.

Moves + deletions:
- optional-skills/productivity/maps/ → skills/productivity/maps/ (active,
  no install step needed)
- skills/leisure/find-nearby/ → DELETED (fully superseded)

Upgrades to maps_client.py so it covers everything find-nearby did:
- Overpass server failover — tries overpass-api.de then
  overpass.kumi.systems so a single-mirror outage doesn't break the skill
  (new overpass_query helper, used by both nearby and bbox)
- nearby now accepts --near "<address>" as a shortcut that auto-geocodes,
  so one command replaces the old 'search → copy coords → nearby' chain
- nearby now accepts --category (repeatable) for multi-type queries in
  one call (e.g. --category restaurant --category bar), results merged
  and deduped by (osm_type, osm_id), sorted by distance, capped at --limit
- Each nearby result now includes maps_url (clickable Google Maps search
  link) and directions_url (Google Maps directions from the search point
  — only when a ref point is known)
- Promoted commonly-useful OSM tags to top-level fields on each result:
  cuisine, hours (opening_hours), phone, website — instead of forcing
  callers to dig into the raw tags dict

SKILL.md:
- Version bumped 1.1.0 → 1.2.0, description rewritten to lead with
  capability surface
- New 'Working With Telegram Location Pins' section replacing
  find-nearby's equivalent workflow
- metadata.hermes.supersedes: [find-nearby] so tooling can flag any
  lingering references to the old skill

External references updated:
- optional-skills/productivity/telephony/SKILL.md — related_skills
  find-nearby → maps
- website/docs/reference/skills-catalog.md — removed the (now-empty)
  'leisure' section, added 'maps' row under productivity
- website/docs/user-guide/features/cron.md — find-nearby example
  usages swapped to maps
- tests/tools/test_cronjob_tools.py, tests/hermes_cli/test_cron.py,
  tests/cron/test_scheduler.py — fixture string values swapped
- cli.py:5290 — /cron help-hint example swapped

Not touched:
- RELEASE_v0.2.0.md — historical record, left intact

E2E-verified live (Nominatim + Overpass, one query each):
- nearby --near "Times Square" --category restaurant --category bar → 3 results,
  sorted by distance, all with maps_url, directions_url, cuisine, phone, website
  where OSM had the tags

All 111 targeted tests pass across tests/cron/, tests/tools/, tests/hermes_cli/.
---
 cli.py                                        |   2 +-
 .../productivity/telephony/SKILL.md           |   2 +-
 skills/leisure/find-nearby/SKILL.md           |  69 -------
 .../find-nearby/scripts/find_nearby.py        | 184 ------------------
 .../productivity/maps/SKILL.md                |  81 ++++++--
 .../productivity/maps/scripts/maps_client.py  | 168 +++++++++++++---
 tests/cron/test_scheduler.py                  |   8 +-
 tests/hermes_cli/test_cron.py                 |   8 +-
 tests/tools/test_cronjob_tools.py             |  10 +-
 website/docs/reference/skills-catalog.md      |   9 +-
 website/docs/user-guide/features/cron.md      |  12 +-
 11 files changed, 222 insertions(+), 331 deletions(-)
 delete mode 100644 skills/leisure/find-nearby/SKILL.md
 delete mode 100644 skills/leisure/find-nearby/scripts/find_nearby.py
 rename {optional-skills => skills}/productivity/maps/SKILL.md (53%)
 rename {optional-skills => skills}/productivity/maps/scripts/maps_client.py (86%)

diff --git a/cli.py b/cli.py
index e814e35b122..0e5e9ff6603 100644
--- a/cli.py
+++ b/cli.py
@@ -5287,7 +5287,7 @@ class HermesCLI:
             print("    /cron list")
             print('    /cron add "every 2h" "Check server status" [--skill blogwatcher]')
             print('    /cron edit <job_id> --schedule "every 4h" --prompt "New task"')
-            print("    /cron edit <job_id> --skill blogwatcher --skill find-nearby")
+            print("    /cron edit <job_id> --skill blogwatcher --skill maps")
             print("    /cron edit <job_id> --remove-skill blogwatcher")
             print("    /cron edit <job_id> --clear-skills")
             print("    /cron pause <job_id>")
diff --git a/optional-skills/productivity/telephony/SKILL.md b/optional-skills/productivity/telephony/SKILL.md
index c74a3692091..6c457592a9a 100644
--- a/optional-skills/productivity/telephony/SKILL.md
+++ b/optional-skills/productivity/telephony/SKILL.md
@@ -7,7 +7,7 @@ license: MIT
 metadata:
   hermes:
     tags: [telephony, phone, sms, mms, voice, twilio, bland.ai, vapi, calling, texting]
-    related_skills: [find-nearby, google-workspace, agentmail]
+    related_skills: [maps, google-workspace, agentmail]
     category: productivity
 ---
 
diff --git a/skills/leisure/find-nearby/SKILL.md b/skills/leisure/find-nearby/SKILL.md
deleted file mode 100644
index f0ecdbf5314..00000000000
--- a/skills/leisure/find-nearby/SKILL.md
+++ /dev/null
@@ -1,69 +0,0 @@
----
-name: find-nearby
-description: Find nearby places (restaurants, cafes, bars, pharmacies, etc.) using OpenStreetMap. Works with coordinates, addresses, cities, zip codes, or Telegram location pins. No API keys needed.
-version: 1.0.0
-metadata:
-  hermes:
-    tags: [location, maps, nearby, places, restaurants, local]
-    related_skills: []
----
-
-# Find Nearby — Local Place Discovery
-
-Find restaurants, cafes, bars, pharmacies, and other places near any location. Uses OpenStreetMap (free, no API keys). Works with:
-
-- **Coordinates** from Telegram location pins (latitude/longitude in conversation)
-- **Addresses** ("near 123 Main St, Springfield")
-- **Cities** ("restaurants in downtown Austin")
-- **Zip codes** ("pharmacies near 90210")
-- **Landmarks** ("cafes near Times Square")
-
-## Quick Reference
-
-```bash
-# By coordinates (from Telegram location pin or user-provided)
-python3 SKILL_DIR/scripts/find_nearby.py --lat <LAT> --lon <LON> --type restaurant --radius 1500
-
-# By address, city, or landmark (auto-geocoded)
-python3 SKILL_DIR/scripts/find_nearby.py --near "Times Square, New York" --type cafe
-
-# Multiple place types
-python3 SKILL_DIR/scripts/find_nearby.py --near "downtown austin" --type restaurant --type bar --limit 10
-
-# JSON output
-python3 SKILL_DIR/scripts/find_nearby.py --near "90210" --type pharmacy --json
-```
-
-### Parameters
-
-| Flag | Description | Default |
-|------|-------------|---------|
-| `--lat`, `--lon` | Exact coordinates | — |
-| `--near` | Address, city, zip, or landmark (geocoded) | — |
-| `--type` | Place type (repeatable for multiple) | restaurant |
-| `--radius` | Search radius in meters | 1500 |
-| `--limit` | Max results | 15 |
-| `--json` | Machine-readable JSON output | off |
-
-### Common Place Types
-
-`restaurant`, `cafe`, `bar`, `pub`, `fast_food`, `pharmacy`, `hospital`, `bank`, `atm`, `fuel`, `parking`, `supermarket`, `convenience`, `hotel`
-
-## Workflow
-
-1. **Get the location.** Look for coordinates (`latitude: ... / longitude: ...`) from a Telegram pin, or ask the user for an address/city/zip.
-
-2. **Ask for preferences** (only if not already stated): place type, how far they're willing to go, any specifics (cuisine, "open now", etc.).
-
-3. **Run the script** with appropriate flags. Use `--json` if you need to process results programmatically.
-
-4. **Present results** with names, distances, and Google Maps links. If the user asked about hours or "open now," check the `hours` field in results — if missing or unclear, verify with `web_search`.
-
-5. **For directions**, use the `directions_url` from results, or construct: `https://www.google.com/maps/dir/?api=1&origin=<LAT>,<LON>&destination=<LAT>,<LON>`
-
-## Tips
-
-- If results are sparse, widen the radius (1500 → 3000m)
-- For "open now" requests: check the `hours` field in results, cross-reference with `web_search` for accuracy since OSM hours aren't always complete
-- Zip codes alone can be ambiguous globally — prompt the user for country/state if results look wrong
-- The script uses OpenStreetMap data which is community-maintained; coverage varies by region
diff --git a/skills/leisure/find-nearby/scripts/find_nearby.py b/skills/leisure/find-nearby/scripts/find_nearby.py
deleted file mode 100644
index 9d7fed78f46..00000000000
--- a/skills/leisure/find-nearby/scripts/find_nearby.py
+++ /dev/null
@@ -1,184 +0,0 @@
-#!/usr/bin/env python3
-"""Find nearby places using OpenStreetMap (Overpass + Nominatim). No API keys needed.
-
-Usage:
-    # By coordinates
-    python find_nearby.py --lat 36.17 --lon -115.14 --type restaurant --radius 1500
-
-    # By address/city/zip (auto-geocoded)
-    python find_nearby.py --near "Times Square, New York" --type cafe --radius 1000
-    python find_nearby.py --near "90210" --type pharmacy
-
-    # Multiple types
-    python find_nearby.py --lat 36.17 --lon -115.14 --type restaurant --type bar
-
-    # JSON output for programmatic use
-    python find_nearby.py --near "downtown las vegas" --type restaurant --json
-"""
-
-import argparse
-import json
-import math
-import sys
-import urllib.parse
-import urllib.request
-from typing import Any
-
-OVERPASS_URLS = [
-    "https://overpass-api.de/api/interpreter",
-    "https://overpass.kumi.systems/api/interpreter",
-]
-NOMINATIM_URL = "https://nominatim.openstreetmap.org/search"
-USER_AGENT = "HermesAgent/1.0 (find-nearby skill)"
-TIMEOUT = 15
-
-
-def _http_get(url: str) -> Any:
-    req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
-    with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
-        return json.loads(r.read())
-
-
-def _http_post(url: str, data: str) -> Any:
-    req = urllib.request.Request(
-        url, data=data.encode(), headers={"User-Agent": USER_AGENT}
-    )
-    with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
-        return json.loads(r.read())
-
-
-def haversine(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
-    """Distance in meters between two coordinates."""
-    R = 6_371_000
-    rlat1, rlat2 = math.radians(lat1), math.radians(lat2)
-    dlat = math.radians(lat2 - lat1)
-    dlon = math.radians(lon2 - lon1)
-    a = math.sin(dlat / 2) ** 2 + math.cos(rlat1) * math.cos(rlat2) * math.sin(dlon / 2) ** 2
-    return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
-
-
-def geocode(query: str) -> tuple[float, float]:
-    """Convert address/city/zip to coordinates via Nominatim."""
-    params = urllib.parse.urlencode({"q": query, "format": "json", "limit": 1})
-    results = _http_get(f"{NOMINATIM_URL}?{params}")
-    if not results:
-        print(f"Error: Could not geocode '{query}'. Try a more specific address.", file=sys.stderr)
-        sys.exit(1)
-    return float(results[0]["lat"]), float(results[0]["lon"])
-
-
-def find_nearby(lat: float, lon: float, types: list[str], radius: int = 1500, limit: int = 15) -> list[dict]:
-    """Query Overpass for nearby amenities."""
-    # Build Overpass QL query
-    type_filters = "".join(
-        f'nwr["amenity"="{t}"](around:{radius},{lat},{lon});' for t in types
-    )
-    query = f"[out:json][timeout:{TIMEOUT}];({type_filters});out center tags;"
-
-    # Try each Overpass server
-    data = None
-    for url in OVERPASS_URLS:
-        try:
-            data = _http_post(url, f"data={urllib.parse.quote(query)}")
-            break
-        except Exception:
-            continue
-
-    if not data:
-        return []
-
-    # Parse results
-    places = []
-    for el in data.get("elements", []):
-        tags = el.get("tags", {})
-        name = tags.get("name")
-        if not name:
-            continue
-
-        # Get coordinates (nodes have lat/lon directly, ways/relations use center)
-        plat = el.get("lat") or (el.get("center", {}) or {}).get("lat")
-        plon = el.get("lon") or (el.get("center", {}) or {}).get("lon")
-        if plat is None or plon is None:
-            continue
-
-        dist = haversine(lat, lon, plat, plon)
-
-        place = {
-            "name": name,
-            "type": tags.get("amenity", ""),
-            "distance_m": round(dist),
-            "lat": plat,
-            "lon": plon,
-            "maps_url": f"https://www.google.com/maps/search/?api=1&query={plat},{plon}",
-            "directions_url": f"https://www.google.com/maps/dir/?api=1&origin={lat},{lon}&destination={plat},{plon}",
-        }
-
-        # Add useful optional fields
-        if tags.get("cuisine"):
-            place["cuisine"] = tags["cuisine"]
-        if tags.get("opening_hours"):
-            place["hours"] = tags["opening_hours"]
-        if tags.get("phone"):
-            place["phone"] = tags["phone"]
-        if tags.get("website"):
-            place["website"] = tags["website"]
-        if tags.get("addr:street"):
-            addr_parts = [tags.get("addr:housenumber", ""), tags.get("addr:street", "")]
-            if tags.get("addr:city"):
-                addr_parts.append(tags["addr:city"])
-            place["address"] = " ".join(p for p in addr_parts if p)
-
-        places.append(place)
-
-    # Sort by distance, limit results
-    places.sort(key=lambda p: p["distance_m"])
-    return places[:limit]
-
-
-def main():
-    parser = argparse.ArgumentParser(description="Find nearby places via OpenStreetMap")
-    parser.add_argument("--lat", type=float, help="Latitude")
-    parser.add_argument("--lon", type=float, help="Longitude")
-    parser.add_argument("--near", type=str, help="Address, city, or zip code (geocoded automatically)")
-    parser.add_argument("--type", action="append", dest="types", default=[], help="Place type (restaurant, cafe, bar, pharmacy, etc.)")
-    parser.add_argument("--radius", type=int, default=1500, help="Search radius in meters (default: 1500)")
-    parser.add_argument("--limit", type=int, default=15, help="Max results (default: 15)")
-    parser.add_argument("--json", action="store_true", dest="json_output", help="Output as JSON")
-    args = parser.parse_args()
-
-    # Resolve coordinates
-    if args.near:
-        lat, lon = geocode(args.near)
-    elif args.lat is not None and args.lon is not None:
-        lat, lon = args.lat, args.lon
-    else:
-        print("Error: Provide --lat/--lon or --near", file=sys.stderr)
-        sys.exit(1)
-
-    if not args.types:
-        args.types = ["restaurant"]
-
-    places = find_nearby(lat, lon, args.types, args.radius, args.limit)
-
-    if args.json_output:
-        print(json.dumps({"origin": {"lat": lat, "lon": lon}, "results": places, "count": len(places)}, indent=2))
-    else:
-        if not places:
-            print(f"No {'/'.join(args.types)} found within {args.radius}m")
-            return
-        print(f"Found {len(places)} places within {args.radius}m:\n")
-        for i, p in enumerate(places, 1):
-            dist_str = f"{p['distance_m']}m" if p["distance_m"] < 1000 else f"{p['distance_m']/1000:.1f}km"
-            print(f"  {i}. {p['name']} ({p['type']}) — {dist_str}")
-            if p.get("cuisine"):
-                print(f"     Cuisine: {p['cuisine']}")
-            if p.get("hours"):
-                print(f"     Hours: {p['hours']}")
-            if p.get("address"):
-                print(f"     Address: {p['address']}")
-            print(f"     Map: {p['maps_url']}")
-            print()
-
-
-if __name__ == "__main__":
-    main()
diff --git a/optional-skills/productivity/maps/SKILL.md b/skills/productivity/maps/SKILL.md
similarity index 53%
rename from optional-skills/productivity/maps/SKILL.md
rename to skills/productivity/maps/SKILL.md
index 59e0359d56f..9eded20866b 100644
--- a/optional-skills/productivity/maps/SKILL.md
+++ b/skills/productivity/maps/SKILL.md
@@ -1,17 +1,20 @@
 ---
 name: maps
 description: >
-  Geocoding, reverse geocoding, nearby POI search (44 categories),
-  distance/routing, turn-by-turn directions, timezone lookup, bounding box
-  search, and area info. Uses OpenStreetMap + Overpass + OSRM. Free, no API key.
-version: 1.1.0
+  Location intelligence — geocode a place, reverse-geocode coordinates,
+  find nearby places (44 POI categories), driving/walking/cycling
+  distance + time, turn-by-turn directions, timezone lookup, bounding
+  box + area for a named place, and POI search within a rectangle.
+  Uses OpenStreetMap + Overpass + OSRM. Free, no API key.
+version: 1.2.0
 author: Mibayy
 license: MIT
 metadata:
   hermes:
-    tags: [maps, geocoding, places, routing, distance, directions, openstreetmap, nominatim, overpass, osrm]
+    tags: [maps, geocoding, places, routing, distance, directions, nearby, location, openstreetmap, nominatim, overpass, osrm]
     category: productivity
     requires_toolsets: [terminal]
+    supersedes: [find-nearby]
 ---
 
 # Maps Skill
@@ -21,21 +24,26 @@ categories, zero dependencies (Python stdlib only), no API key required.
 
 Data sources: OpenStreetMap/Nominatim, Overpass API, OSRM, TimeAPI.io.
 
+This skill supersedes the old `find-nearby` skill — all of find-nearby's
+functionality is covered by the `nearby` command below, with the same
+`--near "<place>"` shortcut and multi-category support.
+
 ## When to Use
 
-- User wants coordinates for a place name
-- User has coordinates and wants the address
-- User asks for nearby restaurants, hospitals, pharmacies, hotels, etc.
-- User wants driving/walking/cycling distance or travel time
-- User wants turn-by-turn directions between two places
-- User wants timezone information for a location
-- User wants to search for POIs within a geographic area
+- User sends a Telegram location pin (latitude/longitude in the message) → `nearby`
+- User wants coordinates for a place name → `search`
+- User has coordinates and wants the address → `reverse`
+- User asks for nearby restaurants, hospitals, pharmacies, hotels, etc. → `nearby`
+- User wants driving/walking/cycling distance or travel time → `distance`
+- User wants turn-by-turn directions between two places → `directions`
+- User wants timezone information for a location → `timezone`
+- User wants to search for POIs within a geographic area → `area` + `bbox`
 
 ## Prerequisites
 
 Python 3.8+ (stdlib only — no pip installs needed).
 
-Script path after install: `~/.hermes/skills/maps/scripts/maps_client.py`
+Script path: `~/.hermes/skills/maps/scripts/maps_client.py`
 
 ## Commands
 
@@ -63,9 +71,16 @@ Returns: full address breakdown (street, city, state, country, postcode).
 ### nearby — Find places by category
 
 ```bash
+# By coordinates (from a Telegram location pin, for example)
 python3 $MAPS nearby 48.8584 2.2945 restaurant --limit 10
 python3 $MAPS nearby 40.7128 -74.0060 hospital --radius 2000
-python3 $MAPS nearby 51.5074 -0.1278 cafe --limit 5 --radius 300
+
+# By address / city / zip / landmark — --near auto-geocodes
+python3 $MAPS nearby --near "Times Square, New York" --category cafe
+python3 $MAPS nearby --near "90210" --category pharmacy
+
+# Multiple categories merged into one query
+python3 $MAPS nearby --near "downtown austin" --category restaurant --category bar --limit 10
 ```
 
 44 categories: restaurant, cafe, bar, hospital, pharmacy, hotel, supermarket,
@@ -75,6 +90,11 @@ synagogue, dentist, doctor, cinema, theatre, gym, swimming_pool, post_office,
 convenience_store, bakery, bookshop, laundry, car_wash, car_rental,
 bicycle_rental, taxi, veterinary, zoo, playground, stadium, nightclub.
 
+Each result includes: `name`, `address`, `lat`/`lon`, `distance_m`,
+`maps_url` (clickable Google Maps link), `directions_url` (Google Maps
+directions from the search point), and promoted tags when available —
+`cuisine`, `hours` (opening_hours), `phone`, `website`.
+
 ### distance — Travel distance and time
 
 ```bash
@@ -124,11 +144,31 @@ python3 $MAPS bbox 40.75 -74.00 40.77 -73.98 restaurant --limit 20
 Finds POIs within a geographic rectangle. Use `area` first to get the
 bounding box coordinates for a named place.
 
+## Working With Telegram Location Pins
+
+When a user sends a location pin, the message contains `latitude:` and
+`longitude:` fields. Extract those and pass them straight to `nearby`:
+
+```bash
+# User sent a pin at 36.17, -115.14 and asked "find cafes nearby"
+python3 $MAPS nearby 36.17 -115.14 cafe --radius 1500
+```
+
+Present results as a numbered list with names, distances, and the
+`maps_url` field so the user gets a tap-to-open link in chat. For "open
+now?" questions, check the `hours` field; if missing or unclear, verify
+with `web_search` since OSM hours are community-maintained and not always
+current.
+
 ## Workflow Examples
 
 **"Find Italian restaurants near the Colosseum":**
-1. `search "Colosseum Rome"` → get lat/lon
-2. `nearby LAT LON restaurant --radius 500`
+1. `nearby --near "Colosseum Rome" --category restaurant --radius 500`
+   — one command, auto-geocoded; keep results whose `cuisine` includes `italian`
+
+**"What's near this location pin they sent?":**
+1. Extract lat/lon from the Telegram message
+2. `nearby LAT LON cafe --radius 1500`
 
 **"How do I walk from hotel to conference center?":**
 1. `directions "Hotel Name" --to "Conference Center" --mode walking`
@@ -140,14 +180,19 @@ bounding box coordinates for a named place.
 ## Pitfalls
 
 - Nominatim ToS: max 1 req/s (handled automatically by the script)
-- `nearby` requires lat/lon — use `search` first to get coordinates
+- `nearby` requires lat/lon OR `--near "<address>"` — one of the two is needed
 - OSRM routing coverage is best for Europe and North America
-- Overpass API can be slow during peak hours (script retries automatically)
+- Overpass API can be slow during peak hours; the script automatically
+  falls back between mirrors (overpass-api.de → overpass.kumi.systems)
 - `distance` and `directions` use `--to` flag for the destination (not positional)
+- If a zip code alone gives ambiguous results globally, include country/state
 
 ## Verification
 
 ```bash
 python3 ~/.hermes/skills/maps/scripts/maps_client.py search "Statue of Liberty"
 # Should return lat ~40.689, lon ~-74.044
+
+python3 ~/.hermes/skills/maps/scripts/maps_client.py nearby --near "Times Square" --category restaurant --limit 3
+# Should return a list of restaurants within ~500m of Times Square
 ```
diff --git a/optional-skills/productivity/maps/scripts/maps_client.py b/skills/productivity/maps/scripts/maps_client.py
similarity index 86%
rename from optional-skills/productivity/maps/scripts/maps_client.py
rename to skills/productivity/maps/scripts/maps_client.py
index c271570f995..db0de82d6d7 100644
--- a/optional-skills/productivity/maps/scripts/maps_client.py
+++ b/skills/productivity/maps/scripts/maps_client.py
@@ -34,7 +34,14 @@ DATA_SOURCE = "OpenStreetMap/Nominatim"
 
 NOMINATIM_SEARCH  = "https://nominatim.openstreetmap.org/search"
 NOMINATIM_REVERSE = "https://nominatim.openstreetmap.org/reverse"
-OVERPASS_API      = "https://overpass-api.de/api/interpreter"
+# Public Overpass endpoints. We try them in order so a single server
+# outage doesn't break the skill — kumi.systems is a well-known mirror.
+OVERPASS_URLS = [
+    "https://overpass-api.de/api/interpreter",
+    "https://overpass.kumi.systems/api/interpreter",
+]
+# Backward-compat alias for any caller that imports OVERPASS_API directly.
+OVERPASS_API      = OVERPASS_URLS[0]
 OSRM_BASE         = "https://router.project-osrm.org/route/v1"
 TIMEAPI_BASE      = "https://timeapi.io/api/timezone/coordinate"
 
@@ -246,6 +253,30 @@ def http_post(url, data_str, retries=MAX_RETRIES):
     error_exit(f"POST failed after {retries} attempts. Last error: {last_error}")
 
 
+def overpass_query(query):
+    """POST an Overpass QL query, trying each URL in OVERPASS_URLS in turn.
+
+    A single public Overpass mirror can be rate-limited or down; trying the
+    next mirror before giving up turns a flaky outage into a retry. Returns
+    parsed JSON. Falls through to error_exit if every mirror fails.
+    """
+    post_data = "data=" + urllib.parse.quote(query)
+    last_error = None
+    for url in OVERPASS_URLS:
+        try:
+            return http_post(url, post_data, retries=1)
+        except SystemExit:
+            # error_exit inside http_post — keep trying the next mirror.
+            last_error = f"mirror {url} exhausted retries"
+            continue
+        except Exception as exc:
+            last_error = f"{url}: {exc}"
+            continue
+    error_exit(
+        f"All Overpass mirrors failed. Last error: {last_error or 'unknown'}"
+    )
+
+
 # ---------------------------------------------------------------------------
 # Geo math
 # ---------------------------------------------------------------------------
@@ -379,6 +410,9 @@ def parse_overpass_elements(elements, ref_lat=None, ref_lon=None):
             "lon":      el_lon,
             "osm_type": el.get("type", ""),
             "osm_id":   el.get("id", ""),
+            # Clickable Google Maps link so the agent can render a tap-to-open
+            # URL in chat without composing one downstream.
+            "maps_url": f"https://www.google.com/maps/search/?api=1&query={el_lat},{el_lon}",
             "tags": {
                 k: v for k, v in tags.items()
                 if k not in ("name", "name:en",
@@ -386,9 +420,27 @@ def parse_overpass_elements(elements, ref_lat=None, ref_lon=None):
             },
         }
 
+        # Promote commonly-useful tags to top-level fields so agents can
+        # reference them without digging into the raw ``tags`` dict.
+        for src_key, dst_key in (
+            ("cuisine",        "cuisine"),
+            ("opening_hours",  "hours"),
+            ("phone",          "phone"),
+            ("website",        "website"),
+        ):
+            val = tags.get(src_key)
+            if val:
+                place[dst_key] = val
+
         if ref_lat is not None and ref_lon is not None:
             dist_m = haversine_m(ref_lat, ref_lon, el_lat, el_lon)
             place["distance_m"] = round(dist_m, 1)
+            # With a reference point we can also hand back a directions URL.
+            place["directions_url"] = (
+                f"https://www.google.com/maps/dir/?api=1"
+                f"&origin={ref_lat},{ref_lon}"
+                f"&destination={el_lat},{el_lon}"
+            )
 
         places.append(place)
 
@@ -499,47 +551,84 @@ def cmd_reverse(args):
 # ---------------------------------------------------------------------------
 
 def cmd_nearby(args):
-    """Find nearby POIs using the Overpass API."""
-    try:
-        lat = float(args.lat)
-        lon = float(args.lon)
-    except ValueError:
-        error_exit("LAT and LON must be numeric values.")
+    """Find nearby POIs using the Overpass API.
 
-    category = args.category.lower()
-    if category not in CATEGORY_TAGS:
+    Accepts either explicit coordinates (``lat``/``lon``) or a free-form
+    address via ``--near`` (auto-geocoded through Nominatim). Supports
+    multiple categories in one call — results are merged, deduplicated
+    by ``osm_type+osm_id``, sorted by distance.
+    """
+    # Resolve the center point. --near takes precedence if provided so the
+    # agent can ask "cafes near Times Square" in one command without having
+    # to geocode first.
+    if getattr(args, "near", None):
+        near_query = " ".join(args.near).strip() if isinstance(args.near, list) else str(args.near).strip()
+        if not near_query:
+            error_exit("--near must be a non-empty address or place name.")
+        lat, lon, _ = geocode_single(near_query)
+    else:
+        try:
+            lat = float(args.lat)
+            lon = float(args.lon)
+        except (TypeError, ValueError):
+            error_exit("Provide numeric LAT and LON, or use --near \"<address>\".")
+
+    # Categories: support both legacy single positional ``category`` and the
+    # new repeatable ``--category`` flag. Users can ask for multiple place
+    # types in one query.
+    categories = []
+    if getattr(args, "category_list", None):
+        categories.extend(args.category_list)
+    if getattr(args, "category", None):
+        categories.append(args.category)
+    # Deduplicate, preserve order, lower-case.
+    categories = list(dict.fromkeys(c.lower() for c in categories if c))
+    if not categories:
+        error_exit("Provide at least one category (positional or --category).")
+    unknown = [c for c in categories if c not in CATEGORY_TAGS]
+    if unknown:
         error_exit(
-            f"Unknown category '{category}'. "
+            f"Unknown categor{'ies' if len(unknown) > 1 else 'y'} "
+            f"{', '.join(repr(c) for c in unknown)}. "
             f"Valid categories: {', '.join(VALID_CATEGORIES)}"
         )
 
     radius = int(args.radius)
     limit  = int(args.limit)
-
     if radius <= 0:
         error_exit("Radius must be a positive integer (metres).")
     if limit <= 0:
         error_exit("Limit must be a positive integer.")
 
-    tag_key, tag_val = CATEGORY_TAGS[category]
-    religion = RELIGION_FILTER.get(category)
-    query = build_overpass_nearby(tag_key, tag_val, lat, lon, radius, limit,
-                                  religion=religion)
+    # Query each category against the Overpass fallback chain, merge results,
+    # dedupe by OSM identity so POIs tagged under multiple categories don't
+    # appear twice.
+    merged = {}
+    for category in categories:
+        tag_key, tag_val = CATEGORY_TAGS[category]
+        religion = RELIGION_FILTER.get(category)
+        query = build_overpass_nearby(tag_key, tag_val, lat, lon, radius, limit,
+                                      religion=religion)
+        raw = overpass_query(query)
+        elements = raw.get("elements", [])
+        for place in parse_overpass_elements(elements, ref_lat=lat, ref_lon=lon):
+            place["category"] = category
+            key = (place.get("osm_type", ""), place.get("osm_id", ""))
+            # First-seen wins: a POI matched by several categories keeps the
+            # entry (and ``category`` label) from the earliest category queried.
+            if key not in merged:
+                merged[key] = place
 
-    post_data = "data=" + urllib.parse.quote(query)
-    raw = http_post(OVERPASS_API, post_data)
-
-    elements = raw.get("elements", [])
-    places = parse_overpass_elements(elements, ref_lat=lat, ref_lon=lon)
-
-    # Add category to each result
-    for p in places:
-        p["category"] = category
+    # Sort by distance (entries lacking distance_m last), then cap at ``limit``.
+    places = sorted(
+        merged.values(),
+        key=lambda p: p.get("distance_m", float("inf")),
+    )[:limit]
 
     print_json({
         "center_lat":  lat,
         "center_lon":  lon,
-        "category":    category,
+        "categories":  categories,
         "radius_m":    radius,
         "count":       len(places),
         "results":     places,
@@ -861,8 +950,7 @@ def cmd_bbox(args):
     query = build_overpass_bbox(tag_key, tag_val, south, west, north, east,
                                 limit, religion=religion)
 
-    post_data = "data=" + urllib.parse.quote(query)
-    raw = http_post(OVERPASS_API, post_data)
+    raw = overpass_query(query)
 
     elements = raw.get("elements", [])
 
@@ -998,15 +1086,33 @@ def build_parser():
         help="Find nearby places of a given category.",
         description=(
             "Find points of interest near a location using the Overpass API.\n"
+            "Provide either LAT/LON, or use --near \"<address>\" to auto-geocode.\n"
+            "Categories can be specified positionally OR repeated via --category\n"
+            "to merge multiple types in one query (e.g. --category bar --category cafe).\n"
             f"Categories: {', '.join(VALID_CATEGORIES)}"
         ),
         formatter_class=argparse.RawDescriptionHelpFormatter,
     )
-    p_nearby.add_argument("lat", help="Center latitude (decimal degrees).")
-    p_nearby.add_argument("lon", help="Center longitude (decimal degrees).")
     p_nearby.add_argument(
-        "category",
-        help="POI category (use --help to see full list).",
+        "lat", nargs="?", default=None,
+        help="Center latitude (decimal degrees). Omit if using --near.",
+    )
+    p_nearby.add_argument(
+        "lon", nargs="?", default=None,
+        help="Center longitude (decimal degrees). Omit if using --near.",
+    )
+    p_nearby.add_argument(
+        "category", nargs="?", default=None,
+        help="POI category (use --help for full list). Omit if using --category flags.",
+    )
+    p_nearby.add_argument(
+        "--near", nargs="+", metavar="PLACE",
+        help="Address, city, or landmark to search around (geocoded via Nominatim).",
+    )
+    p_nearby.add_argument(
+        "--category", action="append", dest="category_list", default=[],
+        metavar="CAT",
+        help="POI category (repeatable — adds a type to the search).",
     )
     p_nearby.add_argument(
         "--radius", "-r",
diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py
index b889ede372e..c083a4a80e2 100644
--- a/tests/cron/test_scheduler.py
+++ b/tests/cron/test_scheduler.py
@@ -1024,7 +1024,7 @@ class TestRunJobSkillBacked:
             "id": "multi-skill-job",
             "name": "multi skill test",
             "prompt": "Combine the results.",
-            "skills": ["blogwatcher", "find-nearby"],
+            "skills": ["blogwatcher", "maps"],
         }
 
         fake_db = MagicMock()
@@ -1057,12 +1057,12 @@ class TestRunJobSkillBacked:
         assert error is None
         assert final_response == "ok"
         assert skill_view_mock.call_count == 2
-        assert [call.args[0] for call in skill_view_mock.call_args_list] == ["blogwatcher", "find-nearby"]
+        assert [call.args[0] for call in skill_view_mock.call_args_list] == ["blogwatcher", "maps"]
 
         prompt_arg = mock_agent.run_conversation.call_args.args[0]
-        assert prompt_arg.index("blogwatcher") < prompt_arg.index("find-nearby")
+        assert prompt_arg.index("blogwatcher") < prompt_arg.index("maps")
         assert "Instructions for blogwatcher." in prompt_arg
-        assert "Instructions for find-nearby." in prompt_arg
+        assert "Instructions for maps." in prompt_arg
         assert "Combine the results." in prompt_arg
 
 
diff --git a/tests/hermes_cli/test_cron.py b/tests/hermes_cli/test_cron.py
index 9ae92048272..8593195a1ba 100644
--- a/tests/hermes_cli/test_cron.py
+++ b/tests/hermes_cli/test_cron.py
@@ -54,12 +54,12 @@ class TestCronCommandLifecycle:
                 deliver=None,
                 repeat=None,
                 skill=None,
-                skills=["find-nearby", "blogwatcher"],
+                skills=["maps", "blogwatcher"],
                 clear_skills=False,
             )
         )
         updated = get_job(job["id"])
-        assert updated["skills"] == ["find-nearby", "blogwatcher"]
+        assert updated["skills"] == ["maps", "blogwatcher"]
         assert updated["name"] == "Edited Job"
         assert updated["prompt"] == "Revised prompt"
         assert updated["schedule_display"] == "every 120m"
@@ -95,7 +95,7 @@ class TestCronCommandLifecycle:
                 deliver=None,
                 repeat=None,
                 skill=None,
-                skills=["blogwatcher", "find-nearby"],
+                skills=["blogwatcher", "maps"],
             )
         )
         out = capsys.readouterr().out
@@ -103,5 +103,5 @@ class TestCronCommandLifecycle:
 
         jobs = list_jobs()
         assert len(jobs) == 1
-        assert jobs[0]["skills"] == ["blogwatcher", "find-nearby"]
+        assert jobs[0]["skills"] == ["blogwatcher", "maps"]
         assert jobs[0]["name"] == "Skill combo"
diff --git a/tests/tools/test_cronjob_tools.py b/tests/tools/test_cronjob_tools.py
index dd6b0101b1b..38fc12cc8c7 100644
--- a/tests/tools/test_cronjob_tools.py
+++ b/tests/tools/test_cronjob_tools.py
@@ -192,23 +192,23 @@ class TestUnifiedCronjobTool:
         result = json.loads(
             cronjob(
                 action="create",
-                skills=["blogwatcher", "find-nearby"],
+                skills=["blogwatcher", "maps"],
                 prompt="Use both skills and combine the result.",
                 schedule="every 1h",
                 name="Combo job",
             )
         )
         assert result["success"] is True
-        assert result["skills"] == ["blogwatcher", "find-nearby"]
+        assert result["skills"] == ["blogwatcher", "maps"]
 
         listing = json.loads(cronjob(action="list"))
-        assert listing["jobs"][0]["skills"] == ["blogwatcher", "find-nearby"]
+        assert listing["jobs"][0]["skills"] == ["blogwatcher", "maps"]
 
     def test_multi_skill_default_name_prefers_prompt_when_present(self):
         result = json.loads(
             cronjob(
                 action="create",
-                skills=["blogwatcher", "find-nearby"],
+                skills=["blogwatcher", "maps"],
                 prompt="Use both skills and combine the result.",
                 schedule="every 1h",
             )
@@ -220,7 +220,7 @@ class TestUnifiedCronjobTool:
         created = json.loads(
             cronjob(
                 action="create",
-                skills=["blogwatcher", "find-nearby"],
+                skills=["blogwatcher", "maps"],
                 prompt="Use both skills and combine the result.",
                 schedule="every 1h",
             )
diff --git a/website/docs/reference/skills-catalog.md b/website/docs/reference/skills-catalog.md
index ffe489d3602..46c29929f9c 100644
--- a/website/docs/reference/skills-catalog.md
+++ b/website/docs/reference/skills-catalog.md
@@ -100,14 +100,6 @@ GitHub workflow skills for managing repositories, pull requests, code reviews, i
 | `github-pr-workflow` | Full pull request lifecycle — create branches, commit changes, open PRs, monitor CI status, auto-fix failures, and merge. Works with gh CLI or falls back to git + GitHub REST API via curl. | `github/github-pr-workflow` |
 | `github-repo-management` | Clone, create, fork, configure, and manage GitHub repositories. Manage remotes, secrets, releases, and workflows. Works with gh CLI or falls back to git + GitHub REST API via curl. | `github/github-repo-management` |
 
-## leisure
-
-Skills for discovery and everyday tasks.
-
-| Skill | Description | Path |
-|-------|-------------|------|
-| `find-nearby` | Find nearby places (restaurants, cafes, bars, pharmacies, etc.) using OpenStreetMap. Works with coordinates, addresses, cities, zip codes, or Telegram location pins. No API keys needed. | `leisure/find-nearby` |
-
 ## mcp
 
 Skills for working with MCP (Model Context Protocol) servers, tools, and integrations.
@@ -198,6 +190,7 @@ Skills for document creation, presentations, spreadsheets, and other productivit
 |-------|-------------|------|
 | `google-workspace` | Gmail, Calendar, Drive, Contacts, Sheets, and Docs integration for Hermes. Uses Hermes-managed OAuth2 setup, prefers the Google Workspace CLI (`gws`) when available for broader API coverage, and falls back to the Python client libraries otherwise. | `productivity/google-workspace` |
 | `linear` | Manage Linear issues, projects, and teams via the GraphQL API. Create, update, search, and organize issues. Uses API key auth (no OAuth needed). All operations via curl — no dependencies. | `productivity/linear` |
+| `maps` | Location intelligence — geocode, reverse-geocode, nearby POI search (44 categories, coordinates or address via `--near`), driving/walking/cycling distance + time, turn-by-turn directions, timezone, bounding box + area, POI search in a rectangle. Uses OpenStreetMap + Overpass + OSRM. No API key needed. Telegram location-pin friendly. | `productivity/maps` |
 | `nano-pdf` | Edit PDFs with natural-language instructions using the nano-pdf CLI. Modify text, fix typos, update titles, and make content changes to specific pages without manual editing. | `productivity/nano-pdf` |
 | `notion` | Notion API for creating and managing pages, databases, and blocks via curl. Search, create, update, and query Notion workspaces directly from the terminal. | `productivity/notion` |
 | `ocr-and-documents` | Extract text from PDFs and scanned documents. Use web_extract for remote URLs, pymupdf for local text-based PDFs, marker-pdf for OCR/scanned docs. For DOCX use python-docx, for PPTX see the powerpoint skill. | `productivity/ocr-and-documents` |
diff --git a/website/docs/user-guide/features/cron.md b/website/docs/user-guide/features/cron.md
index 222c00827c2..4628fcc639a 100644
--- a/website/docs/user-guide/features/cron.md
+++ b/website/docs/user-guide/features/cron.md
@@ -30,7 +30,7 @@ Cron-run sessions cannot recursively create more cron jobs. Hermes disables cron
 /cron add 30m "Remind me to check the build"
 /cron add "every 2h" "Check server status"
 /cron add "every 1h" "Summarize new feed items" --skill blogwatcher
-/cron add "every 1h" "Use both skills and combine the result" --skill blogwatcher --skill find-nearby
+/cron add "every 1h" "Use both skills and combine the result" --skill blogwatcher --skill maps
 ```
 
 ### From the standalone CLI
@@ -40,7 +40,7 @@ hermes cron create "every 2h" "Check server status"
 hermes cron create "every 1h" "Summarize new feed items" --skill blogwatcher
 hermes cron create "every 1h" "Use both skills and combine the result" \
   --skill blogwatcher \
-  --skill find-nearby \
+  --skill maps \
   --name "Skill combo"
 ```
 
@@ -77,7 +77,7 @@ Skills are loaded in order. The prompt becomes the task instruction layered on t
 ```python
 cronjob(
     action="create",
-    skills=["blogwatcher", "find-nearby"],
+    skills=["blogwatcher", "maps"],
     prompt="Look for new local events and interesting nearby places, then combine them into one short brief.",
     schedule="every 6h",
     name="Local brief",
@@ -95,7 +95,7 @@ You do not need to delete and recreate jobs just to change them.
 ```bash
 /cron edit <job_id> --schedule "every 4h"
 /cron edit <job_id> --prompt "Use the revised task"
-/cron edit <job_id> --skill blogwatcher --skill find-nearby
+/cron edit <job_id> --skill blogwatcher --skill maps
 /cron edit <job_id> --remove-skill blogwatcher
 /cron edit <job_id> --clear-skills
 ```
@@ -105,8 +105,8 @@ You do not need to delete and recreate jobs just to change them.
 ```bash
 hermes cron edit <job_id> --schedule "every 4h"
 hermes cron edit <job_id> --prompt "Use the revised task"
-hermes cron edit <job_id> --skill blogwatcher --skill find-nearby
-hermes cron edit <job_id> --add-skill find-nearby
+hermes cron edit <job_id> --skill blogwatcher --skill maps
+hermes cron edit <job_id> --add-skill maps
 hermes cron edit <job_id> --remove-skill blogwatcher
 hermes cron edit <job_id> --clear-skills
 ```

From a3b76ae36d37124638b3e547b608b266f230c679 Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Sun, 19 Apr 2026 05:19:51 -0700
Subject: [PATCH 134/143] chore(attribution): add AUTHOR_MAP entry for Mibayy

Adds the Mibayy noreply email to the AUTHOR_MAP so CI attribution checks
pass for the #3884 maps skill feat commit (7fa01faf).
---
 scripts/release.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/release.py b/scripts/release.py
index 9c04c1c6b36..a20c3c134fa 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -77,6 +77,7 @@ AUTHOR_MAP = {
     "Asunfly@users.noreply.github.com": "Asunfly",
     "2500400+honghua@users.noreply.github.com": "honghua",
     "nish3451@users.noreply.github.com": "nish3451",
+    "Mibayy@users.noreply.github.com": "Mibayy",
     "135070653+sgaofen@users.noreply.github.com": "sgaofen",
     # contributors (manual mapping from git names)
     "ahmedsherif95@gmail.com": "asheriif",

From d5fc8a5e00dfd396cd188f605ff2abc76fce3c2e Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 19 Apr 2026 05:19:57 -0700
Subject: [PATCH 135/143] fix(tui): reject /model and agent-mutating slash
 passthroughs while running (#12548)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

agent.switch_model() mutates self.model, self.provider, self.base_url,
self.api_key, self.api_mode, and rebuilds self.client / self._anthropic_client
in place.  The worker thread running agent.run_conversation reads those
fields on every iteration.  A concurrent config.set key=model or slash-
worker-mirrored /model / /personality / /prompt / /compress can send an
HTTP request with mismatched model + base_url (or the old client keeps
running against a new endpoint) — 400/404s the user never asked for.

Fix: same pattern as the session.undo / session.compress guards
(PR #12416) and the gateway runner's running-agent /model guard (PR
#12334).  Reject with 4009 'session busy' when session.running is True.

Two call sites guarded:
- config.set with key=model: primary /model entry point from Ink
- _mirror_slash_side_effects for model / personality / prompt /
  compress: slash-worker passthrough path that applies live-agent
  side effects

Idle sessions still switch models normally — regression guard test
verifies this.

Tests (tests/test_tui_gateway_server.py): 4 new cases.
- test_config_set_model_rejects_while_running
- test_config_set_model_allowed_when_idle (regression guard)
- test_mirror_slash_side_effects_rejects_mutating_commands_while_running
- test_mirror_slash_side_effects_allowed_when_idle (regression guard)

Validated: against unpatched server.py, the two *_while_running tests
fail because the mid-turn mutation is not rejected.  With the fix all
4 pass.  Live E2E against the live Python environment confirmed both
guards report 'session busy' (error code 4009 on the config.set path).
---
 tests/test_tui_gateway_server.py | 121 +++++++++++++++++++++++++++++++
 tui_gateway/server.py            |  24 ++++++
 2 files changed, 145 insertions(+)

diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py
index 07a68ac9e9d..c0f52390356 100644
--- a/tests/test_tui_gateway_server.py
+++ b/tests/test_tui_gateway_server.py
@@ -828,3 +828,124 @@ def test_respond_unpacks_sid_tuple_correctly():
         server._pending.pop("rid-x", None)
         server._answers.pop("rid-x", None)
 
+
+
+# ---------------------------------------------------------------------------
+# /model switch and other agent-mutating commands must reject while the
+# session is running.  agent.switch_model() mutates self.model, self.provider,
+# self.base_url, self.client etc. in place — the worker thread running
+# agent.run_conversation is reading those on every iteration.  Same class of
+# bug as the session.undo / session.compress mid-run silent-drop; same fix
+# pattern: reject with 4009 while running.
+# ---------------------------------------------------------------------------
+
+
+def test_config_set_model_rejects_while_running(monkeypatch):
+    """/model via config.set must reject during an in-flight turn."""
+    seen = {"called": False}
+
+    def _fake_apply(sid, session, raw):
+        seen["called"] = True
+        return {"value": raw, "warning": ""}
+
+    monkeypatch.setattr(server, "_apply_model_switch", _fake_apply)
+
+    server._sessions["sid"] = _session(running=True)
+    try:
+        resp = server.handle_request({
+            "id": "1", "method": "config.set",
+            "params": {"session_id": "sid", "key": "model", "value": "anthropic/claude-sonnet-4.6"},
+        })
+        assert resp.get("error")
+        assert resp["error"]["code"] == 4009
+        assert "session busy" in resp["error"]["message"]
+        assert not seen["called"], (
+            "_apply_model_switch was called mid-turn — would race with "
+            "the worker thread reading agent.model / agent.client"
+        )
+    finally:
+        server._sessions.pop("sid", None)
+
+
+def test_config_set_model_allowed_when_idle(monkeypatch):
+    """Regression guard: idle sessions can still switch models."""
+    seen = {"called": False}
+
+    def _fake_apply(sid, session, raw):
+        seen["called"] = True
+        return {"value": "newmodel", "warning": ""}
+
+    monkeypatch.setattr(server, "_apply_model_switch", _fake_apply)
+
+    server._sessions["sid"] = _session(running=False)
+    try:
+        resp = server.handle_request({
+            "id": "1", "method": "config.set",
+            "params": {"session_id": "sid", "key": "model", "value": "newmodel"},
+        })
+        assert resp.get("result")
+        assert resp["result"]["value"] == "newmodel"
+        assert seen["called"]
+    finally:
+        server._sessions.pop("sid", None)
+
+
+def test_mirror_slash_side_effects_rejects_mutating_commands_while_running(monkeypatch):
+    """Slash worker passthrough (e.g. /model, /personality, /prompt,
+    /compress) must reject during an in-flight turn.  Same race as
+    config.set — mutates live agent state while run_conversation is
+    reading it."""
+    import types
+
+    applied = {"model": False, "compress": False}
+
+    def _fake_apply_model(sid, session, arg):
+        applied["model"] = True
+        return {"value": arg, "warning": ""}
+
+    def _fake_compress(session, focus):
+        applied["compress"] = True
+        return (0, {})
+
+    monkeypatch.setattr(server, "_apply_model_switch", _fake_apply_model)
+    monkeypatch.setattr(server, "_compress_session_history", _fake_compress)
+
+    session = _session(running=True)
+    session["agent"] = types.SimpleNamespace(model="x")
+
+    for cmd, expected_name in [
+        ("/model new/model", "model"),
+        ("/personality default", "personality"),
+        ("/prompt", "prompt"),
+        ("/compress", "compress"),
+    ]:
+        warning = server._mirror_slash_side_effects("sid", session, cmd)
+        assert "session busy" in warning, (
+            f"{cmd} should have returned busy warning, got: {warning!r}"
+        )
+        assert f"/{expected_name}" in warning
+
+    # None of the mutating side-effect helpers should have fired.
+    assert not applied["model"], "model switch fired despite running session"
+    assert not applied["compress"], "compress fired despite running session"
+
+
+def test_mirror_slash_side_effects_allowed_when_idle(monkeypatch):
+    """Regression guard: idle session still runs the side effects."""
+    import types
+
+    applied = {"model": False}
+
+    def _fake_apply_model(sid, session, arg):
+        applied["model"] = True
+        return {"value": arg, "warning": ""}
+
+    monkeypatch.setattr(server, "_apply_model_switch", _fake_apply_model)
+
+    session = _session(running=False)
+    session["agent"] = types.SimpleNamespace(model="x")
+
+    warning = server._mirror_slash_side_effects("sid", session, "/model foo")
+    # Should NOT contain "session busy" — the switch went through.
+    assert "session busy" not in warning
+    assert applied["model"]
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 921f868a3c0..00f8346191d 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -1743,6 +1743,19 @@ def _(rid, params: dict) -> dict:
             if not value:
                 return _err(rid, 4002, "model value required")
             if session:
+                # Reject during an in-flight turn.  agent.switch_model()
+                # mutates self.model / self.provider / self.base_url /
+                # self.client in place; the worker thread running
+                # agent.run_conversation is reading those on every
+                # iteration.  A mid-turn swap can send an HTTP request
+                # with the new base_url but old model (or vice versa),
+                # producing 400/404s the user never asked for.  Parity
+                # with the gateway's running-agent /model guard.
+                if session.get("running"):
+                    return _err(
+                        rid, 4009,
+                        "session busy — /interrupt the current turn before switching models",
+                    )
                 result = _apply_model_switch(params.get("session_id", ""), session, value)
             else:
                 result = _apply_model_switch("", {"agent": None}, value)
@@ -2446,6 +2459,17 @@ def _mirror_slash_side_effects(sid: str, session: dict, command: str) -> str:
         return ""
     name, arg, agent = parts[0], (parts[1].strip() if len(parts) > 1 else ""), session.get("agent")
 
+    # Reject agent-mutating commands during an in-flight turn.  These
+    # all do read-then-mutate on live agent/session state that the
+    # worker thread running agent.run_conversation is using.  Parity
+    # with the session.compress / session.undo guards and the gateway
+    # runner's running-agent /model guard.
+    _MUTATES_WHILE_RUNNING = {"model", "personality", "prompt", "compress"}
+    if name in _MUTATES_WHILE_RUNNING and session.get("running"):
+        return (
+            f"session busy — /interrupt the current turn before running /{name}"
+        )
+
     try:
         if name == "model" and arg and agent:
             result = _apply_model_switch(sid, session, arg)

From 37524a574ec94adcd40e65d4cbb847e84153aa92 Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Thu, 9 Apr 2026 03:16:04 -0700
Subject: [PATCH 136/143] docs: add PR review guides, rework quickstart, slim
 down installation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds two complementary GitHub PR review guides from contest submissions:
- Cron-based PR review agent (from PR #5836 by @dieutx) — polls on a
  schedule, no server needed, teaches skills + memory authoring
- Webhook-based PR review (from PR #6503 by @gaijinkush) — real-time via
  GitHub webhooks, documents previously undocumented webhook feature
Both guides are cross-linked so users can pick the approach that fits.

Reworks quickstart.md by integrating the best content from PR #5744
by @aidil2105:
- Opinionated decision table ('The fastest path')
- Common failure modes table with causes and fixes
- Recovery toolkit sequence
- Session lifecycle verification step
- Better first-chat guidance with example prompts

Slims down installation.md:
- Removes 10-step manual/dev install section (already covered in
  developer-guide/contributing.md)
- Links to Contributing guide for dev setup
- Keeps focused on the automated installer + prerequisites + troubleshooting
---
 website/docs/getting-started/installation.md  | 199 +----------
 website/docs/getting-started/quickstart.md    | 255 ++++++++------
 website/docs/guides/github-pr-review-agent.md | 300 ++++++++++++++++
 .../docs/guides/webhook-github-pr-review.md   | 329 ++++++++++++++++++
 website/sidebars.ts                           |   2 +
 5 files changed, 784 insertions(+), 301 deletions(-)
 create mode 100644 website/docs/guides/github-pr-review-agent.md
 create mode 100644 website/docs/guides/webhook-github-pr-review.md

diff --git a/website/docs/getting-started/installation.md b/website/docs/getting-started/installation.md
index a28b1256e6e..219c1e7d555 100644
--- a/website/docs/getting-started/installation.md
+++ b/website/docs/getting-started/installation.md
@@ -6,7 +6,7 @@ description: "Install Hermes Agent on Linux, macOS, WSL2, or Android via Termux"
 
 # Installation
 
-Get Hermes Agent up and running in under two minutes with the one-line installer, or follow the manual steps for full control.
+Get Hermes Agent up and running in under two minutes with the one-line installer.
 
 ## Quick Install
 
@@ -82,202 +82,9 @@ If you use Nix (on NixOS, macOS, or Linux), there's a dedicated setup path with
 
 ---
 
-## Manual Installation
+## Manual / Developer Installation
 
-If you prefer full control over the installation process, follow these steps.
-
-### Step 1: Clone the Repository
-
-Clone with `--recurse-submodules` to pull the required submodules:
-
-```bash
-git clone --recurse-submodules https://github.com/NousResearch/hermes-agent.git
-cd hermes-agent
-```
-
-If you already cloned without `--recurse-submodules`:
-```bash
-git submodule update --init --recursive
-```
-
-### Step 2: Install uv & Create Virtual Environment
-
-```bash
-# Install uv (if not already installed)
-curl -LsSf https://astral.sh/uv/install.sh | sh
-
-# Create venv with Python 3.11 (uv downloads it if not present — no sudo needed)
-uv venv venv --python 3.11
-```
-
-:::tip
-You do **not** need to activate the venv to use `hermes`. The entry point has a hardcoded shebang pointing to the venv Python, so it works globally once symlinked.
-:::
-
-### Step 3: Install Python Dependencies
-
-```bash
-# Tell uv which venv to install into
-export VIRTUAL_ENV="$(pwd)/venv"
-
-# Install with all extras
-uv pip install -e ".[all]"
-```
-
-If you only want the core agent (no Telegram/Discord/cron support):
-```bash
-uv pip install -e "."
-```
-
-<details>
-<summary><strong>Optional extras breakdown</strong></summary>
-
-| Extra | What it adds | Install command |
-|-------|-------------|-----------------|
-| `all` | Everything below | `uv pip install -e ".[all]"` |
-| `messaging` | Telegram, Discord & Slack gateway | `uv pip install -e ".[messaging]"` |
-| `cron` | Cron expression parsing for scheduled tasks | `uv pip install -e ".[cron]"` |
-| `cli` | Terminal menu UI for setup wizard | `uv pip install -e ".[cli]"` |
-| `modal` | Modal cloud execution backend | `uv pip install -e ".[modal]"` |
-| `tts-premium` | ElevenLabs premium voices | `uv pip install -e ".[tts-premium]"` |
-| `voice` | CLI microphone input + audio playback | `uv pip install -e ".[voice]"` |
-| `pty` | PTY terminal support | `uv pip install -e ".[pty]"` |
-| `termux` | Tested Android / Termux bundle (`cron`, `cli`, `pty`, `mcp`, `honcho`, `acp`) | `python -m pip install -e ".[termux]" -c constraints-termux.txt` |
-| `honcho` | AI-native memory (Honcho integration) | `uv pip install -e ".[honcho]"` |
-| `mcp` | Model Context Protocol support | `uv pip install -e ".[mcp]"` |
-| `homeassistant` | Home Assistant integration | `uv pip install -e ".[homeassistant]"` |
-| `acp` | ACP editor integration support | `uv pip install -e ".[acp]"` |
-| `slack` | Slack messaging | `uv pip install -e ".[slack]"` |
-| `dev` | pytest & test utilities | `uv pip install -e ".[dev]"` |
-
-You can combine extras: `uv pip install -e ".[messaging,cron]"`
-
-:::tip Termux users
-`.[all]` is not currently available on Android because the `voice` extra pulls `faster-whisper`, which depends on `ctranslate2` wheels that are not published for Android. Use `.[termux]` for the tested mobile install path, then add individual extras only as needed.
-:::
-
-</details>
-
-### Step 4: Install Optional Submodules (if needed)
-
-```bash
-# RL training backend (optional)
-uv pip install -e "./tinker-atropos"
-```
-
-Both are optional — if you skip them, the corresponding toolsets simply won't be available.
-
-### Step 5: Install Node.js Dependencies (Optional)
-
-Only needed for **browser automation** (Browserbase-powered) and **WhatsApp bridge**:
-
-```bash
-npm install
-```
-
-### Step 6: Create the Configuration Directory
-
-```bash
-# Create the directory structure
-mkdir -p ~/.hermes/{cron,sessions,logs,memories,skills,pairing,hooks,image_cache,audio_cache,whatsapp/session}
-
-# Copy the example config file
-cp cli-config.yaml.example ~/.hermes/config.yaml
-
-# Create an empty .env file for API keys
-touch ~/.hermes/.env
-```
-
-### Step 7: Add Your API Keys
-
-Open `~/.hermes/.env` and add at minimum an LLM provider key:
-
-```bash
-# Required — at least one LLM provider:
-OPENROUTER_API_KEY=sk-or-v1-your-key-here
-
-# Optional — enable additional tools:
-FIRECRAWL_API_KEY=fc-your-key          # Web search & scraping (or self-host, see docs)
-FAL_KEY=your-fal-key                   # Image generation (FLUX)
-```
-
-Or set them via the CLI:
-```bash
-hermes config set OPENROUTER_API_KEY sk-or-v1-your-key-here
-```
-
-### Step 8: Add `hermes` to Your PATH
-
-```bash
-mkdir -p ~/.local/bin
-ln -sf "$(pwd)/venv/bin/hermes" ~/.local/bin/hermes
-```
-
-If `~/.local/bin` isn't on your PATH, add it to your shell config:
-
-```bash
-# Bash
-echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.bashrc && source ~/.bashrc
-
-# Zsh
-echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.zshrc && source ~/.zshrc
-
-# Fish
-fish_add_path $HOME/.local/bin
-```
-
-### Step 9: Configure Your Provider
-
-```bash
-hermes model       # Select your LLM provider and model
-```
-
-### Step 10: Verify the Installation
-
-```bash
-hermes version    # Check that the command is available
-hermes doctor     # Run diagnostics to verify everything is working
-hermes status     # Check your configuration
-hermes chat -q "Hello! What tools do you have available?"
-```
-
----
-
-## Quick-Reference: Manual Install (Condensed)
-
-For those who just want the commands:
-
-```bash
-# Install uv
-curl -LsSf https://astral.sh/uv/install.sh | sh
-
-# Clone & enter
-git clone --recurse-submodules https://github.com/NousResearch/hermes-agent.git
-cd hermes-agent
-
-# Create venv with Python 3.11
-uv venv venv --python 3.11
-export VIRTUAL_ENV="$(pwd)/venv"
-
-# Install everything
-uv pip install -e ".[all]"
-uv pip install -e "./tinker-atropos"
-npm install  # optional, for browser tools and WhatsApp
-
-# Configure
-mkdir -p ~/.hermes/{cron,sessions,logs,memories,skills,pairing,hooks,image_cache,audio_cache,whatsapp/session}
-cp cli-config.yaml.example ~/.hermes/config.yaml
-touch ~/.hermes/.env
-echo 'OPENROUTER_API_KEY=sk-or-v1-your-key' >> ~/.hermes/.env
-
-# Make hermes available globally
-mkdir -p ~/.local/bin
-ln -sf "$(pwd)/venv/bin/hermes" ~/.local/bin/hermes
-
-# Verify
-hermes doctor
-hermes
-```
+If you want to clone the repo and install from source — for contributing, running from a specific branch, or having full control over the virtual environment — see the [Development Setup](../developer-guide/contributing.md#development-setup) section in the Contributing guide.
 
 ---
 
diff --git a/website/docs/getting-started/quickstart.md b/website/docs/getting-started/quickstart.md
index 8a39c49f1e8..b67f63ae36e 100644
--- a/website/docs/getting-started/quickstart.md
+++ b/website/docs/getting-started/quickstart.md
@@ -1,12 +1,35 @@
 ---
 sidebar_position: 1
 title: "Quickstart"
-description: "Your first conversation with Hermes Agent — from install to chatting in 2 minutes"
+description: "Your first conversation with Hermes Agent — from install to chatting in under 5 minutes"
 ---
 
 # Quickstart
 
-This guide walks you through installing Hermes Agent, setting up a provider, and having your first conversation. By the end, you'll know the key features and how to explore further.
+This guide gets you from zero to a working Hermes setup that survives real use. Install, choose a provider, verify a working chat, and know exactly what to do when something breaks.
+
+## Who this is for
+
+- Brand new and want the shortest path to a working setup
+- Switching providers and don't want to lose time to config mistakes
+- Setting up Hermes for a team, bot, or always-on workflow
+- Tired of "it installed, but it still does nothing"
+
+## The fastest path
+
+Pick the row that matches your goal:
+
+| Goal | Do this first | Then do this |
+|---|---|---|
+| I just want Hermes working on my machine | `hermes setup` | Run a real chat and verify it responds |
+| I already know my provider | `hermes model` | Save the config, then start chatting |
+| I want a bot or always-on setup | `hermes gateway setup` after CLI works | Connect Telegram, Discord, Slack, or another platform |
+| I want a local or self-hosted model | `hermes model` → custom endpoint | Verify the endpoint, model name, and context length |
+| I want multi-provider fallback | `hermes model` first | Add routing and fallback only after the base chat works |
+
+**Rule of thumb:** if Hermes cannot complete a normal chat, do not add more features yet. Get one clean conversation working first, then layer on gateway, cron, skills, voice, or routing.
+
+---
 
 ## 1. Install Hermes Agent
 
@@ -31,86 +54,109 @@ After it finishes, reload your shell:
 source ~/.bashrc   # or source ~/.zshrc
 ```
 
-## 2. Set Up a Provider
+For detailed installation options, prerequisites, and troubleshooting, see the [Installation guide](./installation.md).
 
-The installer configures your LLM provider automatically. To change it later, use one of these commands:
+## 2. Choose a Provider
+
+The single most important setup step. Use `hermes model` to walk through the choice interactively:
 
 ```bash
-hermes model       # Choose your LLM provider and model
-hermes tools       # Configure which tools are enabled
-hermes setup       # Or configure everything at once
+hermes model
 ```
 
-`hermes model` walks you through selecting an inference provider:
+Good defaults:
 
-| Provider | What it is | How to set up |
-|----------|-----------|---------------|
-| **Nous Portal** | Subscription-based, zero-config | OAuth login via `hermes model` |
-| **OpenAI Codex** | ChatGPT OAuth, uses Codex models | Device code auth via `hermes model` |
-| **Anthropic** | Claude models directly (Pro/Max or API key) | `hermes model` with Claude Code auth, or an Anthropic API key |
-| **OpenRouter** | Multi-provider routing across many models | Enter your API key |
-| **Z.AI** | GLM / Zhipu-hosted models | Set `GLM_API_KEY` / `ZAI_API_KEY` |
-| **Kimi / Moonshot** | Moonshot-hosted coding and chat models | Set `KIMI_API_KEY` |
-| **Kimi / Moonshot China** | China-region Moonshot endpoint | Set `KIMI_CN_API_KEY` |
-| **Arcee AI** | Trinity models | Set `ARCEEAI_API_KEY` |
-| **Xiaomi MiMo** | Xiaomi MiMo models via [platform.xiaomimimo.com](https://platform.xiaomimimo.com) | Set `XIAOMI_API_KEY` |
-| **AWS Bedrock** | Anthropic Claude, Amazon Nova, DeepSeek v3.2, and Meta Llama via AWS | Standard boto3 auth (`AWS_PROFILE` or `AWS_ACCESS_KEY_ID` + `AWS_REGION`) |
-| **Qwen Portal (OAuth)** | Qwen 3.5 / Qwen-Coder models via Alibaba's consumer Qwen Portal | OAuth via `hermes model` (optional: `HERMES_QWEN_BASE_URL`) |
-| **MiniMax** | International MiniMax endpoint | Set `MINIMAX_API_KEY` |
-| **MiniMax China** | China-region MiniMax endpoint | Set `MINIMAX_CN_API_KEY` |
-| **Alibaba Cloud** | Qwen models via DashScope | Set `DASHSCOPE_API_KEY` |
-| **Hugging Face** | 20+ open models via unified router (Qwen, DeepSeek, Kimi, etc.) | Set `HF_TOKEN` |
-| **Kilo Code** | KiloCode-hosted models | Set `KILOCODE_API_KEY` |
-| **OpenCode Zen** | Pay-as-you-go access to curated models | Set `OPENCODE_ZEN_API_KEY` |
-| **OpenCode Go** | $10/month subscription for open models | Set `OPENCODE_GO_API_KEY` |
-| **DeepSeek** | Direct DeepSeek API access | Set `DEEPSEEK_API_KEY` |
-| **NVIDIA NIM** | Nemotron models via build.nvidia.com or local NIM | Set `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) |
-| **Ollama Cloud** | Managed Ollama catalog without local GPU | Set `OLLAMA_API_KEY` (or pick **Ollama Cloud** in `hermes model`) |
-| **Google Gemini (OAuth)** | Gemini via Cloud Code Assist — free and paid tiers | OAuth via `hermes model` (optional: `HERMES_GEMINI_PROJECT_ID` for paid tiers) |
-| **xAI (Grok)** | Grok 4 models via Responses API + prompt caching | Set `XAI_API_KEY` (alias: `grok`) |
-| **GitHub Copilot** | GitHub Copilot subscription (GPT-5.x, Claude, Gemini, etc.) | OAuth via `hermes model`, or `COPILOT_GITHUB_TOKEN` / `GH_TOKEN` |
-| **GitHub Copilot ACP** | Copilot ACP agent backend (spawns local `copilot` CLI) | `hermes model` (requires `copilot` CLI + `copilot login`) |
-| **Vercel AI Gateway** | Vercel AI Gateway routing | Set `AI_GATEWAY_API_KEY` |
-| **Custom Endpoint** | VLLM, SGLang, Ollama, or any OpenAI-compatible API | Set base URL + API key |
+| Situation | Recommended path |
+|---|---|
+| Least friction | Nous Portal or OpenRouter |
+| You already have Claude or Codex auth | Anthropic or OpenAI Codex |
+| You want local/private inference | Ollama or any custom OpenAI-compatible endpoint |
+| You want multi-provider routing | OpenRouter |
+| You have a custom GPU server | vLLM, SGLang, LiteLLM, or any OpenAI-compatible endpoint |
+
+For most first-time users: choose a provider and accept the defaults unless you know why you're changing them. The full provider catalog with env vars and setup steps lives on the [Providers](../integrations/providers.md) page.
 
 :::caution Minimum context: 64K tokens
 Hermes Agent requires a model with at least **64,000 tokens** of context. Models with smaller windows cannot maintain enough working memory for multi-step tool-calling workflows and will be rejected at startup. Most hosted models (Claude, GPT, Gemini, Qwen, DeepSeek) meet this easily. If you're running a local model, set its context size to at least 64K (e.g. `--ctx-size 65536` for llama.cpp or `-c 65536` for Ollama).
 :::
 
 :::tip
-You can switch providers at any time with `hermes model` — no code changes, no lock-in. When configuring a custom endpoint, Hermes will prompt for the context window size and auto-detect it when possible. See [Context Length Detection](../integrations/providers.md#context-length-detection) for details.
+You can switch providers at any time with `hermes model` — no lock-in. For a full list of all supported providers and setup details, see [AI Providers](../integrations/providers.md).
 :::
 
-## 3. Start Chatting
+### How settings are stored
+
+Hermes separates secrets from normal config:
+
+- **Secrets and tokens** → `~/.hermes/.env`
+- **Non-secret settings** → `~/.hermes/config.yaml`
+
+The easiest way to set values correctly is through the CLI:
+
+```bash
+hermes config set model anthropic/claude-opus-4.6
+hermes config set terminal.backend docker
+hermes config set OPENROUTER_API_KEY sk-or-...
+```
+
+The right value goes to the right file automatically.
+
+## 3. Run Your First Chat
 
 ```bash
 hermes            # classic CLI
 hermes --tui      # modern TUI (recommended)
 ```
 
-That's it! You'll see a welcome banner with your model, available tools, and skills. Type a message and press Enter.
+You'll see a welcome banner with your model, available tools, and skills. For your first message, use a prompt that's specific and easy to verify, such as one of the examples shown after the tip below.
 
 :::tip Pick your interface
 Hermes ships with two terminal interfaces: the classic `prompt_toolkit` CLI and a newer [TUI](../user-guide/tui.md) with modal overlays, mouse selection, and non-blocking input. Both share the same sessions, slash commands, and config — try each with `hermes` vs `hermes --tui`.
 :::
 
 ```
-❯ What can you help me with?
+Summarize this repo in 5 bullets and tell me what the main entrypoint is.
 ```
 
-The agent has access to tools for web search, file operations, terminal commands, and more — all out of the box.
+```
+Check my current directory and tell me what looks like the main project file.
+```
 
-## 4. Try Key Features
+```
+Help me set up a clean GitHub PR workflow for this codebase.
+```
 
-### Ask it to use the terminal
+**What success looks like:**
+
+- The banner shows your chosen model/provider
+- Hermes replies without error
+- It can use a tool if needed (terminal, file read, web search)
+- The conversation continues normally for more than one turn
+
+If that works, you're past the hardest part.
+
+## 4. Verify Sessions Work
+
+Before moving on, make sure resume works:
+
+```bash
+hermes --continue    # Resume the most recent session
+hermes -c            # Short form
+```
+
+That should bring you back to the session you just had. If it doesn't, check whether you're in the same profile and whether the session actually saved. This matters later when you're juggling multiple setups or machines.
+
+## 5. Try Key Features
+
+### Use the terminal
 
 ```
 ❯ What's my disk usage? Show the top 5 largest directories.
 ```
 
-The agent will run terminal commands on your behalf and show you the results.
+The agent runs terminal commands on your behalf and shows results.
 
-### Use slash commands
+### Slash commands
 
 Type `/` to see an autocomplete dropdown of all commands:
 
@@ -128,22 +174,27 @@ Press `Alt+Enter` or `Ctrl+J` to add a new line. Great for pasting code or writi
 
 ### Interrupt the agent
 
-If the agent is taking too long, just type a new message and press Enter — it interrupts the current task and switches to your new instructions. `Ctrl+C` also works.
+If the agent is taking too long, type a new message and press Enter — it interrupts the current task and switches to your new instructions. `Ctrl+C` also works.
 
-### Resume a session
+## 6. Add the Next Layer
 
-When you exit, hermes prints a resume command:
+Add these only after the base chat works. Pick what you need:
+
+### Bot or shared assistant
 
 ```bash
-hermes --continue    # Resume the most recent session
-hermes -c            # Short form
+hermes gateway setup    # Interactive platform configuration
 ```
 
-## 5. Explore Further
+Connect [Telegram](/docs/user-guide/messaging/telegram), [Discord](/docs/user-guide/messaging/discord), [Slack](/docs/user-guide/messaging/slack), [WhatsApp](/docs/user-guide/messaging/whatsapp), [Signal](/docs/user-guide/messaging/signal), [Email](/docs/user-guide/messaging/email), or [Home Assistant](/docs/user-guide/messaging/homeassistant).
 
-Here are some things to try next:
+### Automation and tools
 
-### Set up a sandboxed terminal
+- `hermes tools` — tune tool access per platform
+- `hermes skills` — browse and install reusable workflows
+- Cron — only after your bot or CLI setup is stable
+
+### Sandboxed terminal
 
 For safety, run the agent in a Docker container or on a remote server:
 
@@ -152,71 +203,25 @@ hermes config set terminal.backend docker    # Docker isolation
 hermes config set terminal.backend ssh       # Remote server
 ```
 
-### Connect messaging platforms
-
-Chat with Hermes from your phone or other surfaces via Telegram, Discord, Slack, WhatsApp, Signal, Email, or Home Assistant:
-
-```bash
-hermes gateway setup    # Interactive platform configuration
-```
-
-### Add voice mode
-
-Want microphone input in the CLI or spoken replies in messaging?
+### Voice mode
 
 ```bash
 pip install "hermes-agent[voice]"
 # Includes faster-whisper for free local speech-to-text
 ```
 
-Then start Hermes and enable it inside the CLI:
+Then in the CLI: `/voice on`. Press `Ctrl+B` to record. See [Voice Mode](../user-guide/features/voice-mode.md).
 
-```text
-/voice on
-```
-
-Press `Ctrl+B` to record, or use `/voice tts` to have Hermes speak its replies. See [Voice Mode](../user-guide/features/voice-mode.md) for the full setup across CLI, Telegram, Discord, and Discord voice channels.
-
-### Schedule automated tasks
-
-```
-❯ Every morning at 9am, check Hacker News for AI news and send me a summary on Telegram.
-```
-
-The agent will set up a cron job that runs automatically via the gateway.
-
-### Browse and install skills
+### Skills
 
 ```bash
 hermes skills search kubernetes
-hermes skills search react --source skills-sh
-hermes skills search https://mintlify.com/docs --source well-known
 hermes skills install openai/skills/k8s
-hermes skills install official/security/1password
-hermes skills install skills-sh/vercel-labs/json-render/json-render-react --force
 ```
 
-Tips:
-- Use `--source skills-sh` to search the public `skills.sh` directory.
-- Use `--source well-known` with a docs/site URL to discover skills from `/.well-known/skills/index.json`.
-- Use `--force` only after reviewing a third-party skill. It can override non-dangerous policy blocks, but not a `dangerous` scan verdict.
+Or use `/skills` inside a chat session.
 
-Or use the `/skills` slash command inside chat.
-
-### Use Hermes inside an editor via ACP
-
-Hermes can also run as an ACP server for ACP-compatible editors like VS Code, Zed, and JetBrains:
-
-```bash
-pip install -e '.[acp]'
-hermes acp
-```
-
-See [ACP Editor Integration](../user-guide/features/acp.md) for setup details.
-
-### Try MCP servers
-
-Connect to external tools via the Model Context Protocol:
+### MCP servers
 
 ```yaml
 # Add to ~/.hermes/config.yaml
@@ -228,6 +233,43 @@ mcp_servers:
       GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_xxx"
 ```
 
+### Editor integration (ACP)
+
+```bash
+pip install -e '.[acp]'
+hermes acp
+```
+
+See [ACP Editor Integration](../user-guide/features/acp.md).
+
+---
+
+## Common Failure Modes
+
+These are the problems that waste the most time:
+
+| Symptom | Likely cause | Fix |
+|---|---|---|
+| Hermes opens but gives empty or broken replies | Provider auth or model selection is wrong | Run `hermes model` again and confirm provider, model, and auth |
+| Custom endpoint "works" but returns garbage | Wrong base URL, model name, or not actually OpenAI-compatible | Verify the endpoint in a separate client first |
+| Gateway starts but nobody can message it | Bot token, allowlist, or platform setup is incomplete | Re-run `hermes gateway setup` and check `hermes gateway status` |
+| `hermes --continue` can't find old session | Switched profiles or session never saved | Check `hermes sessions list` and confirm you're in the right profile |
+| Model unavailable or odd fallback behavior | Provider routing or fallback settings are too aggressive | Keep routing off until the base provider is stable |
+| `hermes doctor` flags config problems | Config values are missing or stale | Fix the config, then retest a plain chat before adding features |
+
+## Recovery Toolkit
+
+When something feels off, use this order:
+
+1. `hermes doctor`
+2. `hermes model`
+3. `hermes setup`
+4. `hermes sessions list`
+5. `hermes --continue`
+6. `hermes gateway status`
+
+That sequence gets you from "broken vibes" back to a known state fast.
+
 ---
 
 ## Quick Reference
@@ -249,3 +291,6 @@ mcp_servers:
 - **[Configuration](../user-guide/configuration.md)** — Customize your setup
 - **[Messaging Gateway](../user-guide/messaging/index.md)** — Connect Telegram, Discord, Slack, WhatsApp, Signal, Email, or Home Assistant
 - **[Tools & Toolsets](../user-guide/features/tools.md)** — Explore available capabilities
+- **[AI Providers](../integrations/providers.md)** — Full provider list and setup details
+- **[Skills System](../user-guide/features/skills.md)** — Reusable workflows and knowledge
+- **[Tips & Best Practices](../guides/tips.md)** — Power user tips
diff --git a/website/docs/guides/github-pr-review-agent.md b/website/docs/guides/github-pr-review-agent.md
new file mode 100644
index 00000000000..530d8d6df05
--- /dev/null
+++ b/website/docs/guides/github-pr-review-agent.md
@@ -0,0 +1,300 @@
+---
+sidebar_position: 10
+title: "Tutorial: GitHub PR Review Agent"
+description: "Build an automated AI code reviewer that monitors your repos, reviews pull requests, and delivers feedback — hands-free"
+---
+
+# Tutorial: Build a GitHub PR Review Agent
+
+**The problem:** Your team opens PRs faster than you can review them. PRs sit for days waiting for eyeballs. Junior devs merge bugs because nobody had time to check. You spend your mornings catching up on diffs instead of building.
+
+**The solution:** An AI agent that watches your repos around the clock, reviews every new PR for bugs, security issues, and code quality, and sends you a summary — so you only spend time on PRs that actually need human judgment.
+
+**What you'll build:**
+
+```
+┌──────────────┐     ┌───────────────┐     ┌──────────────┐     ┌──────────────┐
+│  Cron Timer  │────▶│  Hermes Agent │────▶│  GitHub API  │────▶│  Review to   │
+│  (every 2h)  │     │  + gh CLI     │     │  (PR diffs)  │     │  Telegram/   │
+│              │     │  + skill      │     │              │     │  Discord/    │
+│              │     │  + memory     │     │              │     │  local file  │
+└──────────────┘     └───────────────┘     └──────────────┘     └──────────────┘
+```
+
+This guide uses **cron jobs** to poll for PRs on a schedule — no server or public endpoint needed. Works behind NAT and firewalls.
+
+:::tip Want real-time reviews instead?
+If you have a public endpoint available, check out [Automated GitHub PR Comments with Webhooks](./webhook-github-pr-review.md) — GitHub pushes events to Hermes instantly when PRs are opened or updated.
+:::
+
+---
+
+## Prerequisites
+
+- **Hermes Agent installed** — see the [Installation guide](/docs/getting-started/installation)
+- **Gateway running** for cron jobs:
+  ```bash
+  hermes gateway install   # Install as a service
+  # or
+  hermes gateway           # Run in foreground
+  ```
+- **GitHub CLI (`gh`) installed and authenticated**:
+  ```bash
+  # Install
+  brew install gh        # macOS
+  sudo apt install gh    # Ubuntu/Debian
+
+  # Authenticate
+  gh auth login
+  ```
+- **Messaging configured** (optional) — [Telegram](/docs/user-guide/messaging/telegram) or [Discord](/docs/user-guide/messaging/discord)
+
+:::tip No messaging? No problem
+Use `deliver: "local"` to save reviews to `~/.hermes/cron/output/`. Great for testing before wiring up notifications.
+:::
+
+---
+
+## Step 1: Verify the Setup
+
+Make sure Hermes can access GitHub. Start a chat:
+
+```bash
+hermes
+```
+
+Test with a simple command:
+
+```
+Run: gh pr list --repo NousResearch/hermes-agent --state open --limit 3
+```
+
+You should see a list of open PRs. If this works, you're ready.
+
+---
+
+## Step 2: Try a Manual Review
+
+Still in the chat, ask Hermes to review a real PR:
+
+```
+Review this pull request. Read the diff, check for bugs, security issues,
+and code quality. Be specific about line numbers and quote problematic code.
+
+Run: gh pr diff 3888 --repo NousResearch/hermes-agent
+```
+
+Hermes will:
+1. Execute `gh pr diff` to fetch the code changes
+2. Read through the entire diff
+3. Produce a structured review with specific findings
+
+If you're happy with the quality, time to automate it.
+
+---
+
+## Step 3: Create a Review Skill
+
+A skill gives Hermes consistent review guidelines that persist across sessions and cron runs. Without one, review quality varies.
+
+```bash
+mkdir -p ~/.hermes/skills/code-review
+```
+
+Create `~/.hermes/skills/code-review/SKILL.md`:
+
+```markdown
+---
+name: code-review
+description: Review pull requests for bugs, security issues, and code quality
+---
+
+# Code Review Guidelines
+
+When reviewing a pull request:
+
+## What to Check
+1. **Bugs** — Logic errors, off-by-one, null/undefined handling
+2. **Security** — Injection, auth bypass, secrets in code, SSRF
+3. **Performance** — N+1 queries, unbounded loops, memory leaks
+4. **Style** — Naming conventions, dead code, missing error handling
+5. **Tests** — Are changes tested? Do tests cover edge cases?
+
+## Output Format
+For each finding:
+- **File:Line** — exact location
+- **Severity** — Critical / Warning / Suggestion
+- **What's wrong** — one sentence
+- **Fix** — how to fix it
+
+## Rules
+- Be specific. Quote the problematic code.
+- Don't flag style nitpicks unless they affect readability.
+- If the PR looks good, say so. Don't invent problems.
+- End with: APPROVE / REQUEST_CHANGES / COMMENT
+```
+
+Verify it loaded — start `hermes` and you should see `code-review` in the skills list at startup.
+
+---
+
+## Step 4: Teach It Your Conventions
+
+This is what makes the reviewer actually useful. Start a session and teach Hermes your team's standards:
+
+```
+Remember: In our backend repo, we use Python with FastAPI.
+All endpoints must have type annotations and Pydantic models.
+We don't allow raw SQL — only SQLAlchemy ORM.
+Test files go in tests/ and must use pytest fixtures.
+```
+
+```
+Remember: In our frontend repo, we use TypeScript with React.
+No `any` types allowed. All components must have props interfaces.
+We use React Query for data fetching, never useEffect for API calls.
+```
+
+These memories persist forever — the reviewer will enforce your conventions without being told each time.
+
+---
+
+## Step 5: Create the Automated Cron Job
+
+Now wire it all together. Create a cron job that runs every 2 hours:
+
+```bash
+hermes cron create "0 */2 * * *" \
+  "Check for new open PRs and review them.
+
+Repos to monitor:
+- myorg/backend-api
+- myorg/frontend-app
+
+Steps:
+1. Run: gh pr list --repo REPO --state open --limit 5 --json number,title,author,createdAt,updatedAt
+2. For each PR created or updated in the last 4 hours:
+   - Run: gh pr diff NUMBER --repo REPO
+   - Review the diff using the code-review guidelines
+3. Format output as:
+
+## PR Reviews — today
+
+### [repo] #[number]: [title]
+**Author:** [name] | **Verdict:** APPROVE/REQUEST_CHANGES/COMMENT
+[findings]
+
+If no new PRs found, say: No new PRs to review." \
+  --name "pr-review" \
+  --deliver telegram \
+  --skill code-review
+```
+
+Verify it's scheduled:
+
+```bash
+hermes cron list
+```
+
+### Other useful schedules
+
+| Schedule | When |
+|----------|------|
+| `0 */2 * * *` | Every 2 hours |
+| `0 9,13,17 * * 1-5` | Three times a day, weekdays only |
+| `0 9 * * 1` | Weekly Monday morning roundup |
+| `30m` | Every 30 minutes (high-traffic repos) |
+
+---
+
+## Step 6: Run It On Demand
+
+Don't want to wait for the schedule? Trigger it manually:
+
+```bash
+hermes cron run pr-review
+```
+
+Or from within a chat session:
+
+```
+/cron run pr-review
+```
+
+---
+
+## Going Further
+
+### Post Reviews Directly to GitHub
+
+Instead of delivering to Telegram, have the agent comment on the PR itself.
+
+Add this to your cron prompt:
+
+```
+After reviewing, post your review:
+- For non-critical issues: gh pr review NUMBER --repo REPO --comment --body "YOUR_REVIEW"
+- For critical issues: gh pr review NUMBER --repo REPO --request-changes --body "YOUR_REVIEW"
+- For clean PRs: gh pr review NUMBER --repo REPO --approve --body "Looks good"
+```
+
+:::caution
+Make sure `gh` has a token with `repo` scope. Reviews are posted as whoever `gh` is authenticated as.
+:::
+
+### Weekly PR Dashboard
+
+Create a Monday morning overview of all your repos:
+
+```bash
+hermes cron create "0 9 * * 1" \
+  "Generate a weekly PR dashboard:
+- myorg/backend-api
+- myorg/frontend-app
+- myorg/infra
+
+For each repo show:
+1. Open PR count and oldest PR age
+2. PRs merged this week
+3. Stale PRs (older than 5 days)
+4. PRs with no reviewer assigned
+
+Format as a clean summary." \
+  --name "weekly-dashboard" \
+  --deliver telegram
+```
+
+### Multi-Repo Monitoring
+
+Scale up by adding more repos to the prompt. The agent processes them sequentially — no extra setup needed.
+
+---
+
+## Troubleshooting
+
+### "gh: command not found"
+The gateway runs in a minimal environment. Ensure `gh` is in the system PATH and restart the gateway.
+
+### Reviews are too generic
+1. Add the `code-review` skill (Step 3)
+2. Teach Hermes your conventions via memory (Step 4)
+3. The more context it has about your stack, the better the reviews
+
+### Cron job doesn't run
+```bash
+hermes gateway status    # Is the gateway running?
+hermes cron list         # Is the job enabled?
+```
+
+### Rate limits
+GitHub allows 5,000 API requests/hour for authenticated users. Each PR review uses ~3-5 requests (list + diff + optional comments). Even reviewing 100 PRs/day stays well within limits.
+
+---
+
+## What's Next?
+
+- **[Webhook-Based PR Reviews](./webhook-github-pr-review.md)** — get instant reviews when PRs are opened (requires a public endpoint)
+- **[Daily Briefing Bot](/docs/guides/daily-briefing-bot)** — combine PR reviews with your morning news digest
+- **[Build a Plugin](/docs/guides/build-a-hermes-plugin)** — wrap the review logic into a shareable plugin
+- **[Profiles](/docs/user-guide/profiles)** — run a dedicated reviewer profile with its own memory and config
+- **[Fallback Providers](/docs/user-guide/features/fallback-providers)** — ensure reviews run even when one provider is down
diff --git a/website/docs/guides/webhook-github-pr-review.md b/website/docs/guides/webhook-github-pr-review.md
new file mode 100644
index 00000000000..b0dd15ecea1
--- /dev/null
+++ b/website/docs/guides/webhook-github-pr-review.md
@@ -0,0 +1,329 @@
+---
+sidebar_position: 11
+sidebar_label: "GitHub PR Reviews via Webhook"
+title: "Automated GitHub PR Comments with Webhooks"
+description: "Connect Hermes to GitHub so it automatically fetches PR diffs, reviews code changes, and posts comments — triggered by webhooks with no manual prompting"
+---
+
+# Automated GitHub PR Comments with Webhooks
+
+This guide walks you through connecting Hermes Agent to GitHub so it automatically fetches a pull request's diff, analyzes the code changes, and posts a comment — triggered by a webhook event with no manual prompting.
+
+When a PR is opened or updated, GitHub sends a webhook POST to your Hermes instance. Hermes runs the agent with a prompt that instructs it to retrieve the diff via the `gh` CLI, and the response is posted back to the PR thread.
+
+:::tip Want a simpler setup without a public endpoint?
+If you don't have a public URL or just want to get started quickly, check out [Build a GitHub PR Review Agent](./github-pr-review-agent.md) — uses cron jobs to poll for PRs on a schedule, works behind NAT and firewalls.
+:::
+
+:::info Reference docs
+For the full webhook platform reference (all config options, delivery types, dynamic subscriptions, security model) see [Webhooks](/docs/user-guide/messaging/webhooks).
+:::
+
+:::warning Prompt injection risk
+Webhook payloads contain attacker-controlled data — PR titles, commit messages, and descriptions can contain malicious instructions. When your webhook endpoint is exposed to the internet, run the gateway in a sandboxed environment (Docker, SSH backend). See the [security section](#security-notes) below.
+:::
+
+---
+
+## Prerequisites
+
+- Hermes Agent installed and running (`hermes gateway`)
+- [`gh` CLI](https://cli.github.com/) installed and authenticated on the gateway host (`gh auth login`)
+- A publicly reachable URL for your Hermes instance (see [Local testing with ngrok](#local-testing-with-ngrok) if running locally)
+- Admin access to the GitHub repository (required to manage webhooks)
+
+---
+
+## Step 1 — Enable the webhook platform
+
+Add the following to your `~/.hermes/config.yaml`:
+
+```yaml
+platforms:
+  webhook:
+    enabled: true
+    extra:
+      port: 8644          # default; change if another service occupies this port
+      rate_limit: 30      # max requests per minute per route (not a global cap)
+
+      routes:
+        github-pr-review:
+          secret: "your-webhook-secret-here"   # must match the GitHub webhook secret exactly
+          events:
+            - pull_request
+
+          # The agent is instructed to fetch the actual diff before reviewing.
+          # {number} and {repository.full_name} are resolved from the GitHub payload.
+          prompt: |
+            A pull request event was received (action: {action}).
+
+            PR #{number}: {pull_request.title}
+            Author: {pull_request.user.login}
+            Branch: {pull_request.head.ref} → {pull_request.base.ref}
+            Description: {pull_request.body}
+            URL: {pull_request.html_url}
+
+            If the action is "closed" or "labeled", stop here and do not post a comment.
+
+            Otherwise:
+            1. Run: gh pr diff {number} --repo {repository.full_name}
+            2. Review the code changes for correctness, security issues, and clarity.
+            3. Write a concise, actionable review comment as your final response (it is posted to the PR automatically).
+
+          deliver: github_comment
+          deliver_extra:
+            repo: "{repository.full_name}"
+            pr_number: "{number}"
+```
+
+**Key fields:**
+
+| Field | Description |
+|---|---|
+| `secret` (route-level) | HMAC secret for this route. Falls back to `extra.secret` global if omitted. |
+| `events` | List of `X-GitHub-Event` header values to accept. Empty list = accept all. |
+| `prompt` | Template; `{field}` and `{nested.field}` resolve from the GitHub payload. |
+| `deliver` | `github_comment` posts via `gh pr comment`. `log` just writes to the gateway log. |
+| `deliver_extra.repo` | Resolves to e.g. `org/repo` from the payload. |
+| `deliver_extra.pr_number` | Resolves to the PR number from the payload. |
+
+:::note The payload does not contain code
+The GitHub webhook payload includes PR metadata (title, description, branch names, URLs) but **not the diff**. The prompt above instructs the agent to run `gh pr diff` to fetch the actual changes. The `terminal` tool is included in the default `hermes-webhook` toolset, so no extra configuration is needed.
+:::
+
+---
+
+## Step 2 — Start the gateway
+
+```bash
+hermes gateway
+```
+
+You should see:
+
+```
+[webhook] Listening on 0.0.0.0:8644 — routes: github-pr-review
+```
+
+Verify it's running:
+
+```bash
+curl http://localhost:8644/health
+# {"status": "ok", "platform": "webhook"}
+```
+
+---
+
+## Step 3 — Register the webhook on GitHub
+
+1. Go to your repository → **Settings** → **Webhooks** → **Add webhook**
+2. Fill in:
+   - **Payload URL:** `https://your-public-url.example.com/webhooks/github-pr-review`
+   - **Content type:** `application/json`
+   - **Secret:** the same value you set for `secret` in the route config
+   - **Which events?** → Select individual events → check **Pull requests**
+3. Click **Add webhook**
+
+GitHub will immediately send a `ping` event to confirm the connection. It is safely ignored — `ping` is not in your `events` list — and returns `{"status": "ignored", "event": "ping"}`. It is only logged at DEBUG level, so it won't appear in the console at the default log level.
+
+---
+
+## Step 4 — Open a test PR
+
+Create a branch, push a change, and open a PR. Within 30–90 seconds (depending on PR size and model), Hermes should post a review comment.
+
+To follow the agent's progress in real time:
+
+```bash
+tail -f "${HERMES_HOME:-$HOME/.hermes}/logs/gateway.log"
+```
+
+---
+
+## Local testing with ngrok
+
+If Hermes is running on your laptop, use [ngrok](https://ngrok.com/) to expose it:
+
+```bash
+ngrok http 8644
+```
+
+Copy the `https://...ngrok-free.app` URL and use it as your GitHub Payload URL. On the free ngrok tier the URL changes each time ngrok restarts — update your GitHub webhook each session. Paid ngrok accounts get a static domain.
+
+You can smoke-test a static route directly with `curl` — no GitHub account or real PR needed.
+
+:::tip Use `deliver: log` when testing locally
+Change `deliver: github_comment` to `deliver: log` in your config while testing. Otherwise the agent will attempt to post a comment to the fake `org/repo#99` repo in the test payload, which will fail. Switch back to `deliver: github_comment` once you're satisfied with the prompt output.
+:::
+
+```bash
+SECRET="your-webhook-secret-here"
+BODY='{"action":"opened","number":99,"pull_request":{"title":"Test PR","body":"Adds a feature.","user":{"login":"testuser"},"head":{"ref":"feat/x"},"base":{"ref":"main"},"html_url":"https://github.com/org/repo/pull/99"},"repository":{"full_name":"org/repo"}}'
+SIG=$(printf '%s' "$BODY" | openssl dgst -sha256 -hmac "$SECRET" -hex | awk '{print "sha256="$2}')
+
+curl -s -X POST http://localhost:8644/webhooks/github-pr-review \
+  -H "Content-Type: application/json" \
+  -H "X-GitHub-Event: pull_request" \
+  -H "X-Hub-Signature-256: $SIG" \
+  -d "$BODY"
+# Expected: {"status":"accepted","route":"github-pr-review","event":"pull_request","delivery_id":"..."}
+```
+
+Then watch the agent run:
+```bash
+tail -f "${HERMES_HOME:-$HOME/.hermes}/logs/gateway.log"
+```
+
+:::note
+`hermes webhook test <name>` only works for **dynamic subscriptions** created with `hermes webhook subscribe`. It does not read routes from `config.yaml`.
+:::
+
+---
+
+## Filtering to specific actions
+
+GitHub sends `pull_request` events for many actions: `opened`, `synchronize`, `reopened`, `closed`, `labeled`, etc. The `events` list filters only by the `X-GitHub-Event` header value — it cannot filter by action sub-type at the routing level.
+
+The prompt in Step 1 already handles this by instructing the agent to stop early for `closed` and `labeled` events.
+
+:::warning The agent still runs and consumes tokens
+The "stop here" instruction prevents a meaningful review, but the agent still runs to completion for every `pull_request` event regardless of action. GitHub webhooks can only filter by event type (`pull_request`, `push`, `issues`, etc.) — not by action sub-type (`opened`, `closed`, `labeled`). There is no routing-level filter for sub-actions. For high-volume repos, accept this cost or filter upstream with a GitHub Actions workflow that calls your webhook URL conditionally.
+:::
+
+> There is no Jinja2 or conditional template syntax. `{field}` and `{nested.field}` are the only substitutions supported. Anything else is passed verbatim to the agent.
+
+---
+
+## Using a skill for consistent review style
+
+Load a [Hermes skill](/docs/user-guide/features/skills) to give the agent a consistent review persona. Add `skills` to your route inside `platforms.webhook.extra.routes` in `config.yaml`:
+
+```yaml
+platforms:
+  webhook:
+    enabled: true
+    extra:
+      routes:
+        github-pr-review:
+          secret: "your-webhook-secret-here"
+          events: [pull_request]
+          prompt: |
+            A pull request event was received (action: {action}).
+            PR #{number}: {pull_request.title} by {pull_request.user.login}
+            URL: {pull_request.html_url}
+
+            If the action is "closed" or "labeled", stop here and do not post a comment.
+
+            Otherwise:
+            1. Run: gh pr diff {number} --repo {repository.full_name}
+            2. Review the diff using your review guidelines.
+            3. Write a concise, actionable review comment as your final response (it is posted to the PR automatically).
+          skills:
+            - review
+          deliver: github_comment
+          deliver_extra:
+            repo: "{repository.full_name}"
+            pr_number: "{number}"
+```
+
+> **Note:** Only the first skill in the list that is found is loaded. Hermes does not stack multiple skills — subsequent entries are ignored.
+
+---
+
+## Sending responses to Slack or Discord instead
+
+Replace the `deliver` and `deliver_extra` fields inside your route with your target platform:
+
+```yaml
+# Inside platforms.webhook.extra.routes.<route-name>:
+
+# Slack
+deliver: slack
+deliver_extra:
+  chat_id: "C0123456789"   # Slack channel ID (omit to use the configured home channel)
+
+# Discord
+deliver: discord
+deliver_extra:
+  chat_id: "987654321012345678"  # Discord channel ID (omit to use home channel)
+```
+
+The target platform must also be enabled and connected in the gateway. If `chat_id` is omitted, the response is sent to that platform's configured home channel.
+
+Valid `deliver` values: `log` · `github_comment` · `telegram` · `discord` · `slack` · `signal` · `sms`
+
+---
+
+## GitLab support
+
+The same adapter works with GitLab. GitLab uses `X-Gitlab-Token` for authentication (plain string match, not HMAC) — Hermes handles both automatically.
+
+For event filtering, GitLab sets `X-Gitlab-Event` to values like `Merge Request Hook`, `Push Hook`, `Pipeline Hook`. Use the exact header value in `events`:
+
+```yaml
+events:
+  - Merge Request Hook
+```
+
+GitLab payload fields differ from GitHub's — e.g. `{object_attributes.title}` for the MR title and `{object_attributes.iid}` for the MR number. The easiest way to discover the full payload structure is GitLab's **Test** button in your webhook settings, combined with the **Recent Deliveries** log. Alternatively, omit `prompt` from your route config — Hermes will then pass the full payload as formatted JSON directly to the agent, and the agent's response (visible in the gateway log with `deliver: log`) will describe its structure.
+
+---
+
+## Security notes
+
+- **Never use `INSECURE_NO_AUTH`** in production — it disables signature validation entirely. It is only for local development.
+- **Rotate your webhook secret** periodically and update it in both GitHub (webhook settings) and your `config.yaml`.
+- **Rate limiting** is 30 req/min per route by default (configurable via `extra.rate_limit`). Exceeding it returns `429`.
+- **Duplicate deliveries** (webhook retries) are deduplicated via a 1-hour idempotency cache. The cache key is `X-GitHub-Delivery` if present, then `X-Request-ID`, then a millisecond timestamp. When neither delivery ID header is set, every request gets a fresh timestamp key, so retries are **not** deduplicated.
+- **Prompt injection:** PR titles, descriptions, and commit messages are attacker-controlled. Malicious PRs could attempt to manipulate the agent's actions. Run the gateway in a sandboxed environment (Docker, VM) when exposed to the public internet.
+
+---
+
+## Troubleshooting
+
+| Symptom | Check |
+|---|---|
+| `401 Invalid signature` | Secret in config.yaml doesn't match GitHub webhook secret |
+| `404 Unknown route` | Route name in the URL doesn't match the key in `routes:` |
+| `429 Rate limit exceeded` | 30 req/min per route exceeded — common when re-delivering test events from GitHub's UI; wait a minute or raise `extra.rate_limit` |
+| No comment posted | `gh` not installed, not on PATH, or not authenticated (`gh auth login`) |
+| Agent runs but no comment | Check the gateway log — if the agent output was empty or just "SKIP", delivery is still attempted |
+| Port already in use | Change `extra.port` in config.yaml |
+| Agent runs but reviews only the PR description | The prompt doesn't include the `gh pr diff` instruction — the diff is not in the webhook payload |
+| Can't see the ping event | Ignored events return `{"status":"ignored","event":"ping"}` at DEBUG log level only — check GitHub's delivery log (repo → Settings → Webhooks → your webhook → Recent Deliveries) |
+
+**GitHub's Recent Deliveries tab** (repo → Settings → Webhooks → your webhook) shows the exact request headers, payload, HTTP status, and response body for every delivery. It is the fastest way to diagnose failures without touching your server logs.
+
+---
+
+## Full config reference
+
+```yaml
+platforms:
+  webhook:
+    enabled: true
+    extra:
+      host: "0.0.0.0"         # bind address (default: 0.0.0.0)
+      port: 8644               # listen port (default: 8644)
+      secret: ""               # optional global fallback secret
+      rate_limit: 30           # requests per minute per route
+      max_body_bytes: 1048576  # payload size limit in bytes (default: 1 MB)
+
+      routes:
+        <route-name>:
+          secret: "required-per-route"
+          events: []            # [] = accept all; otherwise list X-GitHub-Event values
+          prompt: ""            # {field} / {nested.field} resolved from payload
+          skills: []            # first matching skill is loaded (only one)
+          deliver: "log"        # log | github_comment | telegram | discord | slack | signal | sms
+          deliver_extra: {}     # repo + pr_number for github_comment; chat_id for others
+```
+
+---
+
+## What's Next?
+
+- **[Cron-Based PR Reviews](./github-pr-review-agent.md)** — poll for PRs on a schedule, no public endpoint needed
+- **[Webhook Reference](/docs/user-guide/messaging/webhooks)** — full config reference for the webhook platform
+- **[Build a Plugin](/docs/guides/build-a-hermes-plugin)** — package review logic into a shareable plugin
+- **[Profiles](/docs/user-guide/profiles)** — run a dedicated reviewer profile with its own memory and config
diff --git a/website/sidebars.ts b/website/sidebars.ts
index c84184c4e67..d57a71dcc2c 100644
--- a/website/sidebars.ts
+++ b/website/sidebars.ts
@@ -162,6 +162,8 @@ const sidebars: SidebarsConfig = {
         'guides/cron-troubleshooting',
         'guides/work-with-skills',
         'guides/delegation-patterns',
+        'guides/github-pr-review-agent',
+        'guides/webhook-github-pr-review',
         'guides/migrate-from-openclaw',
         'guides/aws-bedrock',
       ],

From c567adb58abbaa0fd1f775ec27d1754efacca83c Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 19 Apr 2026 05:35:45 -0700
Subject: [PATCH 137/143] fix(tui): session.create build thread must clean up
 if session.close races (#12555)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a user hits /new or /resume before the previous session finishes
initializing, session.close runs while the previous session.create's
_build thread is still constructing the agent.  session.close pops
_sessions[sid] and closes whatever slash_worker it finds (None at that
point — _build hasn't installed it yet), then returns.  _build keeps
running in the background, installs the slash_worker subprocess and
registers an approval-notify callback on a session dict that's now
unreachable via _sessions.  The subprocess leaks until process exit;
the notify callback lingers in the global registry.

Fix: _build now tracks what it allocates (worker, notify_registered)
and checks in its finally block whether _sessions[sid] still points
to the session it's building for.  If not, the build was orphaned by
a racing close, so clean up the subprocess and unregister the notify
ourselves.

tui_gateway/server.py:
- _build reads _sessions.get(sid) safely (returns early if already gone)
- tracks allocated worker + notify registration
- finally checks orphan status and cleans up

Tests (tests/test_tui_gateway_server.py): 2 new cases.
- test_session_create_close_race_does_not_orphan_worker: slow
  _make_agent, close mid-build, verify worker.close() and
  unregister_gateway_notify both fire from the build thread's
  cleanup path.
- test_session_create_no_race_keeps_worker_alive: regression guard —
  happy path does NOT over-eagerly clean up a live worker.

Validated: against the unpatched code, the race test fails with
'orphan worker was not cleaned up — closed_workers=[]'.  Live E2E
against the live Python environment confirmed the cleanup fires
exactly when the race happens.
---
 tests/test_tui_gateway_server.py | 159 +++++++++++++++++++++++++++++++
 tui_gateway/server.py            |  39 +++++++-
 2 files changed, 196 insertions(+), 2 deletions(-)

diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py
index c0f52390356..533516b95da 100644
--- a/tests/test_tui_gateway_server.py
+++ b/tests/test_tui_gateway_server.py
@@ -949,3 +949,162 @@ def test_mirror_slash_side_effects_allowed_when_idle(monkeypatch):
     # Should NOT contain "session busy" — the switch went through.
     assert "session busy" not in warning
     assert applied["model"]
+
+
+# ---------------------------------------------------------------------------
+# session.create / session.close race: fast /new churn must not orphan the
+# slash_worker subprocess or the global approval-notify registration.
+# ---------------------------------------------------------------------------
+
+
+def test_session_create_close_race_does_not_orphan_worker(monkeypatch):
+    """Regression guard: if session.close runs while session.create's
+    _build thread is still constructing the agent, the build thread
+    must detect the orphan and clean up the slash_worker + notify
+    registration it's about to install.  Without the cleanup those
+    resources leak — the subprocess stays alive until atexit and the
+    notify callback lingers in the global registry."""
+    import threading
+
+    closed_workers: list[str] = []
+    unregistered_keys: list[str] = []
+
+    class _FakeWorker:
+        def __init__(self, key, model):
+            self.key = key
+            self._closed = False
+
+        def close(self):
+            self._closed = True
+            closed_workers.append(self.key)
+
+    class _FakeAgent:
+        def __init__(self):
+            self.model = "x"
+            self.provider = "openrouter"
+            self.base_url = ""
+            self.api_key = ""
+
+    # Make _build block until we release it — simulates slow agent init
+    release_build = threading.Event()
+
+    def _slow_make_agent(sid, key):
+        release_build.wait(timeout=3.0)
+        return _FakeAgent()
+
+    # Stub everything _build touches
+    monkeypatch.setattr(server, "_make_agent", _slow_make_agent)
+    monkeypatch.setattr(server, "_SlashWorker", _FakeWorker)
+    monkeypatch.setattr(server, "_get_db", lambda: types.SimpleNamespace(create_session=lambda *a, **kw: None))
+    monkeypatch.setattr(server, "_session_info", lambda _a: {"model": "x"})
+    monkeypatch.setattr(server, "_probe_credentials", lambda _a: None)
+    monkeypatch.setattr(server, "_wire_callbacks", lambda _sid: None)
+    monkeypatch.setattr(server, "_emit", lambda *a, **kw: None)
+
+    # Shim register/unregister to observe leaks
+    import tools.approval as _approval
+    monkeypatch.setattr(_approval, "register_gateway_notify",
+                        lambda key, cb: None)
+    monkeypatch.setattr(_approval, "unregister_gateway_notify",
+                        lambda key: unregistered_keys.append(key))
+    monkeypatch.setattr(_approval, "load_permanent_allowlist", lambda: None)
+
+    # Start: session.create spawns _build thread, returns synchronously
+    resp = server.handle_request({
+        "id": "1", "method": "session.create", "params": {"cols": 80},
+    })
+    assert resp.get("result"), f"got error: {resp.get('error')}"
+    sid = resp["result"]["session_id"]
+
+    # Build thread is blocked in _slow_make_agent.  Close the session
+    # NOW — this pops _sessions[sid] before _build can install the
+    # worker/notify.
+    close_resp = server.handle_request({
+        "id": "2", "method": "session.close", "params": {"session_id": sid},
+    })
+    assert close_resp.get("result", {}).get("closed") is True
+
+    # At this point session.close saw slash_worker=None (not yet
+    # installed) so it didn't close anything.  Release the build thread
+    # and let it finish — it should detect the orphan and clean up the
+    # worker it just allocated + unregister the notify.
+    release_build.set()
+
+    # Give the build thread a moment to run through its finally.
+    for _ in range(100):
+        if closed_workers:
+            break
+        import time
+        time.sleep(0.02)
+
+    assert len(closed_workers) == 1, (
+        f"orphan worker was not cleaned up — closed_workers={closed_workers}"
+    )
+    # Notify may be unregistered by both session.close (unconditional)
+    # and the orphan-cleanup path; the key guarantee is that the build
+    # thread does at least one unregister call (any prior close
+    # already popped the callback; the duplicate is a no-op).
+    assert len(unregistered_keys) >= 1, (
+        f"orphan notify registration was not unregistered — "
+        f"unregistered_keys={unregistered_keys}"
+    )
+
+
+def test_session_create_no_race_keeps_worker_alive(monkeypatch):
+    """Regression guard: when session.close does NOT race, the build
+    thread must install the worker + notify normally and leave them
+    alone (no over-eager cleanup)."""
+    closed_workers: list[str] = []
+    unregistered_keys: list[str] = []
+
+    class _FakeWorker:
+        def __init__(self, key, model):
+            self.key = key
+
+        def close(self):
+            closed_workers.append(self.key)
+
+    class _FakeAgent:
+        def __init__(self):
+            self.model = "x"
+            self.provider = "openrouter"
+            self.base_url = ""
+            self.api_key = ""
+
+    monkeypatch.setattr(server, "_make_agent", lambda sid, key: _FakeAgent())
+    monkeypatch.setattr(server, "_SlashWorker", _FakeWorker)
+    monkeypatch.setattr(server, "_get_db", lambda: types.SimpleNamespace(create_session=lambda *a, **kw: None))
+    monkeypatch.setattr(server, "_session_info", lambda _a: {"model": "x"})
+    monkeypatch.setattr(server, "_probe_credentials", lambda _a: None)
+    monkeypatch.setattr(server, "_wire_callbacks", lambda _sid: None)
+    monkeypatch.setattr(server, "_emit", lambda *a, **kw: None)
+
+    import tools.approval as _approval
+    monkeypatch.setattr(_approval, "register_gateway_notify", lambda key, cb: None)
+    monkeypatch.setattr(_approval, "unregister_gateway_notify",
+                        lambda key: unregistered_keys.append(key))
+    monkeypatch.setattr(_approval, "load_permanent_allowlist", lambda: None)
+
+    resp = server.handle_request({
+        "id": "1", "method": "session.create", "params": {"cols": 80},
+    })
+    sid = resp["result"]["session_id"]
+
+    # Wait for the build to finish (ready event inside session dict).
+    session = server._sessions[sid]
+    session["agent_ready"].wait(timeout=2.0)
+
+    # Build finished without a close race — nothing should have been
+    # cleaned up by the orphan check.
+    assert closed_workers == [], (
+        f"build thread closed its own worker despite no race: {closed_workers}"
+    )
+    assert unregistered_keys == [], (
+        f"build thread unregistered its own notify despite no race: {unregistered_keys}"
+    )
+
+    # Session should have the live worker installed.
+    assert session.get("slash_worker") is not None
+
+    # Cleanup
+    server._sessions.pop(sid, None)
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 00f8346191d..70dff3b17bb 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -1088,7 +1088,23 @@ def _(rid, params: dict) -> dict:
     }
 
     def _build() -> None:
-        session = _sessions[sid]
+        session = _sessions.get(sid)
+        if session is None:
+            # session.close ran before the build thread got scheduled.
+            ready.set()
+            return
+
+        # Track what we allocate so we can clean up if session.close
+        # races us to the finish line.  session.close pops _sessions[sid]
+        # unconditionally and tries to close the slash_worker it finds;
+        # if _build is still mid-construction when close runs, close
+        # finds slash_worker=None / notify not yet registered and returns
+        # cleanly — leaving us, the build thread, to later install the
+        # worker + notify on an orphaned session dict.  The finally
+        # block below detects the orphan and cleans up instead of
+        # leaking a subprocess and a global notify registration.
+        worker = None
+        notify_registered = False
         try:
             tokens = _set_session_context(key)
             try:
@@ -1100,13 +1116,15 @@ def _(rid, params: dict) -> dict:
             session["agent"] = agent
 
             try:
-                session["slash_worker"] = _SlashWorker(key, getattr(agent, "model", _resolve_model()))
+                worker = _SlashWorker(key, getattr(agent, "model", _resolve_model()))
+                session["slash_worker"] = worker
             except Exception:
                 pass
 
             try:
                 from tools.approval import register_gateway_notify, load_permanent_allowlist
                 register_gateway_notify(key, lambda data: _emit("approval.request", sid, data))
+                notify_registered = True
                 load_permanent_allowlist()
             except Exception:
                 pass
@@ -1122,6 +1140,23 @@ def _(rid, params: dict) -> dict:
             session["agent_error"] = str(e)
             _emit("error", sid, {"message": f"agent init failed: {e}"})
         finally:
+            # Orphan check: if session.close raced us and popped
+            # _sessions[sid] while we were building, the dict we just
+            # populated is unreachable.  Clean up the subprocess and
+            # the global notify registration ourselves — session.close
+            # couldn't see them at the time it ran.
+            if _sessions.get(sid) is not session:
+                if worker is not None:
+                    try:
+                        worker.close()
+                    except Exception:
+                        pass
+                if notify_registered:
+                    try:
+                        from tools.approval import unregister_gateway_notify
+                        unregister_gateway_notify(key)
+                    except Exception:
+                        pass
             ready.set()
 
     threading.Thread(target=_build, daemon=True).start()

From a521005fe5e5885b23c878a5c5fdc2e1b361a4da Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 19 Apr 2026 05:45:59 -0700
Subject: [PATCH 138/143] fix(discord): close two low-severity adapter races
 (#12558)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two small races in gateway/platforms/discord.py, bundled together
since they're adjacent in the adapter and both narrow in impact.

1. on_message vs _resolve_allowed_usernames (startup window)
   DISCORD_ALLOWED_USERS accepts both numeric IDs and raw usernames.
   At connect-time, _resolve_allowed_usernames walks the bot's guilds
   (fetch_members can take multiple seconds) to swap usernames for IDs.
   on_message can fire during that window; _is_allowed_user compares
   the numeric author.id against a set that may still contain raw
   usernames — legitimate users get silently rejected for a few
   seconds after every reconnect.

   Fix: on_message awaits _ready_event (with a 30s timeout) when it
   isn't already set.  on_ready sets the event after the resolve
   completes.  On timeout, on_message logs a warning and proceeds
   with the allowlist as-is.  In steady state the wait is a no-op
   (event already set); only the startup / reconnect window ever
   blocks.

2. join_voice_channel check-and-connect
   The existing-connection check at _voice_clients.get() and the
   channel.connect() call straddled an await boundary with no lock.
   Two concurrent /voice channel invocations could both see None and
   both call connect(); discord.py raises ClientException
   ("Already connected") on the loser.  Same race class for leave
   running concurrently with _voice_timeout_handler.

   Fix: per-guild asyncio.Lock (_voice_locks dict with lazy alloc via
   _voice_lock_for).  join_voice_channel and leave_voice_channel both
   run their body under the lock.  Sequential within a guild, still
   fully concurrent across guilds.

Both: LOW severity.  The first only affects username-based allowlists
on fast-follow-up messages at startup; the second is a narrow
exception on simultaneous voice commands.  Bundled so the adapter
gets a single coherent polish pass.

Tests (tests/gateway/test_discord_race_polish.py): 2 regression cases.
- test_concurrent_joins_do_not_double_connect: two concurrent
  join_voice_channel calls on the same guild result in exactly one
  channel.connect() invocation.
- test_on_message_blocks_until_ready_event_set: asserts the expected
  wait pattern is present in on_message (source inspection, since
  full discord.py client setup isn't practical here).

Regression guard validated: both tests fail against the unpatched
gateway/platforms/discord.py and pass with the fix.  Full Discord
suite (118 tests) green.
---
 gateway/platforms/discord.py              | 116 +++++++++++++-------
 tests/gateway/test_discord_race_polish.py | 122 ++++++++++++++++++++++
 2 files changed, 201 insertions(+), 37 deletions(-)
 create mode 100644 tests/gateway/test_discord_race_polish.py

diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py
index 1ec831b66de..fce7ece4146 100644
--- a/gateway/platforms/discord.py
+++ b/gateway/platforms/discord.py
@@ -498,6 +498,7 @@ class DiscordAdapter(BasePlatformAdapter):
         self._allowed_role_ids: set = set()  # For DISCORD_ALLOWED_ROLES filtering
         # Voice channel state (per-guild)
         self._voice_clients: Dict[int, Any] = {}  # guild_id -> VoiceClient
+        self._voice_locks: Dict[int, asyncio.Lock] = {}  # guild_id -> serialize join/leave
         # Text batching: merge rapid successive messages (Telegram-style)
         self._text_batch_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_DELAY_SECONDS", "0.6"))
         self._text_batch_split_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0"))
@@ -636,6 +637,30 @@ class DiscordAdapter(BasePlatformAdapter):
 
             @self._client.event
             async def on_message(message: DiscordMessage):
+                # Wait for on_ready to finish resolving username-based
+                # allowlist entries.  Without this block, messages
+                # arriving between Discord's READY event and the end
+                # of _resolve_allowed_usernames compare author IDs
+                # (numeric) against a set that may still contain raw
+                # usernames (strings) from DISCORD_ALLOWED_USERS —
+                # legitimate users get silently rejected for the first
+                # few seconds after every reconnect.  The wait is a
+                # near-instant no-op in steady state (_ready_event is
+                # already set); only the startup / reconnect window
+                # ever blocks.
+                if not adapter_self._ready_event.is_set():
+                    try:
+                        await asyncio.wait_for(
+                            adapter_self._ready_event.wait(),
+                            timeout=30.0,
+                        )
+                    except asyncio.TimeoutError:
+                        logger.warning(
+                            "[%s] on_message timed out waiting for _ready_event; "
+                            "allowlist check may use pre-resolved entries",
+                            adapter_self.name,
+                        )
+
                 # Dedup: Discord RESUME replays events after reconnects (#4777)
                 if adapter_self._dedup.is_duplicate(str(message.id)):
                     return
@@ -1231,57 +1256,74 @@ class DiscordAdapter(BasePlatformAdapter):
     # Voice channel methods (join / leave / play)
     # ------------------------------------------------------------------
 
+    def _voice_lock_for(self, guild_id: int) -> "asyncio.Lock":
+        """Return the per-guild lock, creating it on first use.
+
+        Voice join/leave/move must be serialized per guild — without
+        this, two concurrent /voice channel invocations both see
+        _voice_clients.get(guild_id) return None, both call
+        channel.connect(), and discord.py raises ClientException
+        ('Already connected') on the loser.
+        """
+        lock = self._voice_locks.get(guild_id)
+        if lock is None:
+            lock = asyncio.Lock()
+            self._voice_locks[guild_id] = lock
+        return lock
+
     async def join_voice_channel(self, channel) -> bool:
         """Join a Discord voice channel. Returns True on success."""
         if not self._client or not DISCORD_AVAILABLE:
             return False
         guild_id = channel.guild.id
 
-        # Already connected in this guild?
-        existing = self._voice_clients.get(guild_id)
-        if existing and existing.is_connected():
-            if existing.channel.id == channel.id:
+        async with self._voice_lock_for(guild_id):
+            # Already connected in this guild?
+            existing = self._voice_clients.get(guild_id)
+            if existing and existing.is_connected():
+                if existing.channel.id == channel.id:
+                    self._reset_voice_timeout(guild_id)
+                    return True
+                await existing.move_to(channel)
                 self._reset_voice_timeout(guild_id)
                 return True
-            await existing.move_to(channel)
+
+            vc = await channel.connect()
+            self._voice_clients[guild_id] = vc
             self._reset_voice_timeout(guild_id)
+
+            # Start voice receiver (Phase 2: listen to users)
+            try:
+                receiver = VoiceReceiver(vc, allowed_user_ids=self._allowed_user_ids)
+                receiver.start()
+                self._voice_receivers[guild_id] = receiver
+                self._voice_listen_tasks[guild_id] = asyncio.ensure_future(
+                    self._voice_listen_loop(guild_id)
+                )
+            except Exception as e:
+                logger.warning("Voice receiver failed to start: %s", e)
+
             return True
 
-        vc = await channel.connect()
-        self._voice_clients[guild_id] = vc
-        self._reset_voice_timeout(guild_id)
-
-        # Start voice receiver (Phase 2: listen to users)
-        try:
-            receiver = VoiceReceiver(vc, allowed_user_ids=self._allowed_user_ids)
-            receiver.start()
-            self._voice_receivers[guild_id] = receiver
-            self._voice_listen_tasks[guild_id] = asyncio.ensure_future(
-                self._voice_listen_loop(guild_id)
-            )
-        except Exception as e:
-            logger.warning("Voice receiver failed to start: %s", e)
-
-        return True
-
     async def leave_voice_channel(self, guild_id: int) -> None:
         """Disconnect from the voice channel in a guild."""
-        # Stop voice receiver first
-        receiver = self._voice_receivers.pop(guild_id, None)
-        if receiver:
-            receiver.stop()
-        listen_task = self._voice_listen_tasks.pop(guild_id, None)
-        if listen_task:
-            listen_task.cancel()
+        async with self._voice_lock_for(guild_id):
+            # Stop voice receiver first
+            receiver = self._voice_receivers.pop(guild_id, None)
+            if receiver:
+                receiver.stop()
+            listen_task = self._voice_listen_tasks.pop(guild_id, None)
+            if listen_task:
+                listen_task.cancel()
 
-        vc = self._voice_clients.pop(guild_id, None)
-        if vc and vc.is_connected():
-            await vc.disconnect()
-        task = self._voice_timeout_tasks.pop(guild_id, None)
-        if task:
-            task.cancel()
-        self._voice_text_channels.pop(guild_id, None)
-        self._voice_sources.pop(guild_id, None)
+            vc = self._voice_clients.pop(guild_id, None)
+            if vc and vc.is_connected():
+                await vc.disconnect()
+            task = self._voice_timeout_tasks.pop(guild_id, None)
+            if task:
+                task.cancel()
+            self._voice_text_channels.pop(guild_id, None)
+            self._voice_sources.pop(guild_id, None)
 
     # Maximum seconds to wait for voice playback before giving up
     PLAYBACK_TIMEOUT = 120
diff --git a/tests/gateway/test_discord_race_polish.py b/tests/gateway/test_discord_race_polish.py
new file mode 100644
index 00000000000..a0f900aea60
--- /dev/null
+++ b/tests/gateway/test_discord_race_polish.py
@@ -0,0 +1,122 @@
+"""Regression tests for the Discord adapter race-polish fix.
+
+Two races are addressed:
+1. on_message allowlist check racing on_ready's _resolve_allowed_usernames
+   resolution window.  Username-based entries in DISCORD_ALLOWED_USERS
+   appear in the set as raw strings for several seconds after
+   connect/reconnect; author.id is always numeric, so legitimate users
+   are silently rejected until resolution finishes.
+2. join_voice_channel check-and-connect: concurrent /voice channel
+   invocations both see _voice_clients.get(guild_id) is None, both call
+   channel.connect(), second raises ClientException ('Already connected').
+"""
+
+import asyncio
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from gateway.config import Platform, PlatformConfig
+
+
+def _make_adapter():
+    """Bare DiscordAdapter for testing — object.__new__ pattern per AGENTS.md."""
+    from gateway.platforms.discord import DiscordAdapter
+
+    adapter = object.__new__(DiscordAdapter)
+    adapter._platform = Platform.DISCORD
+    adapter.config = PlatformConfig(enabled=True, token="t")
+    adapter._ready_event = asyncio.Event()
+    adapter._allowed_user_ids = set()
+    adapter._allowed_role_ids = set()
+    adapter._voice_clients = {}
+    adapter._voice_locks = {}
+    adapter._voice_receivers = {}
+    adapter._voice_listen_tasks = {}
+    adapter._voice_timeout_tasks = {}
+    adapter._voice_text_channels = {}
+    adapter._voice_sources = {}
+    adapter._client = MagicMock()
+    return adapter
+
+
+class TestJoinVoiceSerialization:
+    @pytest.mark.asyncio
+    async def test_concurrent_joins_do_not_double_connect(self):
+        """Two concurrent join_voice_channel calls on the same guild
+        must serialize through the per-guild lock — only ONE
+        channel.connect() actually fires; the second sees the
+        _voice_clients entry the first just installed."""
+        adapter = _make_adapter()
+
+        connect_count = [0]
+        connect_event = asyncio.Event()
+
+        class FakeVC:
+            def __init__(self, channel):
+                self.channel = channel
+
+            def is_connected(self):
+                return True
+
+            async def move_to(self, _channel):
+                return None
+
+            async def disconnect(self):
+                return None
+
+        async def slow_connect(self):
+            connect_count[0] += 1
+            # Widen the race window
+            await connect_event.wait()
+            return FakeVC(self)
+
+        channel = MagicMock()
+        channel.id = 111
+        channel.guild.id = 42
+        channel.connect = lambda: slow_connect(channel)
+
+        # Swap out VoiceReceiver so it doesn't try to set up real audio
+        from gateway.platforms import discord as discord_mod
+        with patch.object(discord_mod, "VoiceReceiver", MagicMock(return_value=MagicMock(start=lambda: None))):
+            with patch.object(discord_mod.asyncio, "ensure_future", lambda _c: asyncio.create_task(asyncio.sleep(0))):
+                # Fire two joins concurrently
+                t1 = asyncio.create_task(adapter.join_voice_channel(channel))
+                t2 = asyncio.create_task(adapter.join_voice_channel(channel))
+                # Let both tasks stall: first inside connect(), second on the guild lock
+                await asyncio.sleep(0.05)
+                # Release connect so both can finish
+                connect_event.set()
+                r1, r2 = await asyncio.gather(t1, t2)
+
+        assert connect_count[0] == 1, (
+            f"Expected exactly 1 channel.connect() call, got {connect_count[0]} — "
+            "per-guild voice lock is not serializing join_voice_channel"
+        )
+        assert r1 is True and r2 is True
+        assert 42 in adapter._voice_clients
+
+
+class TestOnMessageWaitsForReadyEvent:
+    @pytest.mark.asyncio
+    async def test_on_message_blocks_until_ready_event_set(self):
+        """A message arriving before on_ready finishes
+        _resolve_allowed_usernames must wait, not proceed with a
+        half-resolved allowlist."""
+        # This is a source-inspection check, not a behavioral one: we
+        # read the source of DiscordAdapter.connect and assert that it
+        # contains the expected wait pattern.  A full end-to-end test
+        # would require setting up the discord.py client machinery,
+        # which is not practical here.
+        import inspect
+        from gateway.platforms import discord as discord_mod
+
+        src = inspect.getsource(discord_mod.DiscordAdapter.connect)
+        assert "_ready_event.is_set()" in src, (
+            "on_message must gate on _ready_event so username-based "
+            "allowlist entries are resolved before the allowlist check"
+        )
+        assert "await asyncio.wait_for(" in src and "_ready_event.wait()" in src, (
+            "Expected asyncio.wait_for(_ready_event.wait(), timeout=...) "
+            "pattern in on_message"
+        )

From a6fe5d08727c9bb2486709ba3357137fbb49a321 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sun, 19 Apr 2026 07:47:15 -0500
Subject: [PATCH 139/143] fix(tui-gateway): dispatch slow RPC handlers on a
 thread pool (#12546)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The stdin-read loop in entry.py calls handle_request() inline, so the
five handlers that can block for seconds to minutes
(slash.exec, cli.exec, shell.exec, session.resume, session.branch)
freeze the dispatcher. While one is running, any inbound RPC —
notably approval.respond and session.interrupt — sits unread in the
pipe buffer and lands only after the slow handler returns.

Route only those five onto a small ThreadPoolExecutor; every other
handler stays on the main thread so the fast-path ordering is
unchanged and the audit surface stays small. write_json is already
_stdout_lock-guarded, so concurrent response writes are safe. Pool
size defaults to 4 (overridable via HERMES_TUI_RPC_POOL_WORKERS,
with a floor of 2).

- add _LONG_HANDLERS set + ThreadPoolExecutor + atexit shutdown
- new dispatch(req) function: pool for long handlers, inline for rest
- _run_and_emit wraps pool work in a try/except so a misbehaving
  handler still surfaces as a JSON-RPC error instead of silently
  dying in a worker
- entry.py swaps handle_request → dispatch
- 5 new tests: sync path still inline, long handlers emit via stdout,
  fast handler not blocked behind slow one, handler exceptions map to
  error responses, non-long methods always take the sync path

Manual repro confirms the fix: shell.exec(sleep 3) + terminal.resize
sent back-to-back now returns the resize response at t=0s while the
sleep finishes independently at t=3s. Before, both landed together
at t=3s.

Fixes #12546.
---
 tests/tui_gateway/test_protocol.py | 79 ++++++++++++++++++++++++++++++
 tui_gateway/entry.py               |  4 +-
 tui_gateway/server.py              | 51 +++++++++++++++++++
 3 files changed, 132 insertions(+), 2 deletions(-)

diff --git a/tests/tui_gateway/test_protocol.py b/tests/tui_gateway/test_protocol.py
index 926dfadf170..da154cc1680 100644
--- a/tests/tui_gateway/test_protocol.py
+++ b/tests/tui_gateway/test_protocol.py
@@ -4,6 +4,7 @@ import io
 import json
 import sys
 import threading
+import time
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -432,3 +433,81 @@ def test_command_dispatch_returns_skill_payload(server):
     assert result["type"] == "skill"
     assert result["message"] == fake_msg
     assert result["name"] == "hermes-agent-dev"
+
+
+# ── dispatch(): pool routing for long handlers (#12546) ──────────────
+
+
+def test_dispatch_runs_short_handlers_inline(server):
+    """Non-long handlers return their response synchronously from dispatch()."""
+    server._methods["fast.ping"] = lambda rid, params: server._ok(rid, {"pong": True})
+
+    resp = server.dispatch({"id": "r1", "method": "fast.ping", "params": {}})
+
+    assert resp == {"jsonrpc": "2.0", "id": "r1", "result": {"pong": True}}
+
+
+def test_dispatch_offloads_long_handlers_and_emits_via_stdout(capture):
+    """Long handlers run on the pool and write their response via write_json."""
+    server, buf = capture
+    server._methods["slash.exec"] = lambda rid, params: server._ok(rid, {"output": "hi"})
+
+    resp = server.dispatch({"id": "r2", "method": "slash.exec", "params": {}})
+    assert resp is None
+
+    for _ in range(50):
+        if buf.getvalue():
+            break
+        time.sleep(0.01)
+
+    written = json.loads(buf.getvalue())
+    assert written == {"jsonrpc": "2.0", "id": "r2", "result": {"output": "hi"}}
+
+
+def test_dispatch_long_handler_does_not_block_fast_handler(server):
+    """A slow long handler must not prevent a concurrent fast handler from completing."""
+    released = threading.Event()
+    server._methods["slash.exec"] = lambda rid, params: (released.wait(timeout=5), server._ok(rid, {"done": True}))[1]
+    server._methods["fast.ping"] = lambda rid, params: server._ok(rid, {"pong": True})
+
+    t0 = time.monotonic()
+    assert server.dispatch({"id": "slow", "method": "slash.exec", "params": {}}) is None
+
+    fast_resp = server.dispatch({"id": "fast", "method": "fast.ping", "params": {}})
+    fast_elapsed = time.monotonic() - t0
+
+    assert fast_resp["result"] == {"pong": True}
+    assert fast_elapsed < 0.5, f"fast handler blocked for {fast_elapsed:.2f}s behind slow handler"
+
+    released.set()
+
+
+def test_dispatch_long_handler_exception_produces_error_response(capture):
+    """An exception inside a pool-dispatched handler still yields a JSON-RPC error."""
+    server, buf = capture
+
+    def boom(rid, params):
+        raise RuntimeError("kaboom")
+
+    server._methods["slash.exec"] = boom
+
+    server.dispatch({"id": "r3", "method": "slash.exec", "params": {}})
+
+    for _ in range(50):
+        if buf.getvalue():
+            break
+        time.sleep(0.01)
+
+    written = json.loads(buf.getvalue())
+    assert written["id"] == "r3"
+    assert written["error"]["code"] == -32000
+    assert "kaboom" in written["error"]["message"]
+
+
+def test_dispatch_unknown_long_method_still_goes_inline(server):
+    """Method name not in _LONG_HANDLERS takes the sync path even if handler is slow."""
+    server._methods["some.method"] = lambda rid, params: server._ok(rid, {"ok": True})
+
+    resp = server.dispatch({"id": "r4", "method": "some.method", "params": {}})
+
+    assert resp["result"] == {"ok": True}
diff --git a/tui_gateway/entry.py b/tui_gateway/entry.py
index a9667528de4..d2b82b9dab2 100644
--- a/tui_gateway/entry.py
+++ b/tui_gateway/entry.py
@@ -2,7 +2,7 @@ import json
 import signal
 import sys
 
-from tui_gateway.server import handle_request, resolve_skin, write_json
+from tui_gateway.server import dispatch, resolve_skin, write_json
 
 signal.signal(signal.SIGPIPE, signal.SIG_DFL)
 signal.signal(signal.SIGINT, signal.SIG_IGN)
@@ -28,7 +28,7 @@ def main():
                 sys.exit(0)
             continue
 
-        resp = handle_request(req)
+        resp = dispatch(req)
         if resp is not None:
             if not write_json(resp):
                 sys.exit(0)
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 70dff3b17bb..6d0dbea659c 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -1,4 +1,5 @@
 import atexit
+import concurrent.futures
 import copy
 import json
 import os
@@ -36,6 +37,29 @@ _cfg_cache: dict | None = None
 _cfg_mtime: float | None = None
 _SLASH_WORKER_TIMEOUT_S = max(5.0, float(os.environ.get("HERMES_TUI_SLASH_TIMEOUT_S", "45") or 45))
 
+# ── Async RPC dispatch (#12546) ──────────────────────────────────────
+# A handful of handlers block the dispatcher loop in entry.py for seconds
+# to minutes (slash.exec, cli.exec, shell.exec, session.resume,
+# session.branch). While they're running, inbound RPCs — notably
+# approval.respond and session.interrupt — sit unread in the stdin pipe.
+# We route only those slow handlers onto a small thread pool; everything
+# else stays on the main thread so ordering stays sane for the fast path.
+# write_json is already _stdout_lock-guarded, so concurrent response
+# writes are safe.
+_LONG_HANDLERS = frozenset({
+    "cli.exec",
+    "session.branch",
+    "session.resume",
+    "shell.exec",
+    "slash.exec",
+})
+_RPC_POOL_WORKERS = max(2, int(os.environ.get("HERMES_TUI_RPC_POOL_WORKERS", "4") or 4))
+_pool = concurrent.futures.ThreadPoolExecutor(
+    max_workers=_RPC_POOL_WORKERS,
+    thread_name_prefix="tui-rpc",
+)
+atexit.register(lambda: _pool.shutdown(wait=False, cancel_futures=True))
+
 # Reserve real stdout for JSON-RPC only; redirect Python's stdout to stderr
 # so stray print() from libraries/tools becomes harmless gateway.stderr instead
 # of corrupting the JSON protocol.
@@ -200,6 +224,33 @@ def handle_request(req: dict) -> dict | None:
     return fn(req.get("id"), req.get("params", {}))
 
 
+def _run_and_emit(req: dict) -> None:
+    """Run a handler on the RPC pool and write its response directly.
+
+    Catches any unexpected exception so a misbehaving handler can't kill
+    the worker thread silently — the caller still sees a JSON-RPC error.
+    """
+    try:
+        resp = handle_request(req)
+    except Exception as exc:
+        resp = _err(req.get("id"), -32000, f"handler error: {exc}")
+    if resp is not None:
+        write_json(resp)
+
+
+def dispatch(req: dict) -> dict | None:
+    """Route an inbound RPC — long handlers to the pool, everything else inline.
+
+    Returns the response for sync-dispatched requests so the caller
+    (entry.py) can write it. Returns None when the request has been
+    scheduled on the pool; the worker writes the response itself.
+    """
+    if req.get("method", "") in _LONG_HANDLERS:
+        _pool.submit(_run_and_emit, req)
+        return None
+    return handle_request(req)
+
+
 def _wait_agent(session: dict, rid: str, timeout: float = 30.0) -> dict | None:
     ready = session.get("agent_ready")
     if ready is not None and not ready.wait(timeout=timeout):

From ab6eaaff2610ec236edbbe4d7729c103b816e573 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sun, 19 Apr 2026 07:53:01 -0500
Subject: [PATCH 140/143] chore(tui-gateway): inline one-off RPC_POOL_WORKERS,
 compact _LONG_HANDLERS

---
 tui_gateway/server.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 6d0dbea659c..41d93db4427 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -46,16 +46,10 @@ _SLASH_WORKER_TIMEOUT_S = max(5.0, float(os.environ.get("HERMES_TUI_SLASH_TIMEOU
 # else stays on the main thread so ordering stays sane for the fast path.
 # write_json is already _stdout_lock-guarded, so concurrent response
 # writes are safe.
-_LONG_HANDLERS = frozenset({
-    "cli.exec",
-    "session.branch",
-    "session.resume",
-    "shell.exec",
-    "slash.exec",
-})
-_RPC_POOL_WORKERS = max(2, int(os.environ.get("HERMES_TUI_RPC_POOL_WORKERS", "4") or 4))
+_LONG_HANDLERS = frozenset({"cli.exec", "session.branch", "session.resume", "shell.exec", "slash.exec"})
+
 _pool = concurrent.futures.ThreadPoolExecutor(
-    max_workers=_RPC_POOL_WORKERS,
+    max_workers=max(2, int(os.environ.get("HERMES_TUI_RPC_POOL_WORKERS", "4") or 4)),
     thread_name_prefix="tui-rpc",
 )
 atexit.register(lambda: _pool.shutdown(wait=False, cancel_futures=True))

From 596280a40bc2807641a42625d172d97af30a841c Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sun, 19 Apr 2026 07:54:16 -0500
Subject: [PATCH 141/143] =?UTF-8?q?chore(tui):=20/clean=20pass=20=E2=80=94?=
 =?UTF-8?q?=20inline=20one-off=20locals,=20tighten=20ConfirmPrompt?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- providers.ts: drop the `dup` intermediate, fold the ternary inline
- paths.ts (fmtCwdBranch): inline `b` into the `tag` template
- prompts.tsx (ConfirmPrompt): hoist a single `lower = ch.toLowerCase()`,
  collapse the three early-return branches into two, drop the
  redundant bounds checks on arrow-key handlers (setSel is idempotent
  at 0/1), inline the `confirmLabel`/`cancelLabel` defaults at the
  use site
- modelPicker.tsx / config/env.ts / providers.test.ts: auto-formatter
  reflows picked up by `npm run fix`
- useInputHandlers.ts: drop the stray blank line that was tripping
  perfectionist/sort-imports (pre-existing lint error)
---
 ui-tui/src/__tests__/providers.test.ts |  9 +++++---
 ui-tui/src/app/useInputHandlers.ts     |  1 -
 ui-tui/src/components/modelPicker.tsx  | 10 +++++++--
 ui-tui/src/components/prompts.tsx      | 30 ++++++++------------------
 ui-tui/src/config/env.ts               |  4 +---
 ui-tui/src/domain/paths.ts             |  3 +--
 ui-tui/src/domain/providers.ts         | 12 +++--------
 7 files changed, 28 insertions(+), 41 deletions(-)

diff --git a/ui-tui/src/__tests__/providers.test.ts b/ui-tui/src/__tests__/providers.test.ts
index a46102e8933..2dfd76d0220 100644
--- a/ui-tui/src/__tests__/providers.test.ts
+++ b/ui-tui/src/__tests__/providers.test.ts
@@ -4,9 +4,12 @@ import { providerDisplayNames } from '../domain/providers.js'
 
 describe('providerDisplayNames', () => {
   it('returns bare names when all are unique', () => {
-    expect(providerDisplayNames([{ name: 'Anthropic', slug: 'anthropic' }, { name: 'OpenAI', slug: 'openai' }])).toEqual(
-      ['Anthropic', 'OpenAI']
-    )
+    expect(
+      providerDisplayNames([
+        { name: 'Anthropic', slug: 'anthropic' },
+        { name: 'OpenAI', slug: 'openai' }
+      ])
+    ).toEqual(['Anthropic', 'OpenAI'])
   })
 
   it('appends slug to every collision so the disambiguation is symmetric', () => {
diff --git a/ui-tui/src/app/useInputHandlers.ts b/ui-tui/src/app/useInputHandlers.ts
index b71a1dc3924..258cf7cee3e 100644
--- a/ui-tui/src/app/useInputHandlers.ts
+++ b/ui-tui/src/app/useInputHandlers.ts
@@ -7,7 +7,6 @@ import type {
   SudoRespondResponse,
   VoiceRecordResponse
 } from '../gatewayTypes.js'
-
 import { writeOsc52Clipboard } from '../lib/osc52.js'
 
 import { getInputSelection } from './inputSelectionStore.js'
diff --git a/ui-tui/src/components/modelPicker.tsx b/ui-tui/src/components/modelPicker.tsx
index 406047bc11c..5ee19e407c7 100644
--- a/ui-tui/src/components/modelPicker.tsx
+++ b/ui-tui/src/components/modelPicker.tsx
@@ -181,7 +181,10 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
           const idx = off + i
 
           return (
-            <Text color={providerIdx === idx ? t.color.cornsilk : t.color.dim} key={providers[idx]?.slug ?? `row-${idx}`}>
+            <Text
+              color={providerIdx === idx ? t.color.cornsilk : t.color.dim}
+              key={providers[idx]?.slug ?? `row-${idx}`}
+            >
               {providerIdx === idx ? '▸ ' : '  '}
               {i + 1}. {row}
             </Text>
@@ -212,7 +215,10 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
         const idx = off + i
 
         return (
-          <Text color={modelIdx === idx ? t.color.cornsilk : t.color.dim} key={`${provider?.slug ?? 'prov'}:${idx}:${row}`}>
+          <Text
+            color={modelIdx === idx ? t.color.cornsilk : t.color.dim}
+            key={`${provider?.slug ?? 'prov'}:${idx}:${row}`}
+          >
             {modelIdx === idx ? '▸ ' : '  '}
             {i + 1}. {row}
           </Text>
diff --git a/ui-tui/src/components/prompts.tsx b/ui-tui/src/components/prompts.tsx
index cd9c3a2d1d7..f9d00dbfe31 100644
--- a/ui-tui/src/components/prompts.tsx
+++ b/ui-tui/src/components/prompts.tsx
@@ -155,31 +155,21 @@ export function ConfirmPrompt({ onCancel, onConfirm, req, t }: ConfirmPromptProp
   const [sel, setSel] = useState(0)
 
   useInput((ch, key) => {
-    if (key.escape || (key.ctrl && ch.toLowerCase() === 'c')) {
-      onCancel()
-
-      return
-    }
-
     const lower = ch.toLowerCase()
 
+    if (key.escape || (key.ctrl && lower === 'c') || lower === 'n') {
+      return onCancel()
+    }
+
     if (lower === 'y') {
-      onConfirm()
-
-      return
+      return onConfirm()
     }
 
-    if (lower === 'n') {
-      onCancel()
-
-      return
-    }
-
-    if (key.upArrow && sel > 0) {
+    if (key.upArrow) {
       setSel(0)
     }
 
-    if (key.downArrow && sel < 1) {
+    if (key.downArrow) {
       setSel(1)
     }
 
@@ -189,12 +179,10 @@ export function ConfirmPrompt({ onCancel, onConfirm, req, t }: ConfirmPromptProp
   })
 
   const accent = req.danger ? t.color.error : t.color.warn
-  const confirmLabel = req.confirmLabel ?? 'Yes'
-  const cancelLabel = req.cancelLabel ?? 'No'
 
   const rows = [
-    { color: t.color.cornsilk, label: cancelLabel },
-    { color: req.danger ? t.color.error : t.color.cornsilk, label: confirmLabel }
+    { color: t.color.cornsilk, label: req.cancelLabel ?? 'No' },
+    { color: req.danger ? t.color.error : t.color.cornsilk, label: req.confirmLabel ?? 'Yes' }
   ]
 
   return (
diff --git a/ui-tui/src/config/env.ts b/ui-tui/src/config/env.ts
index 999607dacf8..60f1e80c539 100644
--- a/ui-tui/src/config/env.ts
+++ b/ui-tui/src/config/env.ts
@@ -1,5 +1,3 @@
 export const STARTUP_RESUME_ID = (process.env.HERMES_TUI_RESUME ?? '').trim()
 export const MOUSE_TRACKING = !/^(?:1|true|yes|on)$/i.test((process.env.HERMES_TUI_DISABLE_MOUSE ?? '').trim())
-export const NO_CONFIRM_DESTRUCTIVE = /^(?:1|true|yes|on)$/i.test(
-  (process.env.HERMES_TUI_NO_CONFIRM ?? '').trim()
-)
+export const NO_CONFIRM_DESTRUCTIVE = /^(?:1|true|yes|on)$/i.test((process.env.HERMES_TUI_NO_CONFIRM ?? '').trim())
diff --git a/ui-tui/src/domain/paths.ts b/ui-tui/src/domain/paths.ts
index 6b95dcbac15..43c023b6ba9 100644
--- a/ui-tui/src/domain/paths.ts
+++ b/ui-tui/src/domain/paths.ts
@@ -10,8 +10,7 @@ export const fmtCwdBranch = (cwd: string, branch: null | string, max = 40) => {
     return shortCwd(cwd, max)
   }
 
-  const b = branch.length > 16 ? `…${branch.slice(-15)}` : branch
-  const tag = ` (${b})`
+  const tag = ` (${branch.length > 16 ? `…${branch.slice(-15)}` : branch})`
 
   return `${shortCwd(cwd, Math.max(8, max - tag.length))}${tag}`
 }
diff --git a/ui-tui/src/domain/providers.ts b/ui-tui/src/domain/providers.ts
index 02cc99b922a..83ac016ff19 100644
--- a/ui-tui/src/domain/providers.ts
+++ b/ui-tui/src/domain/providers.ts
@@ -5,13 +5,7 @@ export const providerDisplayNames = (providers: readonly { name: string; slug: s
     counts.set(p.name, (counts.get(p.name) ?? 0) + 1)
   }
 
-  return providers.map(p => {
-    const dup = (counts.get(p.name) ?? 0) > 1
-
-    if (!dup || !p.slug || p.slug === p.name) {
-      return p.name
-    }
-
-    return `${p.name} (${p.slug})`
-  })
+  return providers.map(p =>
+    (counts.get(p.name) ?? 0) > 1 && p.slug && p.slug !== p.name ? `${p.name} (${p.slug})` : p.name
+  )
 }

From 393175e60ce119f654d15dad489a8e282a532d24 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sun, 19 Apr 2026 07:58:33 -0500
Subject: [PATCH 142/143] =?UTF-8?q?chore(tui-gateway):=20inline=20=5Frun?=
 =?UTF-8?q?=5Fand=5Femit=20=E2=80=94=20one-off=20wrapper,=20belongs=20insi?=
 =?UTF-8?q?de=20dispatch?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tui_gateway/server.py | 40 ++++++++++++++++++----------------------
 1 file changed, 18 insertions(+), 22 deletions(-)

diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 41d93db4427..3a48e381e8c 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -218,31 +218,27 @@ def handle_request(req: dict) -> dict | None:
     return fn(req.get("id"), req.get("params", {}))
 
 
-def _run_and_emit(req: dict) -> None:
-    """Run a handler on the RPC pool and write its response directly.
-
-    Catches any unexpected exception so a misbehaving handler can't kill
-    the worker thread silently — the caller still sees a JSON-RPC error.
-    """
-    try:
-        resp = handle_request(req)
-    except Exception as exc:
-        resp = _err(req.get("id"), -32000, f"handler error: {exc}")
-    if resp is not None:
-        write_json(resp)
-
-
 def dispatch(req: dict) -> dict | None:
-    """Route an inbound RPC — long handlers to the pool, everything else inline.
+    """Route inbound RPCs — long handlers to the pool, everything else inline.
 
-    Returns the response for sync-dispatched requests so the caller
-    (entry.py) can write it. Returns None when the request has been
-    scheduled on the pool; the worker writes the response itself.
+    Returns a response dict when handled inline. Returns None when the
+    handler was scheduled on the pool; the worker writes its own
+    response via write_json when done.
     """
-    if req.get("method", "") in _LONG_HANDLERS:
-        _pool.submit(_run_and_emit, req)
-        return None
-    return handle_request(req)
+    if req.get("method") not in _LONG_HANDLERS:
+        return handle_request(req)
+
+    def run():
+        try:
+            resp = handle_request(req)
+        except Exception as exc:
+            resp = _err(req.get("id"), -32000, f"handler error: {exc}")
+        if resp is not None:
+            write_json(resp)
+
+    _pool.submit(run)
+
+    return None
 
 
 def _wait_agent(session: dict, rid: str, timeout: float = 30.0) -> dict | None:

From d32e8d2ace98a24ce22d014ddf8da44812aee37a Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sun, 19 Apr 2026 08:56:29 -0500
Subject: [PATCH 143/143] =?UTF-8?q?fix(tui):=20drain=20message=20queue=20o?=
 =?UTF-8?q?n=20every=20busy=20=E2=86=92=20false=20transition?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously the queue only drained inside the message.complete event
handler, so anything enqueued while a shell.exec (!sleep, !cmd) or a
failed agent turn was running would stay stuck forever — neither of
those paths emits message.complete. After Ctrl+C an interrupted
session would also orphan the queue because idle() flips busy=false
locally without going through message.complete.

Single source of truth: a useEffect that watches ui.busy. When the
session is settled (sid present, busy false, not editing a queue
item), pull one message and send it. Covers agent turn end,
interrupt, shell.exec completion, error recovery, and the original
startup hydration (first-sid case) all at once.

Dropped the now-redundant dequeue/sendQueued from
createGatewayEventHandler.message.complete and the accompanying
GatewayEventHandlerContext.composer field — the effect handles it.
---
 ui-tui/src/app/createGatewayEventHandler.ts | 11 -----------
 ui-tui/src/app/interfaces.ts                |  5 -----
 ui-tui/src/app/useMainApp.ts                | 15 ++++++---------
 3 files changed, 6 insertions(+), 25 deletions(-)

diff --git a/ui-tui/src/app/createGatewayEventHandler.ts b/ui-tui/src/app/createGatewayEventHandler.ts
index 699a3794dee..8f45bb3d7eb 100644
--- a/ui-tui/src/app/createGatewayEventHandler.ts
+++ b/ui-tui/src/app/createGatewayEventHandler.ts
@@ -46,7 +46,6 @@ const pushNote = pushUnique(6)
 const pushTool = pushUnique(8)
 
 export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: GatewayEvent) => void {
-  const { dequeue, queueEditRef, sendQueued } = ctx.composer
   const { rpc } = ctx.gateway
   const { STARTUP_RESUME_ID, newSession, resumeById, setCatalog } = ctx.session
   const { bellOnComplete, stdout, sys } = ctx.system
@@ -394,16 +393,6 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
           patchUiState(state => ({ ...state, usage: { ...state.usage, ...ev.payload!.usage } }))
         }
 
-        if (queueEditRef.current !== null) {
-          return
-        }
-
-        const next = dequeue()
-
-        if (next) {
-          sendQueued(next)
-        }
-
         return
       }
 
diff --git a/ui-tui/src/app/interfaces.ts b/ui-tui/src/app/interfaces.ts
index 353c56535be..af13e047c70 100644
--- a/ui-tui/src/app/interfaces.ts
+++ b/ui-tui/src/app/interfaces.ts
@@ -193,11 +193,6 @@ export interface InputHandlerResult {
 }
 
 export interface GatewayEventHandlerContext {
-  composer: {
-    dequeue: () => string | undefined
-    queueEditRef: MutableRefObject<null | number>
-    sendQueued: (text: string) => void
-  }
   gateway: GatewayServices
   session: {
     STARTUP_RESUME_ID: string
diff --git a/ui-tui/src/app/useMainApp.ts b/ui-tui/src/app/useMainApp.ts
index fb48badea9a..e0c18dec64f 100644
--- a/ui-tui/src/app/useMainApp.ts
+++ b/ui-tui/src/app/useMainApp.ts
@@ -380,12 +380,13 @@ export function useMainApp(gw: GatewayClient) {
     sys
   })
 
-  const prevSidRef = useRef<null | string>(null)
+  // Drain one queued message whenever the session settles (busy → false):
+  // agent turn ends, interrupt, shell.exec finishes, error recovered, or the
+  // session first comes up with pre-queued messages. Without this, shell.exec
+  // and error paths never emit message.complete, so anything enqueued while
+  // `!sleep` / a failed turn was running would stay stuck forever.
   useEffect(() => {
-    const prev = prevSidRef.current
-    prevSidRef.current = ui.sid
-
-    if (prev !== null || !ui.sid || ui.busy || composerRefs.queueEditRef.current !== null) {
+    if (!ui.sid || ui.busy || composerRefs.queueEditRef.current !== null) {
       return
     }
 
@@ -416,7 +417,6 @@ export function useMainApp(gw: GatewayClient) {
   const onEvent = useMemo(
     () =>
       createGatewayEventHandler({
-        composer: { dequeue: composerActions.dequeue, queueEditRef: composerRefs.queueEditRef, sendQueued },
         gateway,
         session: {
           STARTUP_RESUME_ID,
@@ -432,11 +432,8 @@ export function useMainApp(gw: GatewayClient) {
     [
       appendMessage,
       bellOnComplete,
-      composerActions,
-      composerRefs,
       gateway,
       panel,
-      sendQueued,
       session.newSession,
       session.resetSession,
       session.resumeById,