diff --git a/Dockerfile b/Dockerfile
index 37038233262..0d3da72eb77 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -21,26 +21,36 @@ RUN useradd -u 10000 -m -d /opt/data hermes
 COPY --chmod=0755 --from=gosu_source /gosu /usr/local/bin/
 COPY --chmod=0755 --from=uv_source /usr/local/bin/uv /usr/local/bin/uvx /usr/local/bin/
 
-COPY . /opt/hermes
 WORKDIR /opt/hermes
 
-# Install Node dependencies and Playwright as root (--with-deps needs apt)
+# ---------- Layer-cached dependency install ----------
+# Copy only package manifests first so npm install + Playwright are cached
+# unless the lockfiles themselves change.
+COPY package.json package-lock.json ./
+COPY scripts/whatsapp-bridge/package.json scripts/whatsapp-bridge/package-lock.json scripts/whatsapp-bridge/
+COPY web/package.json web/package-lock.json web/
+
 RUN npm install --prefer-offline --no-audit && \
     npx playwright install --with-deps chromium --only-shell && \
-    cd /opt/hermes/scripts/whatsapp-bridge && \
-    npm install --prefer-offline --no-audit && \
+    (cd scripts/whatsapp-bridge && npm install --prefer-offline --no-audit) && \
+    (cd web && npm install --prefer-offline --no-audit) && \
     npm cache clean --force
 
-# Hand ownership to hermes user, then install Python deps in a virtualenv
-RUN chown -R hermes:hermes /opt/hermes
-USER hermes
+# ---------- Source code ----------
+# .dockerignore must exclude node_modules; otherwise this COPY could overwrite the installs above.
+COPY --chown=hermes:hermes . .
 
+# Build web dashboard (Vite outputs to hermes_cli/web_dist/)
+RUN cd web && npm run build
+
+# ---------- Python virtualenv ----------
+RUN chown hermes:hermes /opt/hermes
+USER hermes
 RUN uv venv && \
     uv pip install --no-cache-dir -e ".[all]"
 
-USER root
-RUN chmod +x /opt/hermes/docker/entrypoint.sh
-
+# ---------- Runtime ----------
+ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
 ENV HERMES_HOME=/opt/data
 VOLUME [ "/opt/data" ]
 ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 8adf080e31d..19bde946ee3 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -99,11 +99,48 @@ _FIXED_TEMPERATURE_MODELS: Dict[str, float] = {
     "kimi-for-coding": 0.6,
 }
 
+# Moonshot's kimi-for-coding endpoint (api.kimi.com/coding) documents:
+# "k2.5 model will use a fixed value 1.0, non-thinking mode will use a fixed
+# value 0.6.  Any other value will result in an error."  The same lock applies
+# to the other k2.* models served on that endpoint.  Enumerated explicitly so
+# non-coding siblings like `kimi-k2-instruct` (variable temperature, served on
+# the standard chat API and third parties) are NOT clamped.
+# Source: https://platform.kimi.ai/docs/guide/kimi-k2-5-quickstart
+_KIMI_INSTANT_MODELS: frozenset = frozenset({
+    "kimi-k2.5",
+    "kimi-k2-turbo-preview",
+    "kimi-k2-0905-preview",
+})
+_KIMI_THINKING_MODELS: frozenset = frozenset({
+    "kimi-k2-thinking",
+    "kimi-k2-thinking-turbo",
+})
+
 
 def _fixed_temperature_for_model(model: Optional[str]) -> Optional[float]:
-    """Return a required temperature override for models with strict contracts."""
+    """Return a required temperature override for models with strict contracts.
+
+    Moonshot's kimi-for-coding endpoint rejects any non-approved temperature.
+    The exact-match ``_FIXED_TEMPERATURE_MODELS`` map is consulted first; then
+    the last ``/`` segment of the name is matched against the Kimi sets, so a
+    ``vendor/`` prefix (e.g. ``moonshotai/kimi-k2.5``) is tolerated.  Thinking
+    variants are pinned to 1.0 and instant (non-thinking) variants to 0.6.
+    Returns ``None`` for every other model, including ``kimi-k2-instruct*``
+    which is the separate non-coding K2 family with variable temperature.
+    """
     normalized = (model or "").strip().lower()
-    return _FIXED_TEMPERATURE_MODELS.get(normalized)
+    fixed = _FIXED_TEMPERATURE_MODELS.get(normalized)
+    if fixed is not None:
+        logger.debug("Forcing temperature=%s for model %r (fixed map)", fixed, model)
+        return fixed
+    bare = normalized.rsplit("/", 1)[-1]  # drop any vendor/ prefix
+    if bare in _KIMI_THINKING_MODELS:
+        logger.debug("Forcing temperature=1.0 for kimi thinking model %r", model)
+        return 1.0
+    if bare in _KIMI_INSTANT_MODELS:
+        logger.debug("Forcing temperature=0.6 for kimi instant model %r", model)
+        return 0.6
+    return None
 
 # Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
 _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
@@ -1611,7 +1648,6 @@ def resolve_provider_client(
             from hermes_cli.models import copilot_default_headers
 
             headers.update(copilot_default_headers())
-
         client = OpenAI(api_key=api_key, base_url=base_url,
                         **({"default_headers": headers} if headers else {}))
 
diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index 34ec5091b1c..ae8c2c0bd31 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -63,6 +63,52 @@ _CHARS_PER_TOKEN = 4
 _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
 
 
+def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
+    """Shrink long string values inside a tool-call arguments JSON blob while
+    preserving JSON validity.
+
+    The ``function.arguments`` field on a tool call is a JSON-encoded string
+    passed through to the LLM provider; downstream providers strictly
+    validate it and return a non-retryable 400 when it is not well-formed.
+    An earlier implementation sliced the raw JSON at a fixed character offset
+    and appended ``...[truncated]`` — which routinely produced strings like::
+
+        {"path": "/foo/bar", "content": "# long markdown
+        ...[truncated]
+
+    i.e. an unterminated string and a missing closing brace. MiniMax, for
+    example, rejects this with ``invalid function arguments json string``
+    and the session gets stuck re-sending the same broken history on every
+    turn. See issue #11762 for the observed loop.
+
+    This helper parses the arguments, shrinks string leaves longer than
+    ``head_chars`` inside the parsed structure, and re-serialises. Dict keys,
+    short strings (paths) and non-string values (ints, booleans) are kept
+    intact. If the arguments are not valid JSON to begin with — some model
+    backends use non-JSON tool arguments — the original string is returned
+    unchanged rather than replaced with something nobody can parse.
+    """
+    try:
+        parsed = json.loads(args)
+    except (ValueError, TypeError):
+        return args  # not JSON: hand the caller's string back untouched
+
+    def _shrink(obj: Any) -> Any:
+        if isinstance(obj, str):
+            if len(obj) > head_chars:
+                return obj[:head_chars] + "...[truncated]"
+            return obj
+        if isinstance(obj, dict):
+            return {k: _shrink(v) for k, v in obj.items()}
+        if isinstance(obj, list):
+            return [_shrink(v) for v in obj]
+        return obj  # numbers, booleans and None pass through unchanged
+
+    shrunken = _shrink(parsed)
+    # ensure_ascii=False preserves CJK/emoji instead of bloating with \uXXXX
+    return json.dumps(shrunken, ensure_ascii=False)
+
+
 def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) -> str:
     """Create an informative 1-line summary of a tool call + result.
 
@@ -449,6 +495,11 @@ class ContextCompressor(ContextEngine):
         # Pass 3: Truncate large tool_call arguments in assistant messages
         # outside the protected tail. write_file with 50KB content, for
         # example, survives pruning entirely without this.
+        #
+        # The shrinking is done inside the parsed JSON structure so the
+        # result remains valid JSON — otherwise downstream providers 400
+        # on every subsequent turn until the broken call falls out of
+        # the window. See ``_truncate_tool_call_args_json`` docstring.
         for i in range(prune_boundary):
             msg = result[i]
             if msg.get("role") != "assistant" or not msg.get("tool_calls"):
@@ -459,8 +510,10 @@ class ContextCompressor(ContextEngine):
                 if isinstance(tc, dict):
                     args = tc.get("function", {}).get("arguments", "")
                     if len(args) > 500:
-                        tc = {**tc, "function": {**tc["function"], "arguments": args[:200] + "...[truncated]"}}
-                        modified = True
+                        new_args = _truncate_tool_call_args_json(args)
+                        if new_args != args:
+                            tc = {**tc, "function": {**tc["function"], "arguments": new_args}}
+                            modified = True
                 new_tcs.append(tc)
             if modified:
                 result[i] = {**msg, "tool_calls": new_tcs}
diff --git a/agent/credential_pool.py b/agent/credential_pool.py
index a67eee6c422..b02514e990c 100644
--- a/agent/credential_pool.py
+++ b/agent/credential_pool.py
@@ -22,8 +22,6 @@ from hermes_cli.auth import (
     _auth_store_lock,
     _codex_access_token_is_expiring,
     _decode_jwt_claims,
-    _import_codex_cli_tokens,
-    _write_codex_cli_tokens,
     _load_auth_store,
     _load_provider_state,
     _resolve_kimi_base_url,
@@ -457,39 +455,6 @@ class CredentialPool:
             logger.debug("Failed to sync from credentials file: %s", exc)
         return entry
 
-    def _sync_codex_entry_from_cli(self, entry: PooledCredential) -> PooledCredential:
-        """Sync an openai-codex pool entry from ~/.codex/auth.json if tokens differ.
-
-        OpenAI OAuth refresh tokens are single-use and rotate on every refresh.
-        When the Codex CLI (or another Hermes profile) refreshes its token,
-        the pool entry's refresh_token becomes stale.  This method detects that
-        by comparing against ~/.codex/auth.json and syncing the fresh pair.
-        """
-        if self.provider != "openai-codex":
-            return entry
-        try:
-            cli_tokens = _import_codex_cli_tokens()
-            if not cli_tokens:
-                return entry
-            cli_refresh = cli_tokens.get("refresh_token", "")
-            cli_access = cli_tokens.get("access_token", "")
-            if cli_refresh and cli_refresh != entry.refresh_token:
-                logger.debug("Pool entry %s: syncing tokens from ~/.codex/auth.json (refresh token changed)", entry.id)
-                updated = replace(
-                    entry,
-                    access_token=cli_access,
-                    refresh_token=cli_refresh,
-                    last_status=None,
-                    last_status_at=None,
-                    last_error_code=None,
-                )
-                self._replace_entry(entry, updated)
-                self._persist()
-                return updated
-        except Exception as exc:
-            logger.debug("Failed to sync from ~/.codex/auth.json: %s", exc)
-        return entry
-
     def _sync_device_code_entry_to_auth_store(self, entry: PooledCredential) -> None:
         """Write refreshed pool entry tokens back to auth.json providers.
 
@@ -585,13 +550,6 @@ class CredentialPool:
                     except Exception as wexc:
                         logger.debug("Failed to write refreshed token to credentials file: %s", wexc)
             elif self.provider == "openai-codex":
-                # Proactively sync from ~/.codex/auth.json before refresh.
-                # The Codex CLI (or another Hermes profile) may have already
-                # consumed our refresh_token.  Syncing first avoids a
-                # "refresh_token_reused" error when the CLI has a newer pair.
-                synced = self._sync_codex_entry_from_cli(entry)
-                if synced is not entry:
-                    entry = synced
                 refreshed = auth_mod.refresh_codex_oauth_pure(
                     entry.access_token,
                     entry.refresh_token,
@@ -677,45 +635,6 @@ class CredentialPool:
                     # Credentials file had a valid (non-expired) token — use it directly
                     logger.debug("Credentials file has valid token, using without refresh")
                     return synced
-            # For openai-codex: the refresh_token may have been consumed by
-            # the Codex CLI between our proactive sync and the refresh call.
-            # Re-sync and retry once.
-            if self.provider == "openai-codex":
-                synced = self._sync_codex_entry_from_cli(entry)
-                if synced.refresh_token != entry.refresh_token:
-                    logger.debug("Retrying Codex refresh with synced token from ~/.codex/auth.json")
-                    try:
-                        refreshed = auth_mod.refresh_codex_oauth_pure(
-                            synced.access_token,
-                            synced.refresh_token,
-                        )
-                        updated = replace(
-                            synced,
-                            access_token=refreshed["access_token"],
-                            refresh_token=refreshed["refresh_token"],
-                            last_refresh=refreshed.get("last_refresh"),
-                            last_status=STATUS_OK,
-                            last_status_at=None,
-                            last_error_code=None,
-                        )
-                        self._replace_entry(synced, updated)
-                        self._persist()
-                        self._sync_device_code_entry_to_auth_store(updated)
-                        try:
-                            _write_codex_cli_tokens(
-                                updated.access_token,
-                                updated.refresh_token,
-                                last_refresh=updated.last_refresh,
-                            )
-                        except Exception as wexc:
-                            logger.debug("Failed to write refreshed Codex tokens to CLI file (retry): %s", wexc)
-                        return updated
-                    except Exception as retry_exc:
-                        logger.debug("Codex retry refresh also failed: %s", retry_exc)
-                elif not self._entry_needs_refresh(synced):
-                    logger.debug("Codex CLI has valid token, using without refresh")
-                    self._sync_device_code_entry_to_auth_store(synced)
-                    return synced
             self._mark_exhausted(entry, None)
             return None
 
@@ -734,17 +653,6 @@ class CredentialPool:
         # _seed_from_singletons() on the next load_pool() sees fresh state
         # instead of re-seeding stale/consumed tokens.
         self._sync_device_code_entry_to_auth_store(updated)
-        # Write refreshed tokens back to ~/.codex/auth.json so Codex CLI
-        # and VS Code don't hit "refresh_token_reused" on their next refresh.
-        if self.provider == "openai-codex":
-            try:
-                _write_codex_cli_tokens(
-                    updated.access_token,
-                    updated.refresh_token,
-                    last_refresh=updated.last_refresh,
-                )
-            except Exception as wexc:
-                logger.debug("Failed to write refreshed Codex tokens to CLI file: %s", wexc)
         return updated
 
     def _entry_needs_refresh(self, entry: PooledCredential) -> bool:
@@ -790,16 +698,6 @@ class CredentialPool:
                 if synced is not entry:
                     entry = synced
                     cleared_any = True
-            # For openai-codex entries, sync from ~/.codex/auth.json before
-            # any status/refresh checks.  This picks up tokens refreshed by
-            # the Codex CLI or another Hermes profile.
-            if (self.provider == "openai-codex"
-                    and entry.last_status == STATUS_EXHAUSTED
-                    and entry.refresh_token):
-                synced = self._sync_codex_entry_from_cli(entry)
-                if synced is not entry:
-                    entry = synced
-                    cleared_any = True
             if entry.last_status == STATUS_EXHAUSTED:
                 exhausted_until = _exhausted_until(entry)
                 if exhausted_until is not None and now < exhausted_until:
@@ -1218,8 +1116,8 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
     elif provider == "openai-codex":
         # Respect user suppression — `hermes auth remove openai-codex` marks
         # the device_code source as suppressed so it won't be re-seeded from
-        # either the Hermes auth store or ~/.codex/auth.json.  Without this
-        # gate the removal is instantly undone on the next load_pool() call.
+        # the Hermes auth store.  Without this gate the removal is instantly
+        # undone on the next load_pool() call.
         codex_suppressed = False
         try:
             from hermes_cli.auth import is_source_suppressed
@@ -1231,23 +1129,12 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
 
         state = _load_provider_state(auth_store, "openai-codex")
         tokens = state.get("tokens") if isinstance(state, dict) else None
-        # Fallback: import from Codex CLI (~/.codex/auth.json) if Hermes auth
-        # store has no tokens.  This mirrors resolve_codex_runtime_credentials()
-        # so that load_pool() and list_authenticated_providers() detect tokens
-        # that only exist in the Codex CLI shared file.
-        if not (isinstance(tokens, dict) and tokens.get("access_token")):
-            try:
-                from hermes_cli.auth import _import_codex_cli_tokens, _save_codex_tokens
-                cli_tokens = _import_codex_cli_tokens()
-                if cli_tokens:
-                    logger.info("Importing Codex CLI tokens into Hermes auth store.")
-                    _save_codex_tokens(cli_tokens)
-                    # Re-read state after import
-                    auth_store = _load_auth_store()
-                    state = _load_provider_state(auth_store, "openai-codex")
-                    tokens = state.get("tokens") if isinstance(state, dict) else None
-            except Exception as exc:
-                logger.debug("Codex CLI token import failed: %s", exc)
+        # Hermes owns its own Codex auth state — we do NOT auto-import from
+        # ~/.codex/auth.json at pool-load time.  OAuth refresh tokens are
+        # single-use, so sharing them with Codex CLI / VS Code causes
+        # refresh_token_reused race failures.  Users who want to adopt
+        # existing Codex CLI credentials get a one-time, explicit prompt
+        # via `hermes auth openai-codex`.
         if isinstance(tokens, dict) and tokens.get("access_token"):
             active_sources.add("device_code")
             changed |= _upsert_entry(
diff --git a/agent/models_dev.py b/agent/models_dev.py
index 42c8925ffe7..3e5c911e7ee 100644
--- a/agent/models_dev.py
+++ b/agent/models_dev.py
@@ -420,7 +420,10 @@ def list_provider_models(provider: str) -> List[str]:
     models = _get_provider_models(provider)
     if models is None:
         return []
-    return list(models.keys())
+    return [
+        mid for mid in models.keys()
+        if not _should_hide_from_provider_catalog(provider, mid)
+    ]
 
 
 # Patterns that indicate non-agentic or noise models (TTS, embedding,
@@ -432,6 +435,43 @@ _NOISE_PATTERNS: re.Pattern = re.compile(
     re.IGNORECASE,
 )
 
+# Google's live Gemini catalogs currently include a mix of stale slugs and
+# Gemma models whose TPM quotas are too small for normal Hermes agent traffic.
+# Keep capability metadata available for direct/manual use, but hide these from
+# the Gemini model catalogs we surface in setup and model selection.
+_GOOGLE_HIDDEN_MODELS = frozenset({
+    # Low-TPM Gemma models that trip Google input-token quota walls under
+    # agent-style traffic despite advertising large context windows.
+    "gemma-4-31b-it",
+    "gemma-4-26b-it",
+    "gemma-4-26b-a4b-it",
+    "gemma-3-1b",
+    "gemma-3-1b-it",
+    "gemma-3-2b",
+    "gemma-3-2b-it",
+    "gemma-3-4b",
+    "gemma-3-4b-it",
+    "gemma-3-12b",
+    "gemma-3-12b-it",
+    "gemma-3-27b",
+    "gemma-3-27b-it",
+    # Stale/retired Google slugs that still surface through models.dev-backed
+    # Gemini selection but 404 on the current Google endpoints.
+    "gemini-1.5-flash",
+    "gemini-1.5-pro",
+    "gemini-1.5-flash-8b",
+    "gemini-2.0-flash",
+    "gemini-2.0-flash-lite",
+})
+
+
+def _should_hide_from_provider_catalog(provider: str, model_id: str) -> bool:
+    provider_lower = (provider or "").strip().lower()  # None-safe, case-insensitive
+    model_lower = (model_id or "").strip().lower()
+    if provider_lower in {"gemini", "google"} and model_lower in _GOOGLE_HIDDEN_MODELS:
+        return True  # hidden only for the gemini/google provider names
+    return False
+
 
 def list_agentic_models(provider: str) -> List[str]:
     """Return model IDs suitable for agentic use from models.dev.
@@ -448,6 +488,8 @@ def list_agentic_models(provider: str) -> List[str]:
     for mid, entry in models.items():
         if not isinstance(entry, dict):
             continue
+        if _should_hide_from_provider_catalog(provider, mid):
+            continue
         if not entry.get("tool_call", False):
             continue
         if _NOISE_PATTERNS.search(mid):
@@ -582,5 +624,3 @@ def get_model_info(
             return _parse_model_info(mid, mdata, mdev_id)
 
     return None
-
-
diff --git a/cli.py b/cli.py
index c0c17babc4c..0e5e9ff6603 100644
--- a/cli.py
+++ b/cli.py
@@ -83,17 +83,51 @@ load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env)
 _REASONING_TAGS = (
     "REASONING_SCRATCHPAD",
     "think",
-    "reasoning",
-    "THINKING",
     "thinking",
+    "reasoning",
+    "thought",  # emitted by Gemma 4
 )
 
 
 def _strip_reasoning_tags(text: str) -> str:
+    """Remove reasoning/thinking blocks from displayed text.
+
+    For each tag in ``_REASONING_TAGS``, three passes run in this order:
+      * Closed pairs ``<tag>…</tag>`` (case-insensitive, multi-line).
+      * Unterminated open tags that run to end-of-text (e.g. truncated
+        generations on NIM/MiniMax where the close tag is dropped).
+      * Stray orphan close tags (``stuff</think>answer``) left behind by
+        partial-content dumps.
+
+    Covers the variants emitted by reasoning models today: ``<think>``,
+    ``<thinking>``, ``<reasoning>``, ``<REASONING_SCRATCHPAD>``, and
+    ``<thought>`` (Gemma 4).  Must stay in sync with
+    ``run_agent.py::_strip_think_blocks`` and the stream consumer's
+    ``_OPEN_THINK_TAGS`` / ``_CLOSE_THINK_TAGS`` tuples.
+    """
     cleaned = text
     for tag in _REASONING_TAGS:
-        cleaned = re.sub(rf"<{tag}>.*?</{tag}>\s*", "", cleaned, flags=re.DOTALL)
-        cleaned = re.sub(rf"<{tag}>.*$", "", cleaned, flags=re.DOTALL)
+        # Closed pair — case-insensitive so <THINK>…</THINK> is handled too.
+        cleaned = re.sub(
+            rf"<{tag}>.*?</{tag}>\s*",
+            "",
+            cleaned,
+            flags=re.DOTALL | re.IGNORECASE,
+        )
+        # Unterminated open tag — strip from the tag to end of text.
+        cleaned = re.sub(
+            rf"<{tag}>.*$",
+            "",
+            cleaned,
+            flags=re.DOTALL | re.IGNORECASE,
+        )
+        # Stray orphan close tag left behind by partial dumps.
+        cleaned = re.sub(
+            rf"</{tag}>\s*",
+            "",
+            cleaned,
+            flags=re.IGNORECASE,
+        )
     return cleaned.strip()
 
 
@@ -1776,7 +1810,7 @@ class HermesCLI:
             mcp_names = set((CLI_CONFIG.get("mcp_servers") or {}).keys())
             invalid = [t for t in toolsets if not validate_toolset(t) and t not in mcp_names]
             if invalid:
-                self.console.print(f"[bold red]Warning: Unknown toolsets: {', '.join(invalid)}[/]")
+                self._console_print(f"[bold red]Warning: Unknown toolsets: {', '.join(invalid)}[/]")
         
         # Filesystem checkpoints: CLI flag > config
         cp_cfg = CLI_CONFIG.get("checkpoints", {})
@@ -2068,20 +2102,35 @@ class HermesCLI:
 
     def _spinner_widget_height(self, width: Optional[int] = None) -> int:
         """Return the visible height for the spinner/status text line above the status bar."""
-        if not getattr(self, "_spinner_text", ""):
+        spinner_line = self._render_spinner_text()  # measure the rendered line, timer suffix included
+        if not spinner_line:
             return 0
         if self._use_minimal_tui_chrome(width=width):
             return 0
-        # Compute how many lines the spinner text needs when wrapped.
-        # The rendered text is "  {emoji} {label}  ({elapsed})" — about
-        # len(_spinner_text) + 16 chars for indent + timer suffix.
         width = width or self._get_tui_terminal_width()
         if width and width > 10:
             import math
-            text_len = len(self._spinner_text) + 16  # indent + timer
-            return max(1, math.ceil(text_len / width))
+            text_width = self._status_bar_display_width(spinner_line)
+            return max(1, math.ceil(text_width / width))  # rows needed when wrapped, at least one
         return 1
 
+    def _render_spinner_text(self) -> str:
+        """Return the live spinner/status text exactly as rendered in the TUI."""
+        txt = getattr(self, "_spinner_text", "")
+        if not txt:
+            return ""
+        t0 = getattr(self, "_tool_start_time", 0) or 0  # missing/None/0 -> no timer suffix
+        if t0 > 0:
+            import time as _time
+            elapsed = _time.monotonic() - t0  # assumes t0 is a time.monotonic() stamp — confirm
+            if elapsed >= 60:
+                _m, _s = int(elapsed // 60), int(elapsed % 60)
+                elapsed_str = f"{_m}m {_s}s"
+            else:
+                elapsed_str = f"{elapsed:.1f}s"
+            return f"  {txt}  ({elapsed_str})"
+        return f"  {txt}"  # two-space indent only, no elapsed suffix
+
     def _get_voice_status_fragments(self, width: Optional[int] = None):
         """Return the voice status bar fragments for the interactive TUI."""
         width = width or self._get_tui_terminal_width()
@@ -2212,7 +2261,7 @@ class HermesCLI:
                 normalized_model = normalize_model_for_provider(current_model, resolved_provider)
                 if normalized_model and normalized_model != current_model:
                     if not self._model_is_default:
-                        self.console.print(
+                        self._console_print(
                             f"[yellow]⚠️  Normalized model '{current_model}' to '{normalized_model}' for {resolved_provider}.[/]"
                         )
                     self.model = normalized_model
@@ -2228,7 +2277,7 @@ class HermesCLI:
                 canonical = normalize_copilot_model_id(current_model, api_key=self.api_key)
                 if canonical and canonical != current_model:
                     if not self._model_is_default:
-                        self.console.print(
+                        self._console_print(
                             f"[yellow]⚠️  Normalized Copilot model '{current_model}' to '{canonical}'.[/]"
                         )
                     self.model = canonical
@@ -2250,7 +2299,7 @@ class HermesCLI:
                 canonical = normalize_opencode_model_id(resolved_provider, current_model)
                 if canonical and canonical != current_model:
                     if not self._model_is_default:
-                        self.console.print(
+                        self._console_print(
                             f"[yellow]⚠️  Stripped provider prefix from '{current_model}'; using '{canonical}' for {resolved_provider}.[/]"
                         )
                     self.model = canonical
@@ -2272,7 +2321,7 @@ class HermesCLI:
         if "/" in current_model:
             slug = current_model.split("/", 1)[1]
             if not self._model_is_default:
-                self.console.print(
+                self._console_print(
                     f"[yellow]⚠️  Stripped provider prefix from '{current_model}'; "
                     f"using '{slug}' for OpenAI Codex.[/]"
                 )
@@ -3021,7 +3070,7 @@ class HermesCLI:
         use_compact = self.compact or term_width < 80
         
         if use_compact:
-            self.console.print(_build_compact_banner())
+            self._console_print(_build_compact_banner())
             self._show_status()
         else:
             # Get tools for display
@@ -3046,25 +3095,25 @@ class HermesCLI:
 
         # Warn about very low context lengths (common with local servers)
         if ctx_len and ctx_len <= 8192:
-            self.console.print()
-            self.console.print(
+            self._console_print()
+            self._console_print(
                 f"[yellow]⚠️  Context length is only {ctx_len:,} tokens — "
                 f"this is likely too low for agent use with tools.[/]"
             )
-            self.console.print(
+            self._console_print(
                 "[dim]   Hermes needs 16k–32k minimum. Tool schemas + system prompt alone use ~4k–8k.[/]"
             )
             base_url = getattr(self, "base_url", "") or ""
             if "11434" in base_url or "ollama" in base_url.lower():
-                self.console.print(
+                self._console_print(
                     "[dim]   Ollama fix: OLLAMA_CONTEXT_LENGTH=32768 ollama serve[/]"
                 )
             elif "1234" in base_url:
-                self.console.print(
+                self._console_print(
                     "[dim]   LM Studio fix: Set context length in model settings → reload model[/]"
                 )
             else:
-                self.console.print(
+                self._console_print(
                     "[dim]   Fix: Set model.context_length in config.yaml, or increase your server's context setting[/]"
                 )
 
@@ -3073,20 +3122,20 @@ class HermesCLI:
 
         model_name = getattr(self, "model", "") or ""
         if is_nous_hermes_non_agentic(model_name):
-            self.console.print()
-            self.console.print(
+            self._console_print()
+            self._console_print(
                 "[bold yellow]⚠  Nous Research Hermes 3 & 4 models are NOT agentic and are not "
                 "designed for use with Hermes Agent.[/]"
             )
-            self.console.print(
+            self._console_print(
                 "[dim]   They lack tool-calling capabilities required for agent workflows. "
                 "Consider using an agentic model (Claude, GPT, Gemini, DeepSeek, etc.).[/]"
             )
-            self.console.print(
+            self._console_print(
                 "[dim]   Switch with: /model sonnet  or  /model gpt5[/]"
             )
 
-        self.console.print()
+        self._console_print()
 
     def _preload_resumed_session(self) -> bool:
         """Load a resumed session's history from the DB early (before first chat).
@@ -3104,10 +3153,10 @@ class HermesCLI:
 
         session_meta = self._session_db.get_session(self.session_id)
         if not session_meta:
-            self.console.print(
+            self._console_print(
                 f"[bold red]Session not found: {self.session_id}[/]"
             )
-            self.console.print(
+            self._console_print(
                 "[dim]Use a session ID from a previous CLI run "
                 "(hermes sessions list).[/]"
             )
@@ -3122,7 +3171,7 @@ class HermesCLI:
             if session_meta.get("title"):
                 title_part = f' "{session_meta["title"]}"'
             accent_color = _accent_hex()
-            self.console.print(
+            self._console_print(
                 f"[{accent_color}]↻ Resumed session [bold]{self.session_id}[/bold]"
                 f"{title_part} "
                 f"({msg_count} user message{'s' if msg_count != 1 else ''}, "
@@ -3130,7 +3179,7 @@ class HermesCLI:
             )
         else:
             accent_color = _accent_hex()
-            self.console.print(
+            self._console_print(
                 f"[{accent_color}]Session {self.session_id} found but has no "
                 f"messages. Starting fresh.[/]"
             )
@@ -3305,7 +3354,7 @@ class HermesCLI:
             padding=(0, 1),
             style=_history_text_c,
         )
-        self.console.print(panel)
+        self._console_print(panel)
 
     def _try_attach_clipboard_image(self) -> bool:
         """Check clipboard for an image and attach it if found.
@@ -3741,14 +3790,14 @@ class HermesCLI:
             api_key_missing = [u for u in unavailable if u["missing_vars"]]
             
             if api_key_missing:
-                self.console.print()
-                self.console.print("[yellow]⚠️  Some tools disabled (missing API keys):[/]")
+                self._console_print()
+                self._console_print("[yellow]⚠️  Some tools disabled (missing API keys):[/]")
                 for item in api_key_missing:
                     tools_str = ", ".join(item["tools"][:2])  # Show first 2 tools
                     if len(item["tools"]) > 2:
                         tools_str += f", +{len(item['tools'])-2} more"
-                    self.console.print(f"   [dim]• {item['name']}[/] [dim italic]({', '.join(item['missing_vars'])})[/]")
-                self.console.print("[dim]   Run 'hermes setup' to configure[/]")
+                    self._console_print(f"   [dim]• {item['name']}[/] [dim italic]({', '.join(item['missing_vars'])})[/]")
+                self._console_print("[dim]   Run 'hermes setup' to configure[/]")
         except Exception:
             pass  # Don't crash on import errors
     
@@ -3786,7 +3835,7 @@ class HermesCLI:
         if self._provider_source:
             provider_info += f" [dim {separator_color}]·[/] [dim]auth: {self._provider_source}[/]"
 
-        self.console.print(
+        self._console_print(
             f"  {api_indicator} [{accent_color}]{model_short}[/] "
             f"[dim {separator_color}]·[/] [bold {label_color}]{tool_count} tools[/]"
             f"{toolsets_info}{provider_info}"
@@ -3843,7 +3892,7 @@ class HermesCLI:
             f"Tokens: {total_tokens:,}",
             f"Agent Running: {'Yes' if is_running else 'No'}",
         ])
-        self.console.print("\n".join(lines), highlight=False, markup=False)
+        self._console_print("\n".join(lines), highlight=False, markup=False)
     
     def _fast_command_available(self) -> bool:
         try:
@@ -5041,8 +5090,15 @@ class HermesCLI:
 
         print("  To change model or provider, use: hermes model")
 
+    def _output_console(self):
+        """Use prompt_toolkit-safe Rich rendering once the TUI is live."""
+        if getattr(self, "_app", None):
+            return ChatConsole()
+        return self.console
 
-    
+    def _console_print(self, *args, **kwargs):
+        """Print through the active command-safe console."""
+        self._output_console().print(*args, **kwargs)
 
     @staticmethod
     def _resolve_personality_prompt(value) -> str:
@@ -5062,14 +5118,14 @@ class HermesCLI:
             from agent.google_oauth import get_valid_access_token, GoogleOAuthError, load_credentials
             from agent.google_code_assist import retrieve_user_quota, CodeAssistError
         except ImportError as exc:
-            self.console.print(f"  [red]Gemini modules unavailable: {exc}[/]")
+            self._console_print(f"  [red]Gemini modules unavailable: {exc}[/]")
             return
 
         try:
             access_token = get_valid_access_token()
         except GoogleOAuthError as exc:
-            self.console.print(f"  [yellow]{exc}[/]")
-            self.console.print("  Run [bold]/model[/] and pick 'Google Gemini (OAuth)' to sign in.")
+            self._console_print(f"  [yellow]{exc}[/]")
+            self._console_print("  Run [bold]/model[/] and pick 'Google Gemini (OAuth)' to sign in.")
             return
 
         creds = load_credentials()
@@ -5078,18 +5134,18 @@ class HermesCLI:
         try:
             buckets = retrieve_user_quota(access_token, project_id=project_id)
         except CodeAssistError as exc:
-            self.console.print(f"  [red]Quota lookup failed:[/] {exc}")
+            self._console_print(f"  [red]Quota lookup failed:[/] {exc}")
             return
 
         if not buckets:
-            self.console.print("  [dim]No quota buckets reported (account may be on legacy/unmetered tier).[/]")
+            self._console_print("  [dim]No quota buckets reported (account may be on legacy/unmetered tier).[/]")
             return
 
         # Sort for stable display, group by model
         buckets.sort(key=lambda b: (b.model_id, b.token_type))
-        self.console.print()
-        self.console.print(f"  [bold]Gemini Code Assist quota[/]  (project: {project_id or '(auto / free-tier)'})")
-        self.console.print()
+        self._console_print()
+        self._console_print(f"  [bold]Gemini Code Assist quota[/]  (project: {project_id or '(auto / free-tier)'})")
+        self._console_print()
         for b in buckets:
             pct = max(0.0, min(1.0, b.remaining_fraction))
             width = 20
@@ -5099,8 +5155,8 @@ class HermesCLI:
             header = b.model_id
             if b.token_type:
                 header += f" [{b.token_type}]"
-            self.console.print(f"    {header:40s}  {bar}  {pct_str}")
-        self.console.print()
+            self._console_print(f"    {header:40s}  {bar}  {pct_str}")
+        self._console_print()
 
     def _handle_personality_command(self, cmd: str):
         """Handle the /personality command to set predefined personalities."""
@@ -5231,7 +5287,7 @@ class HermesCLI:
             print("    /cron list")
             print('    /cron add "every 2h" "Check server status" [--skill blogwatcher]')
             print('    /cron edit <job_id> --schedule "every 4h" --prompt "New task"')
-            print("    /cron edit <job_id> --skill blogwatcher --skill find-nearby")
+            print("    /cron edit <job_id> --skill blogwatcher --skill maps")
             print("    /cron edit <job_id> --remove-skill blogwatcher")
             print("    /cron edit <job_id> --clear-skills")
             print("    /cron pause <job_id>")
@@ -5548,7 +5604,7 @@ class HermesCLI:
                         _tip_color = get_active_skin().get_color("banner_dim", "#B8860B")
                     except Exception:
                         _tip_color = "#B8860B"
-                    self.console.print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]")
+                    self._console_print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]")
                 except Exception:
                     pass
         elif canonical == "history":
@@ -5642,7 +5698,7 @@ class HermesCLI:
         elif canonical == "statusbar":
             self._status_bar_visible = not self._status_bar_visible
             state = "visible" if self._status_bar_visible else "hidden"
-            self.console.print(f"  Status bar {state}")
+            self._console_print(f"  Status bar {state}")
         elif canonical == "verbose":
             self._toggle_verbose()
         elif canonical == "yolo":
@@ -5720,6 +5776,30 @@ class HermesCLI:
                     _cprint(f"  Queued for the next turn: {payload[:80]}{'...' if len(payload) > 80 else ''}")
                 else:
                     _cprint(f"  Queued: {payload[:80]}{'...' if len(payload) > 80 else ''}")
+        elif canonical == "steer":
+            # Inject a message after the next tool call without interrupting.
+            # If the agent is actively running, push the text into the agent's
+            # pending_steer slot — the drain hook in _execute_tool_calls_*
+            # will append it to the next tool result's content. If no agent
+            # is running, fall back to queue semantics (same as /queue).
+            parts = cmd_original.split(None, 1)
+            payload = parts[1].strip() if len(parts) > 1 else ""
+            if not payload:
+                _cprint("  Usage: /steer <prompt>")
+            elif self._agent_running and self.agent is not None and hasattr(self.agent, "steer"):
+                try:
+                    accepted = self.agent.steer(payload)
+                except Exception as exc:
+                    _cprint(f"  Steer failed: {exc}")
+                else:
+                    if accepted:
+                        _cprint(f"  ⏩ Steer queued — arrives after the next tool call: {payload[:80]}{'...' if len(payload) > 80 else ''}")
+                    else:
+                        _cprint("  Steer rejected (empty payload).")
+            else:
+                # No active run — treat as a normal next-turn message.
+                self._pending_input.put(payload)
+                _cprint(f"  No agent running; queued as next turn: {payload[:80]}{'...' if len(payload) > 80 else ''}")
         elif canonical == "skin":
             self._handle_skin_command(cmd_original)
         elif canonical == "voice":
@@ -5741,15 +5821,15 @@ class HermesCLI:
                             )
                             output = result.stdout.strip() or result.stderr.strip()
                             if output:
-                                self.console.print(_rich_text_from_ansi(output))
+                                self._console_print(_rich_text_from_ansi(output))
                             else:
-                                self.console.print("[dim]Command returned no output[/]")
+                                self._console_print("[dim]Command returned no output[/]")
                         except subprocess.TimeoutExpired:
-                            self.console.print("[bold red]Quick command timed out (30s)[/]")
+                            self._console_print("[bold red]Quick command timed out (30s)[/]")
                         except Exception as e:
-                            self.console.print(f"[bold red]Quick command error: {e}[/]")
+                            self._console_print(f"[bold red]Quick command error: {e}[/]")
                     else:
-                        self.console.print(f"[bold red]Quick command '{base_cmd}' has no command defined[/]")
+                        self._console_print(f"[bold red]Quick command '{base_cmd}' has no command defined[/]")
                 elif qcmd.get("type") == "alias":
                     target = qcmd.get("target", "").strip()
                     if target:
@@ -5758,9 +5838,9 @@ class HermesCLI:
                         aliased_command = f"{target} {user_args}".strip()
                         return self.process_command(aliased_command)
                     else:
-                        self.console.print(f"[bold red]Quick command '{base_cmd}' has no target defined[/]")
+                        self._console_print(f"[bold red]Quick command '{base_cmd}' has no target defined[/]")
                 else:
-                    self.console.print(f"[bold red]Quick command '{base_cmd}' has unsupported type (supported: 'exec', 'alias')[/]")
+                    self._console_print(f"[bold red]Quick command '{base_cmd}' has unsupported type (supported: 'exec', 'alias')[/]")
             # Check for plugin-registered slash commands
             elif base_cmd.lstrip("/") in _get_plugin_cmd_handler_names():
                 from hermes_cli.plugins import get_plugin_command_handler
@@ -7017,8 +7097,7 @@ class HermesCLI:
                 )
             raise RuntimeError(
                 "Voice mode requires sounddevice and numpy.\n"
-                "Install with: pip install sounddevice numpy\n"
-                "Or: pip install hermes-agent[voice]"
+                f"Install with: {sys.executable} -m pip install sounddevice numpy"
             )
         if not reqs.get("stt_available", reqs.get("stt_key_set")):
             raise RuntimeError(
@@ -7294,8 +7373,7 @@ class HermesCLI:
                     _cprint(f"  {_DIM}Then install/update the Termux:API Android app for microphone capture{_RST}")
                     _cprint(f"  {_BOLD}Option 2: pkg install python-numpy portaudio && python -m pip install sounddevice{_RST}")
                 else:
-                    _cprint(f"\n  {_BOLD}Install: pip install {' '.join(reqs['missing_packages'])}{_RST}")
-                    _cprint(f"  {_DIM}Or: pip install hermes-agent[voice]{_RST}")
+                    _cprint(f"\n  {_BOLD}Install: {sys.executable} -m pip install {' '.join(reqs['missing_packages'])}{_RST}")
             return
 
         with self._voice_lock:
@@ -8246,7 +8324,15 @@ class HermesCLI:
                 else:
                     print(f"\n⚡ Sending after interrupt: '{preview}'")
                 self._pending_input.put(combined)
-            
+
+            # If a /steer was left over (agent finished before another tool
+            # batch could absorb it), deliver it as the next user turn.
+            _leftover_steer = result.get("pending_steer") if result else None
+            if _leftover_steer and hasattr(self, '_pending_input'):
+                preview = _leftover_steer[:60] + ("..." if len(_leftover_steer) > 60 else "")
+                print(f"\n⏩ Delivering leftover /steer as next turn: '{preview}'")
+                self._pending_input.put(_leftover_steer)
+
             return response
             
         except Exception as e:
@@ -8524,7 +8610,7 @@ class HermesCLI:
         except Exception:
             _welcome_text = "Welcome to Hermes Agent! Type your message or /help for commands."
             _welcome_color = "#FFF8DC"
-        self.console.print(f"[{_welcome_color}]{_welcome_text}[/]")
+        self._console_print(f"[{_welcome_color}]{_welcome_text}[/]")
         # Show a random tip to help users discover features
         try:
             from hermes_cli.tips import get_random_tip
@@ -8533,16 +8619,16 @@ class HermesCLI:
                 _tip_color = _welcome_skin.get_color("banner_dim", "#B8860B")
             except Exception:
                 _tip_color = "#B8860B"
-            self.console.print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]")
+            self._console_print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]")
         except Exception:
             pass  # Tips are non-critical — never break startup
         if self.preloaded_skills and not self._startup_skills_line_shown:
             skills_label = ", ".join(self.preloaded_skills)
-            self.console.print(
+            self._console_print(
                 f"[bold {_accent_hex()}]Activated skills:[/] {skills_label}"
             )
             self._startup_skills_line_shown = True
-        self.console.print()
+        self._console_print()
         
         # State for async operation
         self._agent_running = False
@@ -9345,21 +9431,10 @@ class HermesCLI:
             return cli_ref._agent_spacer_height()
 
         def get_spinner_text():
-            txt = cli_ref._spinner_text
-            if not txt:
+            spinner_line = cli_ref._render_spinner_text()
+            if not spinner_line:
                 return []
-            # Append live elapsed timer when a tool is running
-            t0 = cli_ref._tool_start_time
-            if t0 > 0:
-                import time as _time
-                elapsed = _time.monotonic() - t0
-                if elapsed >= 60:
-                    _m, _s = int(elapsed // 60), int(elapsed % 60)
-                    elapsed_str = f"{_m}m {_s}s"
-                else:
-                    elapsed_str = f"{elapsed:.1f}s"
-                return [('class:hint', f'  {txt}  ({elapsed_str})')]
-            return [('class:hint', f'  {txt}')]
+            return [('class:hint', spinner_line)]
 
         def get_spinner_height():
             return cli_ref._spinner_widget_height()
@@ -10067,8 +10142,36 @@ class HermesCLI:
         
         # Register signal handlers for graceful shutdown on SSH disconnect / SIGTERM
         def _signal_handler(signum, frame):
-            """Handle SIGHUP/SIGTERM by triggering graceful cleanup."""
+            """Handle SIGHUP/SIGTERM by triggering graceful cleanup.
+
+            Calls ``self.agent.interrupt()`` first so the agent daemon
+            thread's poll loop sees the per-thread interrupt and kills the
+            tool's subprocess group via ``_kill_process`` (os.killpg).
+            Without this, the main thread dies from KeyboardInterrupt and
+            the daemon thread is killed with it — before it can run one
+            more poll iteration to clean up the subprocess, which was
+            spawned with ``os.setsid`` and therefore survives as an orphan
+            with PPID=1.
+
+            Grace window (``HERMES_SIGTERM_GRACE``, default 1.5 s) gives
+            the daemon time to: detect the interrupt (next 200 ms poll) →
+            call _kill_process (SIGTERM + 1 s wait + SIGKILL if needed) →
+            return from _wait_for_process.  ``time.sleep`` releases the
+            GIL so the daemon actually runs during the window.
+            """
             logger.debug("Received signal %s, triggering graceful shutdown", signum)
+            try:
+                if getattr(self, "agent", None) and getattr(self, "_agent_running", False):
+                    self.agent.interrupt(f"received signal {signum}")
+                    import time as _t
+                    try:
+                        _grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5"))
+                    except (TypeError, ValueError):
+                        _grace = 1.5
+                    if _grace > 0:
+                        _t.sleep(_grace)
+            except Exception:
+                pass  # never block signal handling
             raise KeyboardInterrupt()
         
         try:
@@ -10371,6 +10474,45 @@ def main(
     
     # Register cleanup for single-query mode (interactive mode registers in run())
     atexit.register(_run_cleanup)
+
+    # Also install signal handlers in single-query / `-q` mode.  Interactive
+    # mode registers its own inside HermesCLI.run(), but `-q` runs
+    # cli.agent.run_conversation() below and AIAgent spawns worker threads
+    # for tools — so when SIGTERM arrives on the main thread, raising
+    # KeyboardInterrupt only unwinds the main thread, not the worker
+    # running _wait_for_process.  Python then exits, and the child subprocess
+    # (spawned with os.setsid, its own process group) is reparented to
+    # init and keeps running as an orphan.
+    #
+    # Fix: route SIGTERM/SIGHUP through agent.interrupt() which sets the
+    # per-thread interrupt flag the worker's poll loop checks every 200 ms.
+    # Give the worker a grace window to call _kill_process (SIGTERM to the
+    # process group, then SIGKILL after 1 s), then raise KeyboardInterrupt
+    # so main unwinds normally.  HERMES_SIGTERM_GRACE overrides the 1.5 s
+    # default for debugging.
+    def _signal_handler_q(signum, frame):
+        logger.debug("Received signal %s in single-query mode", signum)
+        try:
+            _agent = getattr(cli, "agent", None)
+            if _agent is not None:
+                _agent.interrupt(f"received signal {signum}")
+                import time as _t
+                try:
+                    _grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5"))
+                except (TypeError, ValueError):
+                    _grace = 1.5
+                if _grace > 0:
+                    _t.sleep(_grace)
+        except Exception:
+            pass  # never block signal handling
+        raise KeyboardInterrupt()
+    try:
+        import signal as _signal
+        _signal.signal(_signal.SIGTERM, _signal_handler_q)
+        if hasattr(_signal, "SIGHUP"):
+            _signal.signal(_signal.SIGHUP, _signal_handler_q)
+    except Exception:
+        pass  # signal handler may fail in restricted environments
     
     # Handle single query mode
     if query or image:
diff --git a/cron/scheduler.py b/cron/scheduler.py
index db5991c6f02..6e93fc02fee 100644
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -564,15 +564,53 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
         return False, f"Script execution failed: {exc}"
 
 
-def _build_job_prompt(job: dict) -> str:
-    """Build the effective prompt for a cron job, optionally loading one or more skills first."""
+def _parse_wake_gate(script_output: str) -> bool:
+    """Parse the last non-empty stdout line of a cron job's pre-check script
+    as a wake gate.
+
+    The convention (ported from nanoclaw #1232): if the last stdout line is
+    JSON like ``{"wakeAgent": false}``, the agent is skipped entirely — no
+    LLM run, no delivery. Any other output (non-JSON, missing flag, gate
+    absent, or ``wakeAgent: true``) means wake the agent normally.
+
+    Returns True if the agent should wake, False to skip.
+    """
+    if not script_output:
+        return True
+    stripped_lines = [line for line in script_output.splitlines() if line.strip()]
+    if not stripped_lines:
+        return True
+    last_line = stripped_lines[-1].strip()
+    try:
+        gate = json.loads(last_line)
+    except (json.JSONDecodeError, ValueError):
+        return True
+    if not isinstance(gate, dict):
+        return True
+    return gate.get("wakeAgent", True) is not False
+
+
+def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
+    """Build the effective prompt for a cron job, optionally loading one or more skills first.
+
+    Args:
+        job: The cron job dict.
+        prerun_script: Optional ``(success, stdout)`` from a script that has
+            already been executed by the caller (e.g. for a wake-gate check).
+            When provided, the script is not re-executed and the cached
+            result is used for prompt injection. When omitted, the script
+            (if any) runs inline as before.
+    """
     prompt = job.get("prompt", "")
     skills = job.get("skills")
 
     # Run data-collection script if configured, inject output as context.
     script_path = job.get("script")
     if script_path:
-        success, script_output = _run_job_script(script_path)
+        if prerun_script is not None:
+            success, script_output = prerun_script
+        else:
+            success, script_output = _run_job_script(script_path)
         if success:
             if script_output:
                 prompt = (
@@ -674,13 +712,41 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
     
     job_id = job["id"]
     job_name = job["name"]
-    prompt = _build_job_prompt(job)
+
+    # Wake-gate: if this job has a pre-check script, run it BEFORE building
+    # the prompt so a ``{"wakeAgent": false}`` response can short-circuit
+    # the whole agent run. We pass the result into _build_job_prompt so
+    # the script is only executed once.
+    prerun_script = None
+    script_path = job.get("script")
+    if script_path:
+        prerun_script = _run_job_script(script_path)
+        _ran_ok, _script_output = prerun_script
+        if _ran_ok and not _parse_wake_gate(_script_output):
+            logger.info(
+                "Job '%s' (ID: %s): wakeAgent=false, skipping agent run",
+                job_name, job_id,
+            )
+            silent_doc = (
+                f"# Cron Job: {job_name}\n\n"
+                f"**Job ID:** {job_id}\n"
+                f"**Run Time:** {_hermes_now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
+                "Script gate returned `wakeAgent=false` — agent skipped.\n"
+            )
+            return True, silent_doc, SILENT_MARKER, None
+
+    prompt = _build_job_prompt(job, prerun_script=prerun_script)
     origin = _resolve_origin(job)
     _cron_session_id = f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}"
 
     logger.info("Running job '%s' (ID: %s)", job_name, job_id)
     logger.info("Prompt: %s", prompt[:100])
 
+    # Mark this as a cron session so the approval system can apply cron_mode.
+    # This env var is process-wide and persists for the lifetime of the
+    # scheduler process — every job this process runs is a cron job.
+    os.environ["HERMES_CRON_SESSION"] = "1"
+
     try:
         # Inject origin context so the agent's send_message tool knows the chat.
         # Must be INSIDE the try block so the finally cleanup always runs.
diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index af694a5e2d6..2b8536062c2 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -6,6 +6,7 @@ and implement the required methods.
 """
 
 import asyncio
+import inspect
 import ipaddress
 import logging
 import os
@@ -669,6 +670,15 @@ class MessageEvent:
     # Original platform data
     raw_message: Any = None
     message_id: Optional[str] = None
+
+    # Platform-specific update identifier.  For Telegram this is the
+    # ``update_id`` from the PTB Update wrapper; other platforms currently
+    # ignore it.  Used by ``/restart`` to record the triggering update so the
+    # new gateway can advance the Telegram offset past it and avoid processing
+    # the same ``/restart`` twice if PTB's graceful-shutdown ACK times out
+    # ("Error while calling `get_updates` one more time to mark all fetched
+    # updates" in gateway.log).
+    platform_update_id: Optional[int] = None
     
     # Media attachments
     # media_urls: local file paths (for vision tool access)
@@ -871,10 +881,11 @@ class BasePlatformAdapter(ABC):
         # working on a task after --replace or manual restarts.
         self._background_tasks: set[asyncio.Task] = set()
         # One-shot callbacks to fire after the main response is delivered.
-        # Keyed by session_key.  GatewayRunner uses this to defer
-        # background-review notifications ("💾 Skill created") until the
-        # primary reply has been sent.
-        self._post_delivery_callbacks: Dict[str, Callable] = {}
+        # Keyed by session_key. Values are either a bare callback (legacy) or
+        # a ``(generation, callback)`` tuple so GatewayRunner can make deferred
+        # deliveries generation-aware and avoid stale runs clearing callbacks
+        # registered by a fresher run for the same session.
+        self._post_delivery_callbacks: Dict[str, Any] = {}
         self._expected_cancelled_tasks: set[asyncio.Task] = set()
         self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
         # Chats where auto-TTS on voice input is disabled (set by /voice off)
@@ -1392,7 +1403,13 @@ class BasePlatformAdapter(ABC):
 
         return paths, cleaned
 
-    async def _keep_typing(self, chat_id: str, interval: float = 2.0, metadata=None) -> None:
+    async def _keep_typing(
+        self,
+        chat_id: str,
+        interval: float = 2.0,
+        metadata=None,
+        stop_event: asyncio.Event | None = None,
+    ) -> None:
         """
         Continuously send typing indicator until cancelled.
         
@@ -1406,9 +1423,18 @@ class BasePlatformAdapter(ABC):
         """
         try:
             while True:
+                if stop_event is not None and stop_event.is_set():
+                    return
                 if chat_id not in self._typing_paused:
                     await self.send_typing(chat_id, metadata=metadata)
-                await asyncio.sleep(interval)
+                if stop_event is None:
+                    await asyncio.sleep(interval)
+                    continue
+                try:
+                    await asyncio.wait_for(stop_event.wait(), timeout=interval)
+                except asyncio.TimeoutError:
+                    continue
+                return
         except asyncio.CancelledError:
             pass  # Normal cancellation when handler completes
         finally:
@@ -1435,6 +1461,59 @@ class BasePlatformAdapter(ABC):
         """Resume typing indicator for a chat after approval resolves."""
         self._typing_paused.discard(chat_id)
 
+    async def interrupt_session_activity(self, session_key: str, chat_id: str) -> None:
+        """Signal the active session loop to stop and clear typing immediately."""
+        if session_key:
+            interrupt_event = self._active_sessions.get(session_key)
+            if interrupt_event is not None:
+                interrupt_event.set()
+        try:
+            await self.stop_typing(chat_id)
+        except Exception:
+            pass
+
+    def register_post_delivery_callback(
+        self,
+        session_key: str,
+        callback: Callable,
+        *,
+        generation: int | None = None,
+    ) -> None:
+        """Register a deferred callback to fire after the main response.
+
+        ``generation`` lets callers tie the callback to a specific gateway run
+        generation so stale runs cannot clear callbacks owned by a fresher run.
+        """
+        if not session_key or not callable(callback):
+            return
+        if generation is None:
+            self._post_delivery_callbacks[session_key] = callback
+        else:
+            self._post_delivery_callbacks[session_key] = (int(generation), callback)
+
+    def pop_post_delivery_callback(
+        self,
+        session_key: str,
+        *,
+        generation: int | None = None,
+    ) -> Callable | None:
+        """Pop a deferred callback, optionally requiring generation ownership."""
+        if not session_key:
+            return None
+        entry = self._post_delivery_callbacks.get(session_key)
+        if entry is None:
+            return None
+        if isinstance(entry, tuple) and len(entry) == 2:
+            entry_generation, callback = entry
+            if generation is not None and int(entry_generation) != int(generation):
+                return None
+            self._post_delivery_callbacks.pop(session_key, None)
+            return callback if callable(callback) else None
+        if generation is not None:
+            return None
+        self._post_delivery_callbacks.pop(session_key, None)
+        return entry if callable(entry) else None
+
     # ── Processing lifecycle hooks ──────────────────────────────────────────
     # Subclasses override these to react to message processing events
     # (e.g. Discord adds 👀/✅/❌ reactions).
@@ -1705,10 +1784,23 @@ class BasePlatformAdapter(ABC):
         # Fall back to a new Event only if the entry was removed externally.
         interrupt_event = self._active_sessions.get(session_key) or asyncio.Event()
         self._active_sessions[session_key] = interrupt_event
+        callback_generation = getattr(interrupt_event, "_hermes_run_generation", None)
         
         # Start continuous typing indicator (refreshes every 2 seconds)
         _thread_metadata = {"thread_id": event.source.thread_id} if event.source.thread_id else None
-        typing_task = asyncio.create_task(self._keep_typing(event.source.chat_id, metadata=_thread_metadata))
+        _keep_typing_kwargs = {"metadata": _thread_metadata}
+        try:
+            _keep_typing_sig = inspect.signature(self._keep_typing)
+        except (TypeError, ValueError):
+            _keep_typing_sig = None
+        if _keep_typing_sig is None or "stop_event" in _keep_typing_sig.parameters:
+            _keep_typing_kwargs["stop_event"] = interrupt_event
+        typing_task = asyncio.create_task(
+            self._keep_typing(
+                event.source.chat_id,
+                **_keep_typing_kwargs,
+            )
+        )
         
         try:
             await self._run_processing_hook("on_processing_start", event)
@@ -1917,9 +2009,18 @@ class BasePlatformAdapter(ABC):
             if session_key in self._pending_messages:
                 pending_event = self._pending_messages.pop(session_key)
                 logger.debug("[%s] Processing queued message from interrupt", self.name)
-                # Clean up current session before processing pending
-                if session_key in self._active_sessions:
-                    del self._active_sessions[session_key]
+                # Keep the _active_sessions entry live across the turn chain
+                # and only CLEAR the interrupt Event — do NOT delete the entry.
+                # If we deleted here, a concurrent inbound message arriving
+                # during the awaits below would pass the Level-1 guard, spawn
+                # its own _process_message_background, and run simultaneously
+                # with the recursive drain below.  Two agents on one
+                # session_key = duplicate responses, duplicate tool calls.
+                # Clearing the Event keeps the guard live so follow-ups take
+                # the busy-handler path (queue + interrupt) as intended.
+                _active = self._active_sessions.get(session_key)
+                if _active is not None:
+                    _active.clear()
                 typing_task.cancel()
                 try:
                     await typing_task
@@ -1958,7 +2059,14 @@ class BasePlatformAdapter(ABC):
         finally:
             # Fire any one-shot post-delivery callback registered for this
             # session (e.g. deferred background-review notifications).
-            _post_cb = getattr(self, "_post_delivery_callbacks", {}).pop(session_key, None)
+            _callback_generation = callback_generation
+            if hasattr(self, "pop_post_delivery_callback"):
+                _post_cb = self.pop_post_delivery_callback(
+                    session_key,
+                    generation=_callback_generation,
+                )
+            else:
+                _post_cb = getattr(self, "_post_delivery_callbacks", {}).pop(session_key, None)
             if callable(_post_cb):
                 try:
                     _post_cb()
@@ -1977,9 +2085,37 @@ class BasePlatformAdapter(ABC):
                     await self.stop_typing(event.source.chat_id)
             except Exception:
                 pass
-            # Clean up session tracking
-            if session_key in self._active_sessions:
-                del self._active_sessions[session_key]
+            # Late-arrival drain: a message may have arrived during the
+            # cleanup awaits above (typing_task cancel, stop_typing).  Such
+            # messages were caught by the Level-1 guard (entry still live, Event
+            # possibly set) and landed in _pending_messages via the
+            # busy-handler path.  Without this block, we would delete the
+            # active-session entry and the queued message would be silently
+            # dropped (user never gets a reply).
+            late_pending = self._pending_messages.pop(session_key, None)
+            if late_pending is not None:
+                logger.debug(
+                    "[%s] Late-arrival pending message during cleanup — spawning drain task",
+                    self.name,
+                )
+                _active = self._active_sessions.get(session_key)
+                if _active is not None:
+                    _active.clear()
+                drain_task = asyncio.create_task(
+                    self._process_message_background(late_pending, session_key)
+                )
+                try:
+                    self._background_tasks.add(drain_task)
+                    drain_task.add_done_callback(self._background_tasks.discard)
+                except TypeError:
+                    # Tests stub create_task() with non-hashable sentinels; tolerate.
+                    pass
+                # Leave _active_sessions[session_key] populated — the drain
+                # task's own lifecycle will clean it up.
+            else:
+                # Clean up session tracking
+                if session_key in self._active_sessions:
+                    del self._active_sessions[session_key]
     
     async def cancel_background_tasks(self) -> None:
         """Cancel any in-flight background message-processing tasks.
@@ -1987,12 +2123,26 @@ class BasePlatformAdapter(ABC):
         Used during gateway shutdown/replacement so active sessions from the old
         process do not keep running after adapters are being torn down.
         """
-        tasks = [task for task in self._background_tasks if not task.done()]
-        for task in tasks:
-            self._expected_cancelled_tasks.add(task)
-            task.cancel()
-        if tasks:
+        # Loop until no new tasks appear, bounded by MAX_DRAIN_ROUNDS.
+        # Without this, a message arriving during the
+        # `await asyncio.gather` below would spawn a fresh
+        # _process_message_background task (added to
+        # self._background_tasks via handle_message), and the
+        # _background_tasks.clear() at the end of this method would drop
+        # the reference — the task runs untracked against a disconnecting
+        # adapter, logs send-failures, and may linger until it completes
+        # on its own.  Retrying the drain narrows that window.
+        MAX_DRAIN_ROUNDS = 5
+        for _ in range(MAX_DRAIN_ROUNDS):
+            tasks = [task for task in self._background_tasks if not task.done()]
+            if not tasks:
+                break
+            for task in tasks:
+                self._expected_cancelled_tasks.add(task)
+                task.cancel()
             await asyncio.gather(*tasks, return_exceptions=True)
+            # Loop: late-arrival tasks spawned during the gather above
+            # will be in self._background_tasks now.  Re-check.
         self._background_tasks.clear()
         self._expected_cancelled_tasks.clear()
         self._pending_messages.clear()
diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py
index 5cad956a362..fce7ece4146 100644
--- a/gateway/platforms/discord.py
+++ b/gateway/platforms/discord.py
@@ -498,6 +498,7 @@ class DiscordAdapter(BasePlatformAdapter):
         self._allowed_role_ids: set = set()  # For DISCORD_ALLOWED_ROLES filtering
         # Voice channel state (per-guild)
         self._voice_clients: Dict[int, Any] = {}  # guild_id -> VoiceClient
+        self._voice_locks: Dict[int, asyncio.Lock] = {}  # guild_id -> serialize join/leave
         # Text batching: merge rapid successive messages (Telegram-style)
         self._text_batch_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_DELAY_SECONDS", "0.6"))
         self._text_batch_split_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0"))
@@ -636,6 +637,30 @@ class DiscordAdapter(BasePlatformAdapter):
 
             @self._client.event
             async def on_message(message: DiscordMessage):
+                # Wait for on_ready to finish resolving username-based
+                # allowlist entries.  Without this block, messages
+                # arriving between Discord's READY event and the end
+                # of _resolve_allowed_usernames compare author IDs
+                # (numeric) against a set that may still contain raw
+                # usernames (strings) from DISCORD_ALLOWED_USERS —
+                # legitimate users get silently rejected for the first
+                # few seconds after every reconnect.  The wait is a
+                # near-instant no-op in steady state (_ready_event is
+                # already set); only the startup / reconnect window
+                # ever blocks.
+                if not adapter_self._ready_event.is_set():
+                    try:
+                        await asyncio.wait_for(
+                            adapter_self._ready_event.wait(),
+                            timeout=30.0,
+                        )
+                    except asyncio.TimeoutError:
+                        logger.warning(
+                            "[%s] on_message timed out waiting for _ready_event; "
+                            "allowlist check may use pre-resolved entries",
+                            adapter_self.name,
+                        )
+
                 # Dedup: Discord RESUME replays events after reconnects (#4777)
                 if adapter_self._dedup.is_duplicate(str(message.id)):
                     return
@@ -1231,57 +1256,74 @@ class DiscordAdapter(BasePlatformAdapter):
     # Voice channel methods (join / leave / play)
     # ------------------------------------------------------------------
 
+    def _voice_lock_for(self, guild_id: int) -> "asyncio.Lock":
+        """Return the per-guild lock, creating it on first use.
+
+        Voice join/leave/move must be serialized per guild — without
+        this, two concurrent /voice channel invocations both see
+        _voice_clients.get(guild_id) return None, both call
+        channel.connect(), and discord.py raises ClientException
+        ('Already connected') on the loser.
+        """
+        lock = self._voice_locks.get(guild_id)
+        if lock is None:
+            lock = asyncio.Lock()
+            self._voice_locks[guild_id] = lock
+        return lock
+
     async def join_voice_channel(self, channel) -> bool:
         """Join a Discord voice channel. Returns True on success."""
         if not self._client or not DISCORD_AVAILABLE:
             return False
         guild_id = channel.guild.id
 
-        # Already connected in this guild?
-        existing = self._voice_clients.get(guild_id)
-        if existing and existing.is_connected():
-            if existing.channel.id == channel.id:
+        async with self._voice_lock_for(guild_id):
+            # Already connected in this guild?
+            existing = self._voice_clients.get(guild_id)
+            if existing and existing.is_connected():
+                if existing.channel.id == channel.id:
+                    self._reset_voice_timeout(guild_id)
+                    return True
+                await existing.move_to(channel)
                 self._reset_voice_timeout(guild_id)
                 return True
-            await existing.move_to(channel)
+
+            vc = await channel.connect()
+            self._voice_clients[guild_id] = vc
             self._reset_voice_timeout(guild_id)
+
+            # Start voice receiver (Phase 2: listen to users)
+            try:
+                receiver = VoiceReceiver(vc, allowed_user_ids=self._allowed_user_ids)
+                receiver.start()
+                self._voice_receivers[guild_id] = receiver
+                self._voice_listen_tasks[guild_id] = asyncio.ensure_future(
+                    self._voice_listen_loop(guild_id)
+                )
+            except Exception as e:
+                logger.warning("Voice receiver failed to start: %s", e)
+
             return True
 
-        vc = await channel.connect()
-        self._voice_clients[guild_id] = vc
-        self._reset_voice_timeout(guild_id)
-
-        # Start voice receiver (Phase 2: listen to users)
-        try:
-            receiver = VoiceReceiver(vc, allowed_user_ids=self._allowed_user_ids)
-            receiver.start()
-            self._voice_receivers[guild_id] = receiver
-            self._voice_listen_tasks[guild_id] = asyncio.ensure_future(
-                self._voice_listen_loop(guild_id)
-            )
-        except Exception as e:
-            logger.warning("Voice receiver failed to start: %s", e)
-
-        return True
-
     async def leave_voice_channel(self, guild_id: int) -> None:
         """Disconnect from the voice channel in a guild."""
-        # Stop voice receiver first
-        receiver = self._voice_receivers.pop(guild_id, None)
-        if receiver:
-            receiver.stop()
-        listen_task = self._voice_listen_tasks.pop(guild_id, None)
-        if listen_task:
-            listen_task.cancel()
+        async with self._voice_lock_for(guild_id):
+            # Stop voice receiver first
+            receiver = self._voice_receivers.pop(guild_id, None)
+            if receiver:
+                receiver.stop()
+            listen_task = self._voice_listen_tasks.pop(guild_id, None)
+            if listen_task:
+                listen_task.cancel()
 
-        vc = self._voice_clients.pop(guild_id, None)
-        if vc and vc.is_connected():
-            await vc.disconnect()
-        task = self._voice_timeout_tasks.pop(guild_id, None)
-        if task:
-            task.cancel()
-        self._voice_text_channels.pop(guild_id, None)
-        self._voice_sources.pop(guild_id, None)
+            vc = self._voice_clients.pop(guild_id, None)
+            if vc and vc.is_connected():
+                await vc.disconnect()
+            task = self._voice_timeout_tasks.pop(guild_id, None)
+            if task:
+                task.cancel()
+            self._voice_text_channels.pop(guild_id, None)
+            self._voice_sources.pop(guild_id, None)
 
     # Maximum seconds to wait for voice playback before giving up
     PLAYBACK_TIMEOUT = 120
@@ -1933,6 +1975,24 @@ class DiscordAdapter(BasePlatformAdapter):
         the "thinking..." indicator is replaced with that text; otherwise it
         is deleted so the channel isn't cluttered.
         """
+        # Log the invoker so ghost-command reports can be triaged.  Discord
+        # native slash invocations are always user-initiated (no bot can fire
+        # them), but mobile autocomplete / keyboard shortcuts / other users
+        # in the same channel are easy to miss in post-mortems.
+        try:
+            _user = interaction.user
+            _chan_id = getattr(interaction.channel, "id", None) or getattr(interaction, "channel_id", None)
+            logger.info(
+                "[Discord] slash '%s' invoked by user=%s id=%s channel=%s guild=%s",
+                command_text,
+                getattr(_user, "name", "?"),
+                getattr(_user, "id", "?"),
+                _chan_id,
+                getattr(interaction, "guild_id", None),
+            )
+        except Exception:
+            pass  # logging must never block command dispatch
+
         await interaction.response.defer(ephemeral=True)
         event = self._build_slash_event(interaction, command_text)
         await self.handle_message(event)
@@ -1994,6 +2054,11 @@ class DiscordAdapter(BasePlatformAdapter):
         async def slash_stop(interaction: discord.Interaction):
             await self._run_simple_slash(interaction, "/stop", "Stop requested~")
 
+        @tree.command(name="steer", description="Inject a message after the next tool call (no interrupt)")
+        @discord.app_commands.describe(prompt="Text to inject into the agent's next tool result")
+        async def slash_steer(interaction: discord.Interaction, prompt: str):
+            await self._run_simple_slash(interaction, f"/steer {prompt}".strip())
+
         @tree.command(name="compress", description="Compress conversation context")
         async def slash_compress(interaction: discord.Interaction):
             await self._run_simple_slash(interaction, "/compress")
@@ -3242,7 +3307,20 @@ class DiscordAdapter(BasePlatformAdapter):
                 "[Discord] Flushing text batch %s (%d chars)",
                 key, len(event.text or ""),
             )
-            await self.handle_message(event)
+            # Shield the downstream dispatch so that a subsequent chunk
+            # arriving while handle_message is mid-flight cannot cancel
+            # the running agent turn.  _enqueue_text_event always cancels
+            # the prior flush task when a new chunk lands; without this
+            # shield, CancelledError would propagate from our task down
+            # into handle_message → the agent's streaming request,
+            # aborting the response the user was waiting on.  The new
+            # chunk is handled by the fresh flush task regardless.
+            await asyncio.shield(self.handle_message(event))
+        except asyncio.CancelledError:
+            # Reached when this flush task is cancelled, either before the
+            # dispatch starts or while awaiting the shield — an in-flight
+            # handle_message keeps running.  Exit cleanly so finally cleans up.
+            pass
         finally:
             if self._pending_text_batch_tasks.get(key) is current_task:
                 self._pending_text_batch_tasks.pop(key, None)
diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py
index 351337e8275..3b57db46d3c 100644
--- a/gateway/platforms/feishu.py
+++ b/gateway/platforms/feishu.py
@@ -119,6 +119,8 @@ _MARKDOWN_HINT_RE = re.compile(
     re.MULTILINE,
 )
 _MARKDOWN_LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
+_MARKDOWN_FENCE_OPEN_RE = re.compile(r"^```([^\n`]*)\s*$")
+_MARKDOWN_FENCE_CLOSE_RE = re.compile(r"^```\s*$")
 _MENTION_RE = re.compile(r"@_user_\d+")
 _MULTISPACE_RE = re.compile(r"[ \t]{2,}")
 _POST_CONTENT_INVALID_RE = re.compile(r"content format of the post type is incorrect", re.IGNORECASE)
@@ -430,23 +432,66 @@ def _coerce_required_int(value: Any, default: int, min_value: int = 0) -> int:
 
 
 def _build_markdown_post_payload(content: str) -> str:
+    rows = _build_markdown_post_rows(content)
     return json.dumps(
         {
             "zh_cn": {
-                "content": [
-                    [
-                        {
-                            "tag": "md",
-                            "text": content,
-                        }
-                    ]
-                ],
+                "content": rows,
             }
         },
         ensure_ascii=False,
     )
 
 
+def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]:
+    """Build Feishu post rows while isolating fenced code blocks.
+
+    Feishu's `md` renderer can swallow trailing content when a fenced code block
+    appears inside one large markdown element. Split the reply at real fence
+    lines so prose before/after the code block remains visible while code stays
+    in a dedicated row.
+    """
+    if not content:
+        return [[{"tag": "md", "text": ""}]]
+    if "```" not in content:
+        return [[{"tag": "md", "text": content}]]
+
+    rows: List[List[Dict[str, str]]] = []
+    current: List[str] = []
+    in_code_block = False
+
+    def _flush_current() -> None:
+        nonlocal current
+        if not current:
+            return
+        segment = "\n".join(current)
+        if segment.strip():
+            rows.append([{"tag": "md", "text": segment}])
+        current = []
+
+    for raw_line in content.splitlines():
+        stripped_line = raw_line.strip()
+        is_fence = bool(
+            _MARKDOWN_FENCE_CLOSE_RE.match(stripped_line)
+            if in_code_block
+            else _MARKDOWN_FENCE_OPEN_RE.match(stripped_line)
+        )
+
+        if is_fence:
+            if not in_code_block:
+                _flush_current()
+            current.append(raw_line)
+            in_code_block = not in_code_block
+            if not in_code_block:
+                _flush_current()
+            continue
+
+        current.append(raw_line)
+
+    _flush_current()
+    return rows or [[{"tag": "md", "text": content}]]
+
+
 def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
     resolved = _resolve_post_payload(payload)
     if not resolved:
diff --git a/gateway/platforms/signal.py b/gateway/platforms/signal.py
index 617713ad908..4df4193bc0d 100644
--- a/gateway/platforms/signal.py
+++ b/gateway/platforms/signal.py
@@ -160,6 +160,14 @@ class SignalAdapter(BasePlatformAdapter):
         self._sse_task: Optional[asyncio.Task] = None
         self._health_monitor_task: Optional[asyncio.Task] = None
         self._typing_tasks: Dict[str, asyncio.Task] = {}
+        # Per-chat typing-indicator backoff. When signal-cli reports
+        # NETWORK_FAILURE (recipient offline / unroutable), base.py's
+        # _keep_typing refresh loop would otherwise hammer sendTyping every
+        # ~2s indefinitely, producing WARNING-level log spam and pointless
+        # RPC traffic. We track consecutive failures per chat and skip the
+        # RPC during a cooldown window instead.
+        self._typing_failures: Dict[str, int] = {}
+        self._typing_skip_until: Dict[str, float] = {}
         self._running = False
         self._last_sse_activity = 0.0
         self._sse_response: Optional[httpx.Response] = None
@@ -548,8 +556,22 @@ class SignalAdapter(BasePlatformAdapter):
     # JSON-RPC Communication
     # ------------------------------------------------------------------
 
-    async def _rpc(self, method: str, params: dict, rpc_id: str = None) -> Any:
-        """Send a JSON-RPC 2.0 request to signal-cli daemon."""
+    async def _rpc(
+        self,
+        method: str,
+        params: dict,
+        rpc_id: str = None,
+        *,
+        log_failures: bool = True,
+    ) -> Any:
+        """Send a JSON-RPC 2.0 request to signal-cli daemon.
+
+        When ``log_failures=False``, error and exception paths log at DEBUG
+        instead of WARNING — used by the typing-indicator path to silence
+        repeated NETWORK_FAILURE spam for unreachable recipients while
+        still preserving visibility for the first occurrence and for
+        unrelated RPCs.
+        """
         if not self.client:
             logger.warning("Signal: RPC called but client not connected")
             return None
@@ -574,13 +596,19 @@ class SignalAdapter(BasePlatformAdapter):
             data = resp.json()
 
             if "error" in data:
-                logger.warning("Signal RPC error (%s): %s", method, data["error"])
+                if log_failures:
+                    logger.warning("Signal RPC error (%s): %s", method, data["error"])
+                else:
+                    logger.debug("Signal RPC error (%s): %s", method, data["error"])
                 return None
 
             return data.get("result")
 
         except Exception as e:
-            logger.warning("Signal RPC %s failed: %s", method, e)
+            if log_failures:
+                logger.warning("Signal RPC %s failed: %s", method, e)
+            else:
+                logger.debug("Signal RPC %s failed: %s", method, e)
             return None
 
     # ------------------------------------------------------------------
@@ -627,7 +655,28 @@ class SignalAdapter(BasePlatformAdapter):
                 self._recent_sent_timestamps.pop()
 
     async def send_typing(self, chat_id: str, metadata=None) -> None:
-        """Send a typing indicator."""
+        """Send a typing indicator.
+
+        base.py's ``_keep_typing`` refresh loop calls this every ~2s while
+        the agent is processing. If signal-cli returns NETWORK_FAILURE for
+        this recipient (offline, unroutable, group membership lost, etc.)
+        the unmitigated behaviour is: a WARNING log every 2 seconds for as
+        long as the agent keeps running. Instead we:
+
+        - silence the WARNING after the first consecutive failure (subsequent
+          attempts log at DEBUG) so transport issues are still visible once
+          but don't flood the log,
+        - skip the RPC entirely during an exponential cooldown window once
+          three consecutive failures have happened, so we stop hammering
+          signal-cli with requests it can't deliver.
+
+        A successful sendTyping clears the counters.
+        """
+        now = time.monotonic()
+        skip_until = self._typing_skip_until.get(chat_id, 0.0)
+        if now < skip_until:
+            return
+
         params: Dict[str, Any] = {
             "account": self.account,
         }
@@ -637,7 +686,26 @@ class SignalAdapter(BasePlatformAdapter):
         else:
             params["recipient"] = [chat_id]
 
-        await self._rpc("sendTyping", params, rpc_id="typing")
+        fails = self._typing_failures.get(chat_id, 0)
+        result = await self._rpc(
+            "sendTyping",
+            params,
+            rpc_id="typing",
+            log_failures=(fails == 0),
+        )
+
+        if result is None:
+            fails += 1
+            self._typing_failures[chat_id] = fails
+            # After 3 consecutive failures, back off exponentially (16s,
+            # 32s, 60s cap) to stop spamming signal-cli for a recipient
+            # that clearly isn't reachable right now.
+            if fails >= 3:
+                backoff = min(60.0, 16.0 * (2 ** (fails - 3)))
+                self._typing_skip_until[chat_id] = now + backoff
+        else:
+            self._typing_failures.pop(chat_id, None)
+            self._typing_skip_until.pop(chat_id, None)
 
     async def send_image(
         self,
@@ -789,6 +857,10 @@ class SignalAdapter(BasePlatformAdapter):
                 await task
             except asyncio.CancelledError:
                 pass
+        # Reset per-chat typing backoff state so the next agent turn starts
+        # fresh rather than inheriting a cooldown from a prior conversation.
+        self._typing_failures.pop(chat_id, None)
+        self._typing_skip_until.pop(chat_id, None)
 
     async def stop_typing(self, chat_id: str) -> None:
         """Public interface for stopping typing — called by base adapter's
diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index 5b1fef1337b..0b74c4e15f4 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -1657,6 +1657,21 @@ class TelegramAdapter(BasePlatformAdapter):
         except Exception as exc:
             logger.error("Failed to write update response from callback: %s", exc)
 
+    def _missing_media_path_error(self, label: str, path: str) -> str:
+        """Build an actionable file-not-found error for gateway MEDIA delivery.
+
+        Paths like /workspace/... or /output/... often only exist inside the
+        Docker sandbox, while the gateway process runs on the host.
+        """
+        error = f"{label} file not found: {path}"
+        if path.startswith(("/workspace/", "/output/", "/outputs/")):
+            error += (
+                " (path may only exist inside the Docker sandbox. "
+                "Bind-mount a host directory and emit the host-visible "
+                "path in MEDIA: for gateway file delivery.)"
+            )
+        return error
+
     async def send_voice(
         self,
         chat_id: str,
@@ -1673,7 +1688,7 @@ class TelegramAdapter(BasePlatformAdapter):
         try:
             import os
             if not os.path.exists(audio_path):
-                return SendResult(success=False, error=f"Audio file not found: {audio_path}")
+                return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path))
             
             with open(audio_path, "rb") as audio_file:
                 # .ogg files -> send as voice (round playable bubble)
@@ -1722,7 +1737,7 @@ class TelegramAdapter(BasePlatformAdapter):
         try:
             import os
             if not os.path.exists(image_path):
-                return SendResult(success=False, error=f"Image file not found: {image_path}")
+                return SendResult(success=False, error=self._missing_media_path_error("Image", image_path))
 
             _thread = self._metadata_thread_id(metadata)
             with open(image_path, "rb") as image_file:
@@ -1759,7 +1774,7 @@ class TelegramAdapter(BasePlatformAdapter):
 
         try:
             if not os.path.exists(file_path):
-                return SendResult(success=False, error=f"File not found: {file_path}")
+                return SendResult(success=False, error=self._missing_media_path_error("File", file_path))
 
             display_name = file_name or os.path.basename(file_path)
             _thread = self._metadata_thread_id(metadata)
@@ -1793,7 +1808,7 @@ class TelegramAdapter(BasePlatformAdapter):
 
         try:
             if not os.path.exists(video_path):
-                return SendResult(success=False, error=f"Video file not found: {video_path}")
+                return SendResult(success=False, error=self._missing_media_path_error("Video", video_path))
 
             _thread = self._metadata_thread_id(metadata)
             with open(video_path, "rb") as f:
@@ -2326,7 +2341,7 @@ class TelegramAdapter(BasePlatformAdapter):
         if not self._should_process_message(update.message):
             return
 
-        event = self._build_message_event(update.message, MessageType.TEXT)
+        event = self._build_message_event(update.message, MessageType.TEXT, update_id=update.update_id)
         event.text = self._clean_bot_trigger_text(event.text)
         self._enqueue_text_event(event)
     
@@ -2337,7 +2352,7 @@ class TelegramAdapter(BasePlatformAdapter):
         if not self._should_process_message(update.message, is_command=True):
             return
         
-        event = self._build_message_event(update.message, MessageType.COMMAND)
+        event = self._build_message_event(update.message, MessageType.COMMAND, update_id=update.update_id)
         await self.handle_message(event)
     
     async def _handle_location_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
@@ -2373,7 +2388,7 @@ class TelegramAdapter(BasePlatformAdapter):
         parts.append(f"Map: https://www.google.com/maps/search/?api=1&query={lat},{lon}")
         parts.append("Ask what they'd like to find nearby (restaurants, cafes, etc.) and any preferences.")
 
-        event = self._build_message_event(msg, MessageType.LOCATION)
+        event = self._build_message_event(msg, MessageType.LOCATION, update_id=update.update_id)
         event.text = "\n".join(parts)
         await self.handle_message(event)
 
@@ -2524,7 +2539,7 @@ class TelegramAdapter(BasePlatformAdapter):
         else:
             msg_type = MessageType.DOCUMENT
         
-        event = self._build_message_event(msg, msg_type)
+        event = self._build_message_event(msg, msg_type, update_id=update.update_id)
         
         # Add caption as text
         if msg.caption:
@@ -2863,8 +2878,19 @@ class TelegramAdapter(BasePlatformAdapter):
                 self.name, cache_key, thread_id,
             )
 
-    def _build_message_event(self, message: Message, msg_type: MessageType) -> MessageEvent:
-        """Build a MessageEvent from a Telegram message."""
+    def _build_message_event(
+        self,
+        message: Message,
+        msg_type: MessageType,
+        update_id: Optional[int] = None,
+    ) -> MessageEvent:
+        """Build a MessageEvent from a Telegram message.
+
+        ``update_id`` is the ``Update.update_id`` from PTB; passing it through
+        lets ``/restart`` record the triggering offset so the new gateway
+        process can advance past it (prevents ``/restart`` being re-delivered
+        when PTB's graceful-shutdown ACK fails).
+        """
         chat = message.chat
         user = message.from_user
         
@@ -2915,8 +2941,8 @@ class TelegramAdapter(BasePlatformAdapter):
             chat_id=str(chat.id),
             chat_name=chat.title or (chat.full_name if hasattr(chat, "full_name") else None),
             chat_type=chat_type,
-            user_id=str(user.id) if user else None,
-            user_name=user.full_name if user else None,
+            user_id=str(user.id) if user else (str(chat.id) if chat_type == "dm" else None),
+            user_name=user.full_name if user else (chat.full_name if hasattr(chat, "full_name") and chat_type == "dm" else None),
             thread_id=thread_id_str,
             chat_topic=chat_topic,
         )
@@ -2943,6 +2969,7 @@ class TelegramAdapter(BasePlatformAdapter):
             source=source,
             raw_message=message,
             message_id=str(message.message_id),
+            platform_update_id=update_id,
             reply_to_message_id=reply_to_id,
             reply_to_text=reply_to_text,
             auto_skill=topic_skill,
diff --git a/gateway/platforms/webhook.py b/gateway/platforms/webhook.py
index c37445b17e8..9995ac38709 100644
--- a/gateway/platforms/webhook.py
+++ b/gateway/platforms/webhook.py
@@ -13,6 +13,10 @@ Each route defines:
   - skills: optional list of skills to load for the agent
   - deliver: where to send the response (github_comment, telegram, etc.)
   - deliver_extra: additional delivery config (repo, pr_number, chat_id)
+  - deliver_only: if true, skip the agent — the rendered prompt IS the
+    message that gets delivered.  Use for external push notifications
+    (Supabase, monitoring alerts, inter-agent pings) where zero LLM cost
+    and sub-second delivery matter more than agent reasoning.
 
 Security:
   - HMAC secret is required per route (validated at startup)
@@ -122,6 +126,19 @@ class WebhookAdapter(BasePlatformAdapter):
                     f"For testing without auth, set secret to '{_INSECURE_NO_AUTH}'."
                 )
 
+            # deliver_only routes bypass the agent — the POST body becomes a
+            # direct push notification via the configured delivery target.
+            # Validate up-front so misconfiguration surfaces at startup rather
+            # than on the first webhook POST.
+            if route.get("deliver_only"):
+                deliver = route.get("deliver", "log")
+                if not deliver or deliver == "log":
+                    raise ValueError(
+                        f"[webhook] Route '{name}' has deliver_only=true but "
+                        f"deliver is '{deliver}'. Direct delivery requires a "
+                        f"real target (telegram, discord, slack, github_comment, etc.)."
+                    )
+
         app = web.Application()
         app.router.add_get("/health", self._handle_health)
         app.router.add_post("/webhooks/{route_name}", self._handle_webhook)
@@ -419,6 +436,64 @@ class WebhookAdapter(BasePlatformAdapter):
             )
         self._seen_deliveries[delivery_id] = now
 
+        # ── Direct delivery mode (deliver_only) ─────────────────
+        # Skip the agent entirely — the rendered prompt IS the message we
+        # deliver.  Use case: external services (Supabase, monitoring,
+        # cron jobs, other agents) that need to push a plain notification
+        # to a user's chat with zero LLM cost.  Reuses the same HMAC auth,
+        # rate limiting, idempotency, and template rendering as agent mode.
+        if route_config.get("deliver_only"):
+            delivery = {
+                "deliver": route_config.get("deliver", "log"),
+                "deliver_extra": self._render_delivery_extra(
+                    route_config.get("deliver_extra", {}), payload
+                ),
+                "payload": payload,
+            }
+            logger.info(
+                "[webhook] direct-deliver event=%s route=%s target=%s msg_len=%d delivery=%s",
+                event_type,
+                route_name,
+                delivery["deliver"],
+                len(prompt),
+                delivery_id,
+            )
+            try:
+                result = await self._direct_deliver(prompt, delivery)
+            except Exception:
+                logger.exception(
+                    "[webhook] direct-deliver failed route=%s delivery=%s",
+                    route_name,
+                    delivery_id,
+                )
+                return web.json_response(
+                    {"status": "error", "error": "Delivery failed", "delivery_id": delivery_id},
+                    status=502,
+                )
+
+            if result.success:
+                return web.json_response(
+                    {
+                        "status": "delivered",
+                        "route": route_name,
+                        "target": delivery["deliver"],
+                        "delivery_id": delivery_id,
+                    },
+                    status=200,
+                )
+            # Delivery attempted but target rejected it — surface as 502
+            # with a generic error (don't leak adapter-level detail).
+            logger.warning(
+                "[webhook] direct-deliver target rejected route=%s target=%s error=%s",
+                route_name,
+                delivery["deliver"],
+                result.error,
+            )
+            return web.json_response(
+                {"status": "error", "error": "Delivery failed", "delivery_id": delivery_id},
+                status=502,
+            )
+
         # Use delivery_id in session key so concurrent webhooks on the
         # same route get independent agent runs (not queued/interrupted).
         session_chat_id = f"webhook:{route_name}:{delivery_id}"
@@ -572,6 +647,34 @@ class WebhookAdapter(BasePlatformAdapter):
     # Response delivery
     # ------------------------------------------------------------------
 
+    async def _direct_deliver(
+        self, content: str, delivery: dict
+    ) -> SendResult:
+        """Send *content* to the route's delivery target, bypassing the agent.
+
+        This is the ``deliver_only`` path: the rendered template is the
+        literal message body.  ``delivery["deliver"]`` selects the target
+        (defaulting to ``"log"``): ``github_comment`` has its own helper,
+        ``log`` only writes a truncated log line, and every other name is
+        handed to the cross-platform dispatcher.
+
+        Returns the ``SendResult`` produced by the chosen branch.
+        """
+        target = delivery.get("deliver", "log")
+
+        if target == "github_comment":
+            return await self._deliver_github_comment(content, delivery)
+
+        if target != "log":
+            # Any other name goes to the cross-platform dispatcher, which
+            # validates the target and routes via the gateway runner.
+            return await self._deliver_cross_platform(target, content, delivery)
+
+        # Startup validation rejects deliver_only with deliver=log, so this
+        # branch is only a defensive guard.
+        logger.info("[webhook] direct-deliver log-only: %s", content[:200])
+        return SendResult(success=True)
+
     async def _deliver_github_comment(
         self, content: str, delivery: dict
     ) -> SendResult:
diff --git a/gateway/run.py b/gateway/run.py
index caa55e72caa..feb55eb1d62 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -96,6 +96,10 @@ from hermes_cli.env_loader import load_hermes_dotenv
 _env_path = _hermes_home / '.env'
 load_hermes_dotenv(hermes_home=_hermes_home, project_env=Path(__file__).resolve().parents[1] / '.env')
 
+
+_DOCKER_VOLUME_SPEC_RE = re.compile(r"^(?P<host>.+):(?P<container>/[^:]+?)(?::(?P<options>[^:]+))?$")
+_DOCKER_MEDIA_OUTPUT_CONTAINER_PATHS = {"/output", "/outputs"}
+
 # Bridge config.yaml values into the environment so os.getenv() picks them up.
 # config.yaml is authoritative for terminal settings — overrides .env.
 _config_path = _hermes_home / 'config.yaml'
@@ -398,6 +402,33 @@ def _dequeue_pending_event(adapter, session_key: str) -> MessageEvent | None:
     return adapter.get_pending_message(session_key)
 
 
+_INTERRUPT_REASON_STOP = "Stop requested"
+_INTERRUPT_REASON_RESET = "Session reset requested"
+_INTERRUPT_REASON_TIMEOUT = "Execution timed out (inactivity)"
+_INTERRUPT_REASON_SSE_DISCONNECT = "SSE client disconnected"
+_INTERRUPT_REASON_GATEWAY_SHUTDOWN = "Gateway shutting down"
+_INTERRUPT_REASON_GATEWAY_RESTART = "Gateway restarting"
+# Lowercased forms of the reasons above; _is_control_interrupt_message matches against this set.
+_CONTROL_INTERRUPT_MESSAGES = frozenset(
+    {
+        _INTERRUPT_REASON_STOP.lower(),
+        _INTERRUPT_REASON_RESET.lower(),
+        _INTERRUPT_REASON_TIMEOUT.lower(),
+        _INTERRUPT_REASON_SSE_DISCONNECT.lower(),
+        _INTERRUPT_REASON_GATEWAY_SHUTDOWN.lower(),
+        _INTERRUPT_REASON_GATEWAY_RESTART.lower(),
+    }
+)
+
+
+def _is_control_interrupt_message(message: Optional[str]) -> bool:
+    """Tell whether *message* is one of the gateway's own interrupt reasons."""
+    if not message:
+        return False
+    # Collapse whitespace runs and lowercase before the set lookup.
+    return " ".join(str(message).split()).lower() in _CONTROL_INTERRUPT_MESSAGES
+
+
 def _check_unavailable_skill(command_name: str) -> str | None:
     """Check if a command matches a known-but-inactive skill.
 
@@ -585,6 +616,7 @@ class GatewayRunner:
     def __init__(self, config: Optional[GatewayConfig] = None):
         self.config = config or load_gateway_config()
         self.adapters: Dict[Platform, BasePlatformAdapter] = {}
+        self._warn_if_docker_media_delivery_is_risky()
 
         # Load ephemeral config from config.yaml / env vars.
         # Both are injected at API-call time only and never persisted.
@@ -625,6 +657,7 @@ class GatewayRunner:
         self._running_agents_ts: Dict[str, float] = {}  # start timestamp per session
         self._pending_messages: Dict[str, str] = {}  # Queued messages during interrupt
         self._busy_ack_ts: Dict[str, float] = {}  # last busy-ack timestamp per session (debounce)
+        self._session_run_generation: Dict[str, int] = {}
 
         # Cache AIAgent instances per session to preserve prompt caching.
         # Without this, a new AIAgent is created per message, rebuilding the
@@ -691,6 +724,53 @@ class GatewayRunner:
         self._background_tasks: set = set()
 
 
+    def _warn_if_docker_media_delivery_is_risky(self) -> None:
+        """Warn when Docker-backed gateways lack an explicit export mount.
+
+        MEDIA delivery happens in the gateway process, so paths emitted by the model
+        must be readable from the host. A plain container-local path like
+        `/workspace/report.txt` or `/output/report.txt` often exists only inside
+        Docker, so users commonly need a dedicated export mount such as
+        `host-dir:/output`.
+
+        Reads ``TERMINAL_ENV`` and ``TERMINAL_DOCKER_VOLUMES`` (expected to be a
+        JSON list of ``host:container[:options]`` strings); a value that fails
+        to parse is treated as "no volumes configured".
+        """
+        if os.getenv("TERMINAL_ENV", "").strip().lower() != "docker":
+            return
+
+        # Local, API-server and webhook platforms are not counted as messaging.
+        non_messaging = {Platform.LOCAL, Platform.API_SERVER, Platform.WEBHOOK}
+        connected = self.config.get_connected_platforms()
+        if all(p in non_messaging for p in connected):
+            return
+
+        raw_volumes = os.getenv("TERMINAL_DOCKER_VOLUMES", "").strip()
+        volumes: List[str] = []
+        if raw_volumes:
+            try:
+                parsed = json.loads(raw_volumes)
+            except Exception:
+                # Best-effort: this runs during __init__ and must not block startup.
+                logger.debug("Could not parse TERMINAL_DOCKER_VOLUMES for gateway media warning", exc_info=True)
+            else:
+                if isinstance(parsed, list):
+                    volumes = [v for v in parsed if isinstance(v, str)]
+
+        # Docker accepts a trailing slash on the container side ("host:/output/"),
+        # so strip it before comparing against the known export paths.
+        mounts = filter(None, map(_DOCKER_VOLUME_SPEC_RE.match, volumes))
+        if any(m.group("container").rstrip("/") in _DOCKER_MEDIA_OUTPUT_CONTAINER_PATHS for m in mounts):
+            return
+
+        logger.warning(
+            "Docker backend is enabled for the messaging gateway but no explicit host-visible "
+            "output mount (for example '/home/user/.hermes/cache/documents:/output') is configured. "
+            "This is fine if the model already emits host-visible paths, but MEDIA file delivery can fail "
+            "for container-local paths like '/workspace/...' or '/output/...'."
+        )
+
 
 
     # -- Setup skill availability ----------------------------------------
@@ -752,6 +832,26 @@ class GatewayRunner:
             chat_id for chat_id, mode in self._voice_mode.items() if mode == "off"
         )
 
+    async def _safe_adapter_disconnect(self, adapter, platform) -> None:
+        """Best-effort ``adapter.disconnect()`` that never propagates an error.
+
+        Invoked after ``adapter.connect()`` returned failure or raised, when
+        the adapter may still hold partially-initialised resources (an
+        aiohttp.ClientSession, poll tasks, child subprocesses) that would
+        otherwise leak and show up as "Unclosed client session" warnings
+        at process exit.
+
+        Any exception from ``disconnect()`` is logged at debug level and
+        dropped, because callers run this inside error-handling blocks.
+        """
+        try:
+            await adapter.disconnect()
+        except Exception as exc:
+            label = "adapter" if platform is None else platform.value
+            logger.debug(
+                "Defensive %s disconnect after failed connect raised: %s", label, exc,
+            )
+
     # -----------------------------------------------------------------
 
     def _flush_memories_for_session(
@@ -1547,7 +1647,7 @@ class GatewayRunner:
         action = "restarting" if self._restart_requested else "shutting down"
         hint = (
             "Your current task will be interrupted. "
-            "Send any message after restart to resume where it left off."
+            "Send any message after restart and I'll try to resume where you left off."
             if self._restart_requested
             else "Your current task will be interrupted."
         )
@@ -1921,6 +2021,15 @@ class GatewayRunner:
                     logger.info("✓ %s connected", platform.value)
                 else:
                     logger.warning("✗ %s failed to connect", platform.value)
+                    # Defensive cleanup: a failed connect() may have
+                    # allocated resources (aiohttp.ClientSession, poll
+                    # tasks, bridge subprocesses) before giving up.
+                    # Without this call, those resources are orphaned
+                    # and Python logs "Unclosed client session" at
+                    # process exit. Adapter disconnect() implementations
+                    # are expected to be idempotent and tolerate
+                    # partial-init state.
+                    await self._safe_adapter_disconnect(adapter, platform)
                     if adapter.has_fatal_error:
                         self._update_platform_runtime_status(
                             platform.value,
@@ -1961,6 +2070,10 @@ class GatewayRunner:
                         }
             except Exception as e:
                 logger.error("✗ %s error: %s", platform.value, e)
+                # Same defensive cleanup path for exceptions — an adapter
+                # that raised mid-connect may still have a live
+                # aiohttp.ClientSession or child subprocess.
+                await self._safe_adapter_disconnect(adapter, platform)
                 self._update_platform_runtime_status(
                     platform.value,
                     platform_state="retrying",
@@ -2381,8 +2494,42 @@ class GatewayRunner:
                     timeout,
                     self._running_agent_count(),
                 )
+                # Mark forcibly-interrupted sessions as resume_pending BEFORE
+                # interrupting the agents.  This preserves each session's
+                # session_id + transcript so the next message on the same
+                # session_key auto-resumes from the existing conversation
+                # instead of getting routed through suspend_recently_active()
+                # and converted into a fresh session.  Terminal escalation
+                # for genuinely stuck sessions still flows through the
+                # existing ``.restart_failure_counts`` stuck-loop counter
+                # (incremented below, threshold 3), which sets
+                # ``suspended=True`` and overrides resume_pending.
+                #
+                # Iterate self._running_agents (current) rather than the
+                # drain-start ``active_agents`` snapshot — the snapshot
+                # may include sessions that finished gracefully during
+                # the drain window, and marking those falsely would give
+                # them a stray restart-interruption system note on their
+                # next turn even though their previous turn completed
+                # cleanly.  Skip pending sentinels for the same reason
+                # _interrupt_running_agents() does: their agent hasn't
+                # started yet, there's nothing to interrupt, and the
+                # session shouldn't carry a misleading resume flag.
+                _resume_reason = (
+                    "restart_timeout" if self._restart_requested else "shutdown_timeout"
+                )
+                for _sk, _agent in list(self._running_agents.items()):
+                    if _agent is _AGENT_PENDING_SENTINEL:
+                        continue
+                    try:
+                        self.session_store.mark_resume_pending(_sk, _resume_reason)
+                    except Exception as _e:
+                        logger.debug(
+                            "mark_resume_pending failed for %s: %s",
+                            _sk[:20], _e,
+                        )
                 self._interrupt_running_agents(
-                    "Gateway restarting" if self._restart_requested else "Gateway shutting down"
+                    _INTERRUPT_REASON_GATEWAY_RESTART if self._restart_requested else _INTERRUPT_REASON_GATEWAY_SHUTDOWN
                 )
                 interrupt_deadline = asyncio.get_running_loop().time() + 5.0
                 while self._running_agents and asyncio.get_running_loop().time() < interrupt_deadline:
@@ -2953,6 +3100,10 @@ class GatewayRunner:
                     _quick_key[:30], _stale_age, _stale_idle,
                     _raw_stale_timeout, _stale_detail,
                 )
+                self._invalidate_session_run_generation(
+                    _quick_key,
+                    reason="stale_running_agent_eviction",
+                )
                 self._release_running_agent_state(_quick_key)
 
         if _quick_key in self._running_agents:
@@ -2961,8 +3112,8 @@ class GatewayRunner:
 
             # Resolve the command once for all early-intercept checks below.
             from hermes_cli.commands import (
+                ACTIVE_SESSION_BYPASS_COMMANDS as _DEDICATED_HANDLERS,
                 resolve_command as _resolve_cmd_inner,
-                should_bypass_active_session as _should_bypass_active_inner,
             )
             _evt_cmd = event.get_command()
             _cmd_def_inner = _resolve_cmd_inner(_evt_cmd) if _evt_cmd else None
@@ -2976,15 +3127,12 @@ class GatewayRunner:
             # _interrupt_requested.  Force-clean _running_agents so the session
             # is unlocked and subsequent messages are processed normally.
             if _cmd_def_inner and _cmd_def_inner.name == "stop":
-                running_agent = self._running_agents.get(_quick_key)
-                if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
-                    running_agent.interrupt("Stop requested")
-                # Force-clean: remove the session lock regardless of agent state
-                adapter = self.adapters.get(source.platform)
-                if adapter and hasattr(adapter, 'get_pending_message'):
-                    adapter.get_pending_message(_quick_key)  # consume and discard
-                self._pending_messages.pop(_quick_key, None)
-                self._release_running_agent_state(_quick_key)
+                await self._interrupt_and_clear_session(
+                    _quick_key,
+                    source,
+                    interrupt_reason=_INTERRUPT_REASON_STOP,
+                    invalidation_reason="stop_command",
+                )
                 logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key[:20])
                 return "⚡ Stopped. You can continue this session."
 
@@ -2996,17 +3144,15 @@ class GatewayRunner:
             # doesn't get re-processed as a user message after the
             # interrupt completes.
             if _cmd_def_inner and _cmd_def_inner.name == "new":
-                running_agent = self._running_agents.get(_quick_key)
-                if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
-                    running_agent.interrupt("Session reset requested")
                 # Clear any pending messages so the old text doesn't replay
-                adapter = self.adapters.get(source.platform)
-                if adapter and hasattr(adapter, 'get_pending_message'):
-                    adapter.get_pending_message(_quick_key)  # consume and discard
-                self._pending_messages.pop(_quick_key, None)
+                await self._interrupt_and_clear_session(
+                    _quick_key,
+                    source,
+                    interrupt_reason=_INTERRUPT_REASON_RESET,
+                    invalidation_reason="new_command",
+                )
                 # Clean up the running agent entry so the reset handler
                 # doesn't think an agent is still active.
-                self._release_running_agent_state(_quick_key)
                 return await self._handle_reset_command(event)
 
             # /queue <prompt> — queue without interrupting
@@ -3027,6 +3173,54 @@ class GatewayRunner:
                     adapter._pending_messages[_quick_key] = queued_event
                 return "Queued for the next turn."
 
+            # /steer <prompt> — inject mid-run after the next tool call.
+            # Unlike /queue (turn boundary), /steer lands BETWEEN tool-call
+            # iterations inside the same agent run, by appending to the
+            # last tool result's content. No interrupt, no new user turn,
+            # no role-alternation violation.
+            if _cmd_def_inner and _cmd_def_inner.name == "steer":
+                steer_text = event.get_command_args().strip()
+                if not steer_text:
+                    return "Usage: /steer <prompt>"
+                running_agent = self._running_agents.get(_quick_key)
+                if running_agent is _AGENT_PENDING_SENTINEL:
+                    # Agent hasn't started yet — queue as turn-boundary fallback.
+                    adapter = self.adapters.get(source.platform)
+                    if adapter:
+                        from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
+                        queued_event = _ME(
+                            text=steer_text,
+                            message_type=_MT.TEXT,
+                            source=event.source,
+                            message_id=event.message_id,
+                            channel_prompt=event.channel_prompt,
+                        )
+                        adapter._pending_messages[_quick_key] = queued_event
+                    return "Agent still starting — /steer queued for the next turn."
+                if running_agent and hasattr(running_agent, "steer"):
+                    try:
+                        accepted = running_agent.steer(steer_text)
+                    except Exception as exc:
+                        logger.warning("Steer failed for session %s: %s", _quick_key[:20], exc)
+                        return f"⚠️ Steer failed: {exc}"
+                    if accepted:
+                        preview = steer_text[:60] + ("..." if len(steer_text) > 60 else "")
+                        return f"⏩ Steer queued — arrives after the next tool call: '{preview}'"
+                    return "Steer rejected (empty payload)."
+                # Running agent is missing or lacks steer() — fall back to queue.
+                adapter = self.adapters.get(source.platform)
+                if adapter:
+                    from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
+                    queued_event = _ME(
+                        text=steer_text,
+                        message_type=_MT.TEXT,
+                        source=event.source,
+                        message_id=event.message_id,
+                        channel_prompt=event.channel_prompt,
+                    )
+                    adapter._pending_messages[_quick_key] = queued_event
+                return "No active agent — /steer queued for the next turn."
+
             # /model must not be used while the agent is running.
             if _cmd_def_inner and _cmd_def_inner.name == "model":
                 return "Agent is running — wait or /stop first, then switch models."
@@ -3049,11 +3243,9 @@ class GatewayRunner:
             if _cmd_def_inner and _cmd_def_inner.name == "background":
                 return await self._handle_background_command(event)
 
-            # Gateway-handled info/control commands must never fall through to
-            # the interrupt path. If they are queued as pending text, the
-            # slash-command safety net discards them before the user sees any
-            # response.
-            if _cmd_def_inner and _should_bypass_active_inner(_cmd_def_inner.name):
+            # Gateway-handled info/control commands with dedicated
+            # running-agent handlers.
+            if _cmd_def_inner and _cmd_def_inner.name in _DEDICATED_HANDLERS:
                 if _cmd_def_inner.name == "help":
                     return await self._handle_help_command(event)
                 if _cmd_def_inner.name == "commands":
@@ -3063,6 +3255,21 @@ class GatewayRunner:
                 if _cmd_def_inner.name == "update":
                     return await self._handle_update_command(event)
 
+            # Catch-all: any other recognized slash command reached the
+            # running-agent guard. Reject gracefully rather than falling
+            # through to interrupt + discard. Without this, commands
+            # like /model, /reasoning, /voice, /insights, /title,
+            # /resume, /retry, /undo, /compress, /usage, /provider,
+            # /reload-mcp, /sethome, /reset (all registered as Discord
+            # slash commands) would interrupt the agent AND get
+            # silently discarded by the slash-command safety net,
+            # producing a zero-char response. See #5057, #6252, #10370.
+            if _cmd_def_inner:
+                return (
+                    f"⏳ Agent is running — `/{_cmd_def_inner.name}` can't run "
+                    f"mid-turn. Wait for the current response or `/stop` first."
+                )
+
             if event.message_type == MessageType.PHOTO:
                 logger.debug("PRIORITY photo follow-up for session %s — queueing without interrupt", _quick_key[:20])
                 adapter = self.adapters.get(source.platform)
@@ -3268,6 +3475,21 @@ class GatewayRunner:
         if canonical == "btw":
             return await self._handle_btw_command(event)
 
+        if canonical == "steer":
+            # No active agent — /steer has no tool call to inject into.
+            # Strip the prefix so downstream treats it as a normal user
+            # message. If the payload is empty, surface the usage hint.
+            steer_payload = event.get_command_args().strip()
+            if not steer_payload:
+                return "Usage: /steer <prompt>  (no agent is running; sending as a normal message)"
+            try:
+                event.text = steer_payload
+            except Exception:
+                pass
+            # Do NOT return — fall through to _handle_message_with_agent
+            # at the end of this function so the rewritten text is sent
+            # to the agent as a regular user turn.
+
         if canonical == "voice":
             return await self._handle_voice_command(event)
 
@@ -3411,9 +3633,10 @@ class GatewayRunner:
         # same session — corrupting the transcript.
         self._running_agents[_quick_key] = _AGENT_PENDING_SENTINEL
         self._running_agents_ts[_quick_key] = time.time()
+        _run_generation = self._begin_session_run_generation(_quick_key)
 
         try:
-            return await self._handle_message_with_agent(event, source, _quick_key)
+            return await self._handle_message_with_agent(event, source, _quick_key, _run_generation)
         finally:
             # If _run_agent replaced the sentinel with a real agent and
             # then cleaned it up, this is a no-op.  If we exited early
@@ -3584,7 +3807,7 @@ class GatewayRunner:
 
         return message_text
 
-    async def _handle_message_with_agent(self, event, source, _quick_key: str):
+    async def _handle_message_with_agent(self, event, source, _quick_key: str, run_generation: int):
         """Inner handler that runs under the _running_agents sentinel guard."""
         _msg_start_time = time.time()
         _platform_name = source.platform.value if hasattr(source.platform, "value") else str(source.platform)
@@ -4041,6 +4264,15 @@ class GatewayRunner:
         if message_text is None:
             return
 
+        # Bind this gateway run generation to the adapter's active-session
+        # event so deferred post-delivery callbacks can be released by the
+        # same run that registered them.
+        self._bind_adapter_run_generation(
+            self.adapters.get(source.platform),
+            session_key,
+            run_generation,
+        )
+
         try:
             # Emit agent:start hook
             hook_ctx = {
@@ -4059,6 +4291,7 @@ class GatewayRunner:
                 source=source,
                 session_id=session_entry.session_id,
                 session_key=session_key,
+                run_generation=run_generation,
                 event_message_id=event.message_id,
                 channel_prompt=event.channel_prompt,
             )
@@ -4071,6 +4304,22 @@ class GatewayRunner:
             except Exception:
                 pass
 
+            if not self._is_session_run_current(_quick_key, run_generation):
+                logger.info(
+                    "Discarding stale agent result for %s — generation %d is no longer current",
+                    _quick_key[:20] if _quick_key else "?",
+                    run_generation,
+                )
+                _stale_adapter = self.adapters.get(source.platform)
+                if getattr(type(_stale_adapter), "pop_post_delivery_callback", None) is not None:
+                    _stale_adapter.pop_post_delivery_callback(
+                        _quick_key,
+                        generation=run_generation,
+                    )
+                elif _stale_adapter and hasattr(_stale_adapter, "_post_delivery_callbacks"):
+                    _stale_adapter._post_delivery_callbacks.pop(_quick_key, None)
+                return None
+
             response = agent_result.get("final_response") or ""
 
             # Convert the agent's internal "(empty)" sentinel into a
@@ -4097,8 +4346,20 @@ class GatewayRunner:
             # Successful turn — clear any stuck-loop counter for this session.
             # This ensures the counter only accumulates across CONSECUTIVE
             # restarts where the session was active (never completed).
+            #
+            # Also clear the resume_pending flag (set by drain-timeout
+            # shutdown) — the turn ran to completion, so recovery
+            # succeeded and subsequent messages should no longer receive
+            # the restart-interruption system note.
             if session_key:
                 self._clear_restart_failure_count(session_key)
+                try:
+                    self.session_store.clear_resume_pending(session_key)
+                except Exception as _e:
+                    logger.debug(
+                        "clear_resume_pending failed for %s: %s",
+                        session_key[:20], _e,
+                    )
 
             # Surface error details when the agent failed silently (final_response=None)
             if not response and agent_result.get("failed"):
@@ -4473,6 +4734,7 @@ class GatewayRunner:
         
         # Get existing session key
         session_key = self._session_key_for_source(source)
+        self._invalidate_session_run_generation(session_key, reason="session_reset")
         
         # Flush memories in the background (fire-and-forget) so the user
         # gets the "Session reset!" response immediately.
@@ -4732,20 +4994,49 @@ class GatewayRunner:
         agent = self._running_agents.get(session_key)
         if agent is _AGENT_PENDING_SENTINEL:
             # Force-clean the sentinel so the session is unlocked.
-            self._release_running_agent_state(session_key)
+            await self._interrupt_and_clear_session(
+                session_key,
+                source,
+                interrupt_reason=_INTERRUPT_REASON_STOP,
+                invalidation_reason="stop_command_pending",
+            )
             logger.info("STOP (pending) for session %s — sentinel cleared", session_key[:20])
             return "⚡ Stopped. The agent hadn't started yet — you can continue this session."
         if agent:
-            agent.interrupt("Stop requested")
             # Force-clean the session lock so a truly hung agent doesn't
             # keep it locked forever.
-            self._release_running_agent_state(session_key)
+            await self._interrupt_and_clear_session(
+                session_key,
+                source,
+                interrupt_reason=_INTERRUPT_REASON_STOP,
+                invalidation_reason="stop_command_handler",
+            )
             return "⚡ Stopped. You can continue this session."
         else:
             return "No active task to stop."
 
     async def _handle_restart_command(self, event: MessageEvent) -> str:
         """Handle /restart command - drain active work, then restart the gateway."""
+        # Defensive idempotency check: if the previous gateway process
+        # recorded this same /restart (same platform + update_id) and the new
+        # process is seeing it *again*, this is a re-delivery caused by PTB's
+        # graceful-shutdown `get_updates` ACK failing on the way out ("Error
+        # while calling `get_updates` one more time to mark all fetched
+        # updates. Suppressing error to ensure graceful shutdown. When
+        # polling for updates is restarted, updates may be received twice."
+        # in gateway.log).  Ignoring the stale redelivery prevents a
+        # self-perpetuating restart loop where every fresh gateway
+        # re-processes the same /restart command and immediately restarts
+        # again.
+        if self._is_stale_restart_redelivery(event):
+            logger.info(
+                "Ignoring redelivered /restart (platform=%s, update_id=%s) — "
+                "already processed by a previous gateway instance.",
+                event.source.platform.value if event.source and event.source.platform else "?",
+                event.platform_update_id,
+            )
+            return ""
+
         if self._restart_requested or self._draining:
             count = self._running_agent_count()
             if count:
@@ -4768,6 +5059,26 @@ class GatewayRunner:
         except Exception as e:
             logger.debug("Failed to write restart notify file: %s", e)
 
+        # Record the triggering platform + update_id in a dedicated dedup
+        # marker.  Unlike .restart_notify.json (which gets unlinked once the
+        # new gateway sends the "gateway restarted" notification), this
+        # marker persists so the new gateway can still detect a delayed
+        # /restart redelivery from Telegram.  Overwritten on every /restart.
+        try:
+            import json as _json
+            import time as _time
+            dedup_data = {
+                "platform": event.source.platform.value if event.source.platform else None,
+                "requested_at": _time.time(),
+            }
+            if event.platform_update_id is not None:
+                dedup_data["update_id"] = event.platform_update_id
+            (_hermes_home / ".restart_last_processed.json").write_text(
+                _json.dumps(dedup_data)
+            )
+        except Exception as e:
+            logger.debug("Failed to write restart dedup marker: %s", e)
+
         active_agents = self._running_agent_count()
         # When running under a service manager (systemd/launchd), use the
         # service restart path: exit with code 75 so the service manager
@@ -4783,6 +5094,58 @@ class GatewayRunner:
             return f"⏳ Draining {active_agents} active agent(s) before restart..."
         return "♻ Restarting gateway. If you aren't notified within 60 seconds, restart from the console with `hermes gateway restart`."
 
+    def _is_stale_restart_redelivery(self, event: MessageEvent) -> bool:
+        """Return True if this /restart is a Telegram re-delivery we already handled.
+
+        The /restart handler records the triggering platform + update_id in
+        ``.restart_last_processed.json``.  A later /restart on the same platform
+        whose update_id is <= the recorded integer is treated as a redelivery,
+        unless the marker's ``requested_at`` is more than 300 seconds old (a
+        marker with no numeric ``requested_at`` skips that age check).
+
+        Only Telegram events are considered; other platforms, events lacking a
+        source/platform/update_id, and a missing or unreadable marker return False.
+        """
+        if event is None or event.source is None:
+            return False
+        if event.platform_update_id is None:  # nothing to order against the marker
+            return False
+        if event.source.platform is None:
+            return False
+        # Only Telegram populates platform_update_id currently; be explicit
+        # so future platforms aren't accidentally gated by this check.
+        try:
+            platform_value = event.source.platform.value
+        except Exception:  # platform object without a usable .value
+            return False
+        if platform_value != "telegram":
+            return False
+
+        try:
+            import json as _json
+            import time as _time
+            marker_path = _hermes_home / ".restart_last_processed.json"
+            if not marker_path.exists():  # no /restart recorded yet
+                return False
+            data = _json.loads(marker_path.read_text())
+        except Exception:  # unreadable or invalid JSON marker: not a redelivery
+            return False
+
+        if data.get("platform") != platform_value:  # NOTE(review): assumes the JSON root is an object; a list/str would raise here
+            return False
+        recorded_uid = data.get("update_id")
+        if not isinstance(recorded_uid, int):  # marker written without an update_id
+            return False
+        # Staleness guard: ignore markers older than 5 minutes.  A legitimately
+        # old marker (e.g. crash recovery where notify never fired) should not
+        # swallow a fresh /restart from the user.
+        requested_at = data.get("requested_at")
+        if isinstance(requested_at, (int, float)):  # non-numeric/missing timestamp: age check is skipped
+            if _time.time() - requested_at > 300:
+                return False
+        return event.platform_update_id <= recorded_uid  # NOTE(review): presumes the event id is an int — confirm
+
+
     async def _handle_help_command(self, event: MessageEvent) -> str:
         """Handle /help command - list available commands."""
         from hermes_cli.commands import gateway_help_lines
@@ -5528,8 +5891,7 @@ class GatewayRunner:
             if "pynacl" in err_lower or "nacl" in err_lower or "davey" in err_lower:
                 return (
                     "Voice dependencies are missing (PyNaCl / davey). "
-                    "Install or reinstall Hermes with the messaging extra, e.g. "
-                    "`pip install hermes-agent[messaging]`."
+                    f"Install with: `{sys.executable} -m pip install PyNaCl`"
                 )
             return f"Failed to join voice channel: {e}"
 
@@ -8096,6 +8458,84 @@ class GatewayRunner:
         if hasattr(self, "_busy_ack_ts"):
             self._busy_ack_ts.pop(session_key, None)
 
+    def _begin_session_run_generation(self, session_key: str) -> int:
+        """Claim a fresh run generation token for ``session_key``.
+
+        Increments the per-session counter in ``_session_run_generation``
+        (created on first use) and returns the new value, so tokens only grow.
+        A worker holding an older token can later be recognized as stale and
+        its late result dropped.  Returns 0 without recording for a falsy key.
+        """
+        if not session_key:
+            return 0
+        generations = self.__dict__.get("_session_run_generation")  # None if the attribute was never set on this instance
+        if generations is None:
+            generations = {}
+            self._session_run_generation = generations
+        next_generation = int(generations.get(session_key, 0)) + 1  # first token issued for a key is 1
+        generations[session_key] = next_generation
+        return next_generation
+
+    def _invalidate_session_run_generation(self, session_key: str, *, reason: str = "") -> int:
+        """Invalidate any in-flight run token for ``session_key`` by claiming a newer one; return the new token."""
+        generation = self._begin_session_run_generation(session_key)  # bumping the counter makes older tokens stop matching
+        if reason:  # log only when the caller supplied a reason
+            logger.info(
+                "Invalidated run generation for %s → %d (%s)",
+                session_key[:20],  # NOTE(review): would raise for a None key when reason is set — confirm callers
+                generation,
+                reason,
+            )
+        return generation
+
+    def _is_session_run_current(self, session_key: str, generation: int) -> bool:
+        """Return True when ``generation`` equals the latest token for ``session_key`` (always True for a falsy key)."""
+        if not session_key:  # no key means no tracking, so never report stale
+            return True
+        generations = self.__dict__.get("_session_run_generation") or {}  # tolerate the map never having been created
+        return int(generations.get(session_key, 0)) == int(generation)  # keys with no recorded token compare against 0
+
+    def _bind_adapter_run_generation(
+        self,
+        adapter: Any,
+        session_key: str,
+        generation: int | None,
+    ) -> None:
+        """Tag the adapter's ``_active_sessions`` entry for ``session_key`` with ``generation`` (best-effort)."""
+        if not adapter or not session_key or generation is None:  # nothing to bind
+            return
+        try:
+            interrupt_event = getattr(adapter, "_active_sessions", {}).get(session_key)  # adapters without the map yield None
+            if interrupt_event is not None:
+                setattr(interrupt_event, "_hermes_run_generation", int(generation))  # stored as a plain attribute on the entry
+        except Exception:  # binding is optional; any failure is deliberately ignored
+            pass
+
+    async def _interrupt_and_clear_session(
+        self,
+        session_key: str,
+        source: SessionSource,
+        *,
+        interrupt_reason: str,
+        invalidation_reason: str,
+        release_running_state: bool = True,
+    ) -> None:
+        """Interrupt the current run, invalidate its generation token, and drop queued messages for the session."""
+        if not session_key:  # nothing to act on without a key
+            return
+        running_agent = self._running_agents.get(session_key)
+        if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:  # the sentinel is a placeholder, not an agent to interrupt
+            running_agent.interrupt(interrupt_reason)
+        self._invalidate_session_run_generation(session_key, reason=invalidation_reason)  # results from the old run are now stale
+        adapter = self.adapters.get(source.platform)
+        if adapter and hasattr(adapter, "interrupt_session_activity"):  # optional adapter hook
+            await adapter.interrupt_session_activity(session_key, source.chat_id)
+        if adapter and hasattr(adapter, "get_pending_message"):
+            adapter.get_pending_message(session_key)  # consume and discard
+        self._pending_messages.pop(session_key, None)  # also drop the runner-side queued message, if any
+        if release_running_state:  # callers may opt out and release the running state themselves
+            self._release_running_agent_state(session_key)
+
     def _evict_cached_agent(self, session_key: str) -> None:
         """Remove a cached agent for a session (called on /new, /model, etc)."""
         _lock = getattr(self, "_agent_cache_lock", None)
@@ -8277,6 +8717,7 @@ class GatewayRunner:
         source: "SessionSource",
         session_id: str,
         session_key: str = None,
+        run_generation: Optional[int] = None,
         event_message_id: Optional[str] = None,
     ) -> Dict[str, Any]:
         """Forward the message to a remote Hermes API server instead of
@@ -8312,6 +8753,11 @@ class GatewayRunner:
 
         proxy_key = os.getenv("GATEWAY_PROXY_KEY", "").strip()
 
+        def _run_still_current() -> bool:  # closure over run_generation / session_key of the enclosing call
+            if run_generation is None or not session_key:  # no token or key bound: never treated as stale
+                return True
+            return self._is_session_run_current(session_key, run_generation)  # False once a newer generation was claimed
+
         # Build messages in OpenAI chat format --------------------------
         #
         # The remote api_server can maintain session continuity via
@@ -8441,6 +8887,21 @@ class GatewayRunner:
                     # Parse SSE stream
                     buffer = ""
                     async for chunk in resp.content.iter_any():
+                        if not _run_still_current():
+                            logger.info(
+                                "Discarding stale proxy stream for %s — generation %d is no longer current",
+                                session_key[:20] if session_key else "?",
+                                run_generation or 0,
+                            )
+                            return {
+                                "final_response": "",
+                                "messages": [],
+                                "api_calls": 0,
+                                "tools": [],
+                                "history_offset": len(history),
+                                "session_id": session_id,
+                                "response_previewed": False,
+                            }
                         text = chunk.decode("utf-8", errors="replace")
                         buffer += text
 
@@ -8490,6 +8951,21 @@ class GatewayRunner:
                     stream_task.cancel()
 
         _elapsed = time.time() - _start
+        if not _run_still_current():
+            logger.info(
+                "Discarding stale proxy result for %s — generation %d is no longer current",
+                session_key[:20] if session_key else "?",
+                run_generation or 0,
+            )
+            return {
+                "final_response": "",
+                "messages": [],
+                "api_calls": 0,
+                "tools": [],
+                "history_offset": len(history),
+                "session_id": session_id,
+                "response_previewed": False,
+            }
         logger.info(
             "proxy response: url=%s session=%s time=%.1fs response=%d chars",
             proxy_url, (session_id or "")[:20], _elapsed, len(full_response),
@@ -8518,6 +8994,7 @@ class GatewayRunner:
         source: SessionSource,
         session_id: str,
         session_key: str = None,
+        run_generation: Optional[int] = None,
         _interrupt_depth: int = 0,
         event_message_id: Optional[str] = None,
         channel_prompt: Optional[str] = None,
@@ -8543,11 +9020,17 @@ class GatewayRunner:
                 source=source,
                 session_id=session_id,
                 session_key=session_key,
+                run_generation=run_generation,
                 event_message_id=event_message_id,
             )
 
         from run_agent import AIAgent
         import queue
+
+        def _run_still_current() -> bool:  # closure over run_generation / session_key of the enclosing call
+            if run_generation is None or not session_key:  # no token or key bound: never treated as stale
+                return True
+            return self._is_session_run_current(session_key, run_generation)  # False once a newer generation was claimed
         
         user_config = _load_gateway_config()
         platform_key = _platform_config_key(source.platform)
@@ -8602,7 +9085,7 @@ class GatewayRunner:
         
         def progress_callback(event_type: str, tool_name: str = None, preview: str = None, args: dict = None, **kwargs):
             """Callback invoked by agent on tool lifecycle events."""
-            if not progress_queue:
+            if not progress_queue or not _run_still_current():
                 return
 
             # Only act on tool.started events (ignore tool.completed, reasoning.available, etc.)
@@ -8707,6 +9190,14 @@ class GatewayRunner:
 
             while True:
                 try:
+                    if not _run_still_current():
+                        while not progress_queue.empty():
+                            try:
+                                progress_queue.get_nowait()
+                            except Exception:
+                                break
+                        return
+
                     raw = progress_queue.get_nowait()
 
                     # Handle dedup messages: update last line with repeat counter
@@ -8732,6 +9223,9 @@ class GatewayRunner:
                         await asyncio.sleep(_remaining)
                         continue
 
+                    if not _run_still_current():
+                        return
+
                     if can_edit and progress_msg_id is not None:
                         # Try to edit the existing progress message
                         full_text = "\n".join(progress_lines)
@@ -8767,7 +9261,8 @@ class GatewayRunner:
 
                     # Restore typing indicator
                     await asyncio.sleep(0.3)
-                    await adapter.send_typing(source.chat_id, metadata=_progress_metadata)
+                    if _run_still_current():
+                        await adapter.send_typing(source.chat_id, metadata=_progress_metadata)
 
                 except queue.Empty:
                     await asyncio.sleep(0.3)
@@ -8811,6 +9306,8 @@ class GatewayRunner:
         _hooks_ref = self.hooks
 
         def _step_callback_sync(iteration: int, prev_tools: list) -> None:
+            if not _run_still_current():
+                return
             try:
                 # prev_tools may be list[str] or list[dict] with "name"/"result"
                 # keys.  Normalise to keep "tool_names" backward-compatible for
@@ -8841,7 +9338,7 @@ class GatewayRunner:
         _status_thread_metadata = {"thread_id": _progress_thread_id} if _progress_thread_id else None
 
         def _status_callback_sync(event_type: str, message: str) -> None:
-            if not _status_adapter:
+            if not _status_adapter or not _run_still_current():
                 return
             try:
                 asyncio.run_coroutine_threadsafe(
@@ -8972,12 +9469,16 @@ class GatewayRunner:
                             metadata={"thread_id": _progress_thread_id} if _progress_thread_id else None,
                         )
                         if _want_stream_deltas:
-                            _stream_delta_cb = _stream_consumer.on_delta
+                            def _stream_delta_cb(text: str) -> None:
+                                if _run_still_current():
+                                    _stream_consumer.on_delta(text)
                         stream_consumer_holder[0] = _stream_consumer
                 except Exception as _sc_err:
                     logger.debug("Could not set up stream consumer: %s", _sc_err)
 
             def _interim_assistant_cb(text: str, *, already_streamed: bool = False) -> None:
+                if not _run_still_current():
+                    return
                 if _stream_consumer is not None:
                     if already_streamed:
                         _stream_consumer.on_segment_break()
@@ -9081,7 +9582,7 @@ class GatewayRunner:
             _bg_review_pending_lock = threading.Lock()
 
             def _deliver_bg_review_message(message: str) -> None:
-                if not _status_adapter:
+                if not _status_adapter or not _run_still_current():
                     return
                 try:
                     asyncio.run_coroutine_threadsafe(
@@ -9105,7 +9606,7 @@ class GatewayRunner:
 
             # Background review delivery — send "💾 Memory updated" etc. to user
             def _bg_review_send(message: str) -> None:
-                if not _status_adapter:
+                if not _status_adapter or not _run_still_current():
                     return
                 if not _bg_review_release.is_set():
                     with _bg_review_pending_lock:
@@ -9118,9 +9619,16 @@ class GatewayRunner:
             # Register the release hook on the adapter so base.py's finally
             # block can fire it after delivering the main response.
             if _status_adapter and session_key:
-                _pdc = getattr(_status_adapter, "_post_delivery_callbacks", None)
-                if _pdc is not None:
-                    _pdc[session_key] = _release_bg_review_messages
+                if getattr(type(_status_adapter), "register_post_delivery_callback", None) is not None:
+                    _status_adapter.register_post_delivery_callback(
+                        session_key,
+                        _release_bg_review_messages,
+                        generation=run_generation,
+                    )
+                else:
+                    _pdc = getattr(_status_adapter, "_post_delivery_callbacks", None)
+                    if _pdc is not None:
+                        _pdc[session_key] = _release_bg_review_messages
 
             # Store agent reference for interrupt support
             agent_holder[0] = agent
@@ -9282,7 +9790,40 @@ class GatewayRunner:
             # restart, crash, SIGTERM).  Prepend a system note so the model
             # finishes processing the pending tool results before addressing
             # the user's new message.  (#4493)
-            if agent_history and agent_history[-1].get("role") == "tool":
+            #
+            # Session-level resume_pending (set on drain-timeout shutdown)
+            # escalates the wording — the transcript's last role may be
+            # anything (tool, assistant with unfinished work, etc.), so we
+            # give a stronger, reason-aware instruction that subsumes the
+            # tool-tail case.
+            _resume_entry = None
+            if session_key:
+                try:
+                    _resume_entry = self.session_store._entries.get(session_key)
+                except Exception:
+                    _resume_entry = None
+            _is_resume_pending = bool(
+                _resume_entry is not None and getattr(_resume_entry, "resume_pending", False)
+            )
+
+            if _is_resume_pending:
+                _reason = getattr(_resume_entry, "resume_reason", None) or "restart_timeout"
+                _reason_phrase = (
+                    "a gateway restart"
+                    if _reason == "restart_timeout"
+                    else "a gateway shutdown"
+                    if _reason == "shutdown_timeout"
+                    else "a gateway interruption"
+                )
+                message = (
+                    f"[System note: Your previous turn in this session was interrupted "
+                    f"by {_reason_phrase}. The conversation history below is intact. "
+                    f"If it contains unfinished tool result(s), process them first and "
+                    f"summarize what was accomplished, then address the user's new "
+                    f"message below.]\n\n"
+                    + message
+                )
+            elif agent_history and agent_history[-1].get("role") == "tool":
                 message = (
                     "[System note: Your previous turn was interrupted before you could "
                     "process the last tool result(s). The conversation history contains "
@@ -9689,7 +10230,7 @@ class GatewayRunner:
                 # Interrupt the agent if it's still running so the thread
                 # pool worker is freed.
                 if _timed_out_agent and hasattr(_timed_out_agent, "interrupt"):
-                    _timed_out_agent.interrupt("Execution timed out (inactivity)")
+                    _timed_out_agent.interrupt(_INTERRUPT_REASON_TIMEOUT)
 
                 _timeout_mins = int(_agent_timeout // 60) or 1
 
@@ -9754,7 +10295,15 @@ class GatewayRunner:
             if result and adapter and session_key:
                 pending_event = _dequeue_pending_event(adapter, session_key)
                 if result.get("interrupted") and not pending_event and result.get("interrupt_message"):
-                    pending = result.get("interrupt_message")
+                    interrupt_message = result.get("interrupt_message")
+                    if _is_control_interrupt_message(interrupt_message):
+                        logger.info(
+                            "Ignoring control interrupt message for session %s: %s",
+                            session_key[:20] if session_key else "?",
+                            interrupt_message,
+                        )
+                    else:
+                        pending = interrupt_message
                 elif pending_event:
                     pending = pending_event.text or _build_media_placeholder(pending_event)
                     logger.debug("Processing queued message after agent completion: '%s...'", pending[:40])
@@ -9859,7 +10408,17 @@ class GatewayRunner:
                     # first response has been delivered.  Pop from the
                     # adapter's callback dict (prevents double-fire in
                     # base.py's finally block) and call it.
-                    if adapter and hasattr(adapter, "_post_delivery_callbacks"):
+                    if getattr(type(adapter), "pop_post_delivery_callback", None) is not None:
+                        _bg_cb = adapter.pop_post_delivery_callback(
+                            session_key,
+                            generation=run_generation,
+                        )
+                        if callable(_bg_cb):
+                            try:
+                                _bg_cb()
+                            except Exception:
+                                pass
+                    elif adapter and hasattr(adapter, "_post_delivery_callbacks"):
                         _bg_cb = adapter._post_delivery_callbacks.pop(session_key, None)
                         if callable(_bg_cb):
                             try:
@@ -9907,6 +10466,7 @@ class GatewayRunner:
                     source=next_source,
                     session_id=session_id,
                     session_key=session_key,
+                    run_generation=run_generation,
                     _interrupt_depth=_interrupt_depth + 1,
                     event_message_id=next_message_id,
                     channel_prompt=next_channel_prompt,
diff --git a/gateway/session.py b/gateway/session.py
index 4cb623128c7..8b31c2b0aa2 100644
--- a/gateway/session.py
+++ b/gateway/session.py
@@ -377,7 +377,19 @@ class SessionEntry:
     # this session (create a new session_id) so the user starts fresh.
     # Set by /stop to break stuck-resume loops (#7536).
     suspended: bool = False
-    
+
+    # When True the session was interrupted by a gateway restart/shutdown
+    # drain timeout, but recovery is still expected.  Unlike ``suspended``,
+    # ``resume_pending`` preserves the existing session_id on next access —
+    # the user stays on the same transcript and the agent auto-continues
+    # from where it left off.  Cleared after the next successful turn.
+    # Escalation to ``suspended`` is handled by the existing
+    # ``.restart_failure_counts`` stuck-loop counter (#7536), not by a
+    # parallel counter on this entry.
+    resume_pending: bool = False
+    resume_reason: Optional[str] = None  # e.g. "restart_timeout"
+    last_resume_marked_at: Optional[datetime] = None
+
     def to_dict(self) -> Dict[str, Any]:
         result = {
             "session_key": self.session_key,
@@ -397,6 +409,13 @@ class SessionEntry:
             "cost_status": self.cost_status,
             "memory_flushed": self.memory_flushed,
             "suspended": self.suspended,
+            "resume_pending": self.resume_pending,
+            "resume_reason": self.resume_reason,
+            "last_resume_marked_at": (
+                self.last_resume_marked_at.isoformat()
+                if self.last_resume_marked_at
+                else None
+            ),
         }
         if self.origin:
             result["origin"] = self.origin.to_dict()
@@ -414,7 +433,15 @@ class SessionEntry:
                 platform = Platform(data["platform"])
             except ValueError as e:
                 logger.debug("Unknown platform value %r: %s", data["platform"], e)
-        
+
+        last_resume_marked_at = None
+        _lrma = data.get("last_resume_marked_at")
+        if _lrma:
+            try:
+                last_resume_marked_at = datetime.fromisoformat(_lrma)
+            except (TypeError, ValueError):
+                last_resume_marked_at = None
+
         return cls(
             session_key=data["session_key"],
             session_id=data["session_id"],
@@ -434,6 +461,9 @@ class SessionEntry:
             cost_status=data.get("cost_status", "unknown"),
             memory_flushed=data.get("memory_flushed", False),
             suspended=data.get("suspended", False),
+            resume_pending=data.get("resume_pending", False),
+            resume_reason=data.get("resume_reason"),
+            last_resume_marked_at=last_resume_marked_at,
         )
 
 
@@ -710,9 +740,23 @@ class SessionStore:
                 entry = self._entries[session_key]
 
                 # Auto-reset sessions marked as suspended (e.g. after /stop
-                # broke a stuck loop — #7536).
+                # broke a stuck loop — #7536).  ``suspended`` is the hard
+                # forced-wipe signal and always wins over ``resume_pending``,
+                # so repeated interrupted restarts that escalate via the
+                # existing ``.restart_failure_counts`` stuck-loop counter
+                # still converge to a clean slate.
                 if entry.suspended:
                     reset_reason = "suspended"
+                elif entry.resume_pending:
+                    # Restart-interrupted session: preserve the session_id
+                    # and return the existing entry so the transcript
+                    # reloads intact.  ``resume_pending`` is cleared after
+                    # the NEXT successful turn completes (not here), which
+                    # means a re-interrupted retry keeps trying — the
+                    # stuck-loop counter handles terminal escalation.
+                    entry.updated_at = now
+                    self._save()
+                    return entry
                 else:
                     reset_reason = self._should_reset(entry, source)
                 if not reset_reason:
@@ -802,6 +846,55 @@ class SessionStore:
                 return True
         return False
 
+    def mark_resume_pending(
+        self,
+        session_key: str,
+        reason: str = "restart_timeout",
+    ) -> bool:
+        """Mark a session as resumable after a restart interruption.
+
+        Sets ``resume_pending``, stores ``reason`` in ``resume_reason`` and
+        stamps ``last_resume_marked_at`` with ``_now()``, then saves.  No other
+        entry field (including ``session_id``) is modified here.
+
+        Returns True if the session existed and was marked; False when the
+        key is unknown or the entry is already ``suspended``.
+        """
+        with self._lock:
+            self._ensure_loaded_locked()
+            if session_key in self._entries:
+                entry = self._entries[session_key]
+                # Never override an explicit ``suspended`` — that is a hard
+                # forced-wipe signal (from /stop or stuck-loop escalation).
+                if entry.suspended:
+                    return False
+                entry.resume_pending = True
+                entry.resume_reason = reason
+                entry.last_resume_marked_at = _now()
+                self._save()  # write the updated flags out while still holding the lock
+                return True
+        return False  # unknown session_key: nothing was marked
+
+    def clear_resume_pending(self, session_key: str) -> bool:
+        """Clear the resume-pending flag after a successful resumed turn.
+
+        Resets ``resume_pending``, ``resume_reason`` and
+        ``last_resume_marked_at`` on the entry and saves the store.  Nothing
+        is written when the key is unknown or the flag is not set.
+
+        Returns True if a flag was cleared.
+        """
+        with self._lock:
+            self._ensure_loaded_locked()
+            entry = self._entries.get(session_key)
+            if entry is None or not entry.resume_pending:  # nothing to clear, so skip the save
+                return False
+            entry.resume_pending = False
+            entry.resume_reason = None
+            entry.last_resume_marked_at = None
+            self._save()  # write the cleared flags out while still holding the lock
+            return True
+
     def prune_old_entries(self, max_age_days: int) -> int:
         """Drop SessionEntry records older than max_age_days.
 
@@ -861,6 +954,12 @@ class SessionStore:
         (#7536).  Only suspends sessions updated within *max_age_seconds*
         to avoid resetting long-idle sessions that are harmless to resume.
         Returns the number of sessions that were suspended.
+
+        Entries flagged ``resume_pending=True`` are skipped — those were
+        marked intentionally by the drain-timeout path as recoverable.
+        Terminal escalation for genuinely stuck ``resume_pending`` sessions
+        is handled by the existing ``.restart_failure_counts`` stuck-loop
+        counter, which runs after this method on startup.
         """
         from datetime import timedelta
 
@@ -869,6 +968,8 @@ class SessionStore:
         with self._lock:
             self._ensure_loaded_locked()
             for entry in self._entries.values():
+                if entry.resume_pending:
+                    continue
                 if not entry.suspended and entry.updated_at >= cutoff:
                     entry.suspended = True
                     count += 1
diff --git a/gateway/stream_consumer.py b/gateway/stream_consumer.py
index ae00aee392b..78e365712d9 100644
--- a/gateway/stream_consumer.py
+++ b/gateway/stream_consumer.py
@@ -430,6 +430,21 @@ class GatewayStreamConsumer:
                 # a real string like "msg_1", not "__no_edit__", so that case
                 # still resets and creates a fresh segment as intended.)
                 if got_segment_break:
+                    # If the segment-break edit failed to deliver the
+                    # accumulated content (flood control that has not yet
+                    # promoted to fallback mode, or fallback mode itself),
+                    # _accumulated still holds pre-boundary text the user
+                    # never saw. Flush that tail as a continuation message
+                    # before the reset below wipes _accumulated — otherwise
+                    # text generated before the tool boundary is silently
+                    # dropped (issue #8124).
+                    if (
+                        self._accumulated
+                        and not current_update_visible
+                        and self._message_id
+                        and self._message_id != "__no_edit__"
+                    ):
+                        await self._flush_segment_tail_on_edit_failure()
                     self._reset_segment_state(preserve_no_edit=True)
 
                 await asyncio.sleep(0.05)  # Small yield to not busy-loop
@@ -556,6 +571,30 @@ class GatewayStreamConsumer:
             if final_text.strip() and final_text != self._visible_prefix():
                 continuation = final_text
             else:
+                # Defence-in-depth for #7183: the last edit may still show the
+                # cursor character because fallback mode was entered after an
+                # edit failure left it stuck.  Try one final edit to strip it
+                # so the message doesn't freeze with a visible ▉.  Best-effort
+                # — if this edit also fails (flood control still active),
+                # _try_strip_cursor has already been called on fallback entry
+                # and the adaptive-backoff retries will have had their shot.
+                if (
+                    self._message_id
+                    and self._last_sent_text
+                    and self.cfg.cursor
+                    and self._last_sent_text.endswith(self.cfg.cursor)
+                ):
+                    clean_text = self._last_sent_text[:-len(self.cfg.cursor)]
+                    try:
+                        result = await self.adapter.edit_message(
+                            chat_id=self.chat_id,
+                            message_id=self._message_id,
+                            content=clean_text,
+                        )
+                        if result.success:
+                            self._last_sent_text = clean_text
+                    except Exception:
+                        pass
                 self._already_sent = True
                 self._final_response_sent = True
                 return
@@ -620,6 +659,39 @@ class GatewayStreamConsumer:
         err_lower = err.lower()
         return "flood" in err_lower or "retry after" in err_lower or "rate" in err_lower
 
+    async def _flush_segment_tail_on_edit_failure(self) -> None:
+        """Send text the user never saw before a segment-break reset drops it.
+
+        If an edit failed (flood control, transport error) and a tool
+        boundary shows up before the next retry, ``_accumulated`` still
+        contains generated text that was never displayed.  The segment
+        reset that follows would throw that tail away and leave a frozen
+        cursor in the partial message.
+
+        This sends whatever follows the already-delivered prefix as a new
+        message and makes a best-effort attempt to strip the stuck cursor
+        from the earlier partial message.
+        """
+        already_in_fallback = self._fallback_final_send
+        if not already_in_fallback:
+            await self._try_strip_cursor()
+        shown = self._fallback_prefix or self._visible_prefix()
+        pending = self._accumulated
+        if shown and pending.startswith(shown):
+            pending = pending[len(shown):].lstrip()
+        pending = self._clean_for_display(pending)
+        if pending.strip():
+            try:
+                outcome = await self.adapter.send(
+                    chat_id=self.chat_id,
+                    content=pending,
+                    metadata=self.metadata,
+                )
+                if outcome.success:
+                    self._already_sent = True
+            except Exception as e:
+                logger.error("Segment-break tail flush error: %s", e)
+
     async def _try_strip_cursor(self) -> None:
         """Best-effort edit to remove the cursor from the last visible message.
 
diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index 831f81bf266..4623147a5a5 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -1434,49 +1434,6 @@ def _read_codex_tokens(*, _lock: bool = True) -> Dict[str, Any]:
     }
 
 
-def _write_codex_cli_tokens(
-    access_token: str,
-    refresh_token: str,
-    *,
-    last_refresh: Optional[str] = None,
-) -> None:
-    """Write refreshed tokens back to ~/.codex/auth.json.
-
-    OpenAI OAuth refresh tokens are single-use and rotate on every refresh.
-    When Hermes refreshes a token it consumes the old refresh_token; if we
-    don't write the new pair back, the Codex CLI (or VS Code extension) will
-    fail with ``refresh_token_reused`` on its next refresh attempt.
-
-    This mirrors the Anthropic write-back to ~/.claude/.credentials.json
-    via ``_write_claude_code_credentials()``.
-    """
-    codex_home = os.getenv("CODEX_HOME", "").strip()
-    if not codex_home:
-        codex_home = str(Path.home() / ".codex")
-    auth_path = Path(codex_home).expanduser() / "auth.json"
-    try:
-        existing: Dict[str, Any] = {}
-        if auth_path.is_file():
-            existing = json.loads(auth_path.read_text(encoding="utf-8"))
-        if not isinstance(existing, dict):
-            existing = {}
-
-        tokens_dict = existing.get("tokens")
-        if not isinstance(tokens_dict, dict):
-            tokens_dict = {}
-        tokens_dict["access_token"] = access_token
-        tokens_dict["refresh_token"] = refresh_token
-        existing["tokens"] = tokens_dict
-        if last_refresh is not None:
-            existing["last_refresh"] = last_refresh
-
-        auth_path.parent.mkdir(parents=True, exist_ok=True)
-        auth_path.write_text(json.dumps(existing, indent=2), encoding="utf-8")
-        auth_path.chmod(0o600)
-    except (OSError, IOError) as exc:
-        logger.debug("Failed to write refreshed tokens to %s: %s", auth_path, exc)
-
-
 def _save_codex_tokens(tokens: Dict[str, str], last_refresh: str = None) -> None:
     """Save Codex OAuth tokens to Hermes auth store (~/.hermes/auth.json)."""
     if last_refresh is None:
@@ -1544,6 +1501,11 @@ def refresh_codex_oauth_pure(
                 "then run `hermes auth` to re-authenticate."
             )
             relogin_required = True
+        # A 401/403 from the token endpoint always means the refresh token
+        # is invalid/expired — force relogin even if the body error code
+        # wasn't one of the known strings above.
+        if response.status_code in (401, 403) and not relogin_required:
+            relogin_required = True
         raise AuthError(
             message,
             provider="openai-codex",
@@ -1599,12 +1561,6 @@ def _refresh_codex_auth_tokens(
     updated_tokens["refresh_token"] = refreshed["refresh_token"]
 
     _save_codex_tokens(updated_tokens)
-    # Write back to ~/.codex/auth.json so Codex CLI / VS Code stay in sync.
-    _write_codex_cli_tokens(
-        refreshed["access_token"],
-        refreshed["refresh_token"],
-        last_refresh=refreshed.get("last_refresh"),
-    )
     return updated_tokens
 
 
@@ -1649,25 +1605,7 @@ def resolve_codex_runtime_credentials(
     refresh_skew_seconds: int = CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
 ) -> Dict[str, Any]:
     """Resolve runtime credentials from Hermes's own Codex token store."""
-    try:
-        data = _read_codex_tokens()
-    except AuthError as orig_err:
-        # Only attempt migration when there are NO tokens stored at all
-        # (code == "codex_auth_missing"), not when tokens exist but are invalid.
-        if orig_err.code != "codex_auth_missing":
-            raise
-
-        # Migration: user had Codex as active provider with old storage (~/.codex/).
-        cli_tokens = _import_codex_cli_tokens()
-        if cli_tokens:
-            logger.info("Migrating Codex credentials from ~/.codex/ to Hermes auth store")
-            print("⚠️  Migrating Codex credentials to Hermes's own auth store.")
-            print("   This avoids conflicts with Codex CLI and VS Code.")
-            print("   Run `hermes auth` to create a fully independent session.\n")
-            _save_codex_tokens(cli_tokens)
-            data = _read_codex_tokens()
-        else:
-            raise
+    data = _read_codex_tokens()
     tokens = dict(data["tokens"])
     access_token = str(tokens.get("access_token", "") or "").strip()
     refresh_timeout_seconds = float(os.getenv("HERMES_CODEX_REFRESH_TIMEOUT_SECONDS", "20"))
diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py
index ce257b0d7cb..f753d6f3a73 100644
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -91,6 +91,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
                aliases=("tasks",)),
     CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session",
                aliases=("q",), args_hint="<prompt>"),
+    CommandDef("steer", "Inject a message after the next tool call without interrupting", "Session",
+               args_hint="<prompt>"),
     CommandDef("status", "Show session info", "Session"),
     CommandDef("profile", "Show active profile name and home directory", "Info"),
     CommandDef("sethome", "Set this chat as the home channel", "Session",
@@ -258,10 +260,10 @@ GATEWAY_KNOWN_COMMANDS: frozenset[str] = frozenset(
 )
 
 
-# Commands that must never be queued behind an active gateway session.
-# These are explicit control/info commands handled by the gateway itself;
-# if they get queued as pending text, the safety net in gateway.run will
-# discard them before they ever reach the user.
+# Commands with explicit Level-2 running-agent handlers in gateway/run.py.
+# Listed here for introspection / tests; semantically a subset of
+# "all resolvable commands" — which is the real bypass set (see
+# should_bypass_active_session below).
 ACTIVE_SESSION_BYPASS_COMMANDS: frozenset[str] = frozenset(
     {
         "agents",
@@ -275,6 +277,7 @@ ACTIVE_SESSION_BYPASS_COMMANDS: frozenset[str] = frozenset(
         "queue",
         "restart",
         "status",
+        "steer",
         "stop",
         "update",
     }
@@ -282,9 +285,26 @@ ACTIVE_SESSION_BYPASS_COMMANDS: frozenset[str] = frozenset(
 
 
 def should_bypass_active_session(command_name: str | None) -> bool:
-    """Return True when a slash command must bypass active-session queuing."""
-    cmd = resolve_command(command_name) if command_name else None
-    return bool(cmd and cmd.name in ACTIVE_SESSION_BYPASS_COMMANDS)
+    """Return True for any resolvable slash command.
+
+    Rationale: every gateway-registered slash command either has a
+    specific Level-2 handler in gateway/run.py (/stop, /new, /model,
+    /approve, etc.) or reaches the running-agent catch-all that returns
+    a "busy — wait or /stop first" response. In both paths the command
+    is dispatched, not queued.
+
+    Queueing is always wrong for a recognized slash command because the
+    safety net in gateway.run discards any command text that reaches
+    the pending queue — which meant a mid-run /model (or /reasoning,
+    /voice, /insights, /title, /resume, /retry, /undo, /compress,
+    /usage, /provider, /reload-mcp, /sethome, /reset) would silently
+    interrupt the agent AND get discarded, producing a zero-char
+    response. See issue #5057 / PRs #6252, #10370, #4665.
+
+    ACTIVE_SESSION_BYPASS_COMMANDS remains the subset of commands with
+    explicit Level-2 handlers; the rest fall through to the catch-all.
+    """
+    return resolve_command(command_name) is not None if command_name else False
 
 
 def _resolve_config_gates() -> set[str]:
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index c9e05e3e882..786ff622d93 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -403,7 +403,11 @@ DEFAULT_CONFIG = {
         "container_persistent": True,   # Persist filesystem across sessions
         # Docker volume mounts — share host directories with the container.
         # Each entry is "host_path:container_path" (standard Docker -v syntax).
-        # Example: ["/home/user/projects:/workspace/projects", "/data:/data"]
+        # Example:
+        # ["/home/user/projects:/workspace/projects",
+        #  "/home/user/.hermes/cache/documents:/output"]
+        # For gateway MEDIA delivery, write inside Docker to /output/... and emit
+        # the host-visible path in MEDIA:, not the container path.
         "docker_volumes": [],
         # Explicit opt-in: mount the host cwd into /workspace for Docker sessions.
         # Default off because passing host directories into a sandbox weakens isolation.
@@ -737,9 +741,14 @@ DEFAULT_CONFIG = {
     #   manual — always prompt the user (default)
     #   smart  — use auxiliary LLM to auto-approve low-risk commands, prompt for high-risk
     #   off    — skip all approval prompts (equivalent to --yolo)
+    #
+    # cron_mode — what to do when a cron job hits a dangerous command:
+    #   deny    — block the command and let the agent find another way (default, safe)
+    #   approve — auto-approve all dangerous commands in cron jobs
     "approvals": {
         "mode": "manual",
         "timeout": 60,
+        "cron_mode": "deny",
     },
 
     # Permanently allowed dangerous command patterns (added via "always" approval)
@@ -771,6 +780,20 @@ DEFAULT_CONFIG = {
         "wrap_response": True,
     },
 
+    # execute_code settings — controls the tool used for programmatic tool calls.
+    "code_execution": {
+        # Execution mode:
+        #   project (default) — scripts run in the session's working directory
+        #     with the active virtualenv/conda env's python, so project deps
+        #     (pandas, torch, project packages) and relative paths resolve.
+        #   strict            — scripts run in an isolated temp directory with
+        #     hermes-agent's own python (sys.executable). Maximum isolation
+        #     and reproducibility; project deps and relative paths won't work.
+        # Env scrubbing (strips *_API_KEY, *_TOKEN, *_SECRET, ...) and the
+        # tool whitelist apply identically in both modes.
+        "mode": "project",
+    },
+
     # Logging — controls file logging to ~/.hermes/logs/.
     # agent.log captures INFO+ (all agent activity); errors.log captures WARNING+.
     "logging": {
@@ -788,7 +811,7 @@ DEFAULT_CONFIG = {
     },
 
     # Config schema version - bump this when adding new required fields
-    "_config_version": 18,
+    "_config_version": 19,
 }
 
 # =============================================================================
@@ -2842,7 +2865,7 @@ _FALLBACK_COMMENT = """
 #   minimax      (MINIMAX_API_KEY)     — MiniMax
 #   minimax-cn   (MINIMAX_CN_API_KEY)  — MiniMax (China)
 #
-# For custom OpenAI-compatible endpoints, add base_url and api_key_env.
+# For custom OpenAI-compatible endpoints, add base_url and key_env.
 #
 # fallback_model:
 #   provider: openrouter
@@ -2886,7 +2909,7 @@ _COMMENTED_SECTIONS = """
 #   minimax      (MINIMAX_API_KEY)     — MiniMax
 #   minimax-cn   (MINIMAX_CN_API_KEY)  — MiniMax (China)
 #
-# For custom OpenAI-compatible endpoints, add base_url and api_key_env.
+# For custom OpenAI-compatible endpoints, add base_url and key_env.
 #
 # fallback_model:
 #   provider: openrouter
diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py
index 28c4af1fa8a..4138aeaa278 100644
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@@ -895,8 +895,8 @@ def run_doctor(args):
                 _model_count = len(_br_resp.get("modelSummaries", []))
                 print(f"\r  {color('✓', Colors.GREEN)} {_label} {color(f'({_auth_var}, {_region}, {_model_count} models)', Colors.DIM)}           ")
             except ImportError:
-                print(f"\r  {color('⚠', Colors.YELLOW)} {_label} {color('(boto3 not installed — pip install hermes-agent[bedrock])', Colors.DIM)}           ")
-                issues.append("Install boto3 for Bedrock: pip install hermes-agent[bedrock]")
+                print(f"\r  {color('⚠', Colors.YELLOW)} {_label} {color(f'(boto3 not installed — {sys.executable} -m pip install boto3)', Colors.DIM)}           ")
+                issues.append(f"Install boto3 for Bedrock: {sys.executable} -m pip install boto3")
             except Exception as _e:
                 _err_name = type(_e).__name__
                 print(f"\r  {color('⚠', Colors.YELLOW)} {_label} {color(f'({_err_name}: {_e})', Colors.DIM)}           ")
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index e2e2a774f5a..71fc6ae3810 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -897,6 +897,10 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
     _ensure_tui_node()
 
     def _node_bin(bin: str) -> str:
+        if bin == "node":
+            env_node = os.environ.get("HERMES_NODE")
+            if env_node and os.path.isfile(env_node) and os.access(env_node, os.X_OK):
+                return env_node
         path = shutil.which(bin)
         if not path:
             print(f"{bin} not found — install Node.js to use the TUI.")
@@ -3969,7 +3973,7 @@ def _model_flow_anthropic(config, current_model=""):
 
         elif choice == "2":
             print()
-            print("  Get an API key at: https://console.anthropic.com/settings/keys")
+            print("  Get an API key at: https://platform.claude.com/settings/keys")
             print()
             try:
                 import getpass
@@ -4985,8 +4989,187 @@ def _update_node_dependencies() -> None:
             print(f"    {stderr.splitlines()[-1]}")
 
 
+class _UpdateOutputStream:
+    """Fault-tolerant stdout/stderr proxy used while ``hermes update`` runs.
+
+    Sits in front of one of the process's original streams and:
+
+    * copies every write into the supplied log file object (opened by
+      ``_install_hangup_protection`` as an append-only ``update.log``)
+      so the output can still be inspected after the terminal is gone;
+    * treats ``BrokenPipeError`` / ``OSError`` / ``ValueError`` (closed
+      file) from the original stream as "terminal lost": the stream is
+      marked broken and skipped from then on instead of aborting the
+      update.
+
+    Together with the ``SIGHUP -> SIG_IGN`` handler set up by
+    ``_install_hangup_protection`` this lets ``hermes update`` survive
+    an SSH session that disconnects mid-install.
+    """
+
+    def __init__(self, original, log_file):
+        self._original = original  # wrapped stream (sys.stdout / sys.stderr)
+        self._log = log_file  # mirror target; ``None`` disables mirroring
+        self._original_broken = False  # latched on the first failed write/flush
+
+    def write(self, data):
+        """Mirror ``data`` to the log, then forward it to the original stream."""
+        # The log goes first — it's the most reliable destination — and a
+        # logging failure must never abort the update.
+        if self._log is not None:
+            try:
+                self._log.write(data)
+            except Exception:
+                pass
+
+        if not self._original_broken:
+            try:
+                return self._original.write(data)
+            except (BrokenPipeError, OSError, ValueError):
+                # Terminal vanished (SSH disconnect, shell close).  Stop
+                # writing to it, but keep the update running.
+                self._original_broken = True
+        # Original stream unavailable: report the payload length as written.
+        return len(data) if isinstance(data, (str, bytes)) else 0
+
+    def flush(self):
+        """Flush the log and, unless it is broken, the original stream."""
+        if self._log is not None:
+            try:
+                self._log.flush()
+            except Exception:
+                pass
+        if not self._original_broken:
+            try:
+                self._original.flush()
+            except (BrokenPipeError, OSError, ValueError):
+                self._original_broken = True
+
+    def isatty(self):
+        """Return the original stream's ``isatty()``; False if broken or on error."""
+        try:
+            return (not self._original_broken) and self._original.isatty()
+        except Exception:
+            return False
+
+    def fileno(self):
+        """Return the original stream's descriptor; errors propagate unchanged."""
+        # Some tools probe fileno(); failures are left to the caller, just
+        # as with the unwrapped stream.
+        return self._original.fileno()
+
+    def __getattr__(self, name):
+        return getattr(self._original, name)
+
+
+def _install_hangup_protection(gateway_mode: bool = False):
+    """Protect ``cmd_update`` from SIGHUP and broken terminal pipes.
+
+    Users commonly run ``hermes update`` in an SSH session or a terminal
+    that may close mid-install.  Without protection, ``SIGHUP`` from the
+    terminal kills the Python process during ``pip install`` and leaves
+    the venv half-installed; the documented workaround ("use screen /
+    tmux") shouldn't be required for something as routine as an update.
+
+    Protections installed:
+
+    1. ``SIGHUP`` is set to ``SIG_IGN``.  POSIX preserves ``SIG_IGN``
+       across ``exec()``, so pip and git subprocesses also stop dying on
+       hangup.
+    2. ``sys.stdout`` / ``sys.stderr`` are wrapped to mirror output to
+       ``~/.hermes/logs/update.log`` and to silently absorb
+       ``BrokenPipeError`` when the terminal vanishes.
+
+    ``SIGINT`` (Ctrl-C) and ``SIGTERM`` (systemd shutdown) are
+    **intentionally left alone** — those are legitimate cancellation
+    signals the user or OS sent on purpose.
+
+    In gateway mode (``hermes update --gateway``) the update is already
+    spawned detached from a terminal, so this function is a no-op.
+
+    Returns a state dict (``prev_stdout``, ``prev_stderr``, ``log_file``,
+    ``installed``) for ``cmd_update`` to pass to ``_finalize_update_output``.
+    """
+    state = {
+        "prev_stdout": sys.stdout,
+        "prev_stderr": sys.stderr,
+        "log_file": None,
+        "installed": False,
+    }
+
+    if gateway_mode:
+        return state
+
+    import signal as _signal
+
+    # (1) Ignore SIGHUP for the remainder of this process.
+    if hasattr(_signal, "SIGHUP"):
+        try:
+            _signal.signal(_signal.SIGHUP, _signal.SIG_IGN)
+        except (ValueError, OSError):
+            # Called from a non-main thread — not fatal.  The update still
+            # runs, just without hangup protection.
+            pass
+
+    # (2) Mirror output to update.log and wrap stdio for broken-pipe
+    # tolerance.  Any failure here is non-fatal; we just skip the wrap.
+    log_file = None
+    try:
+        import datetime as _dt
+        from hermes_cli.config import get_hermes_home
+
+        logs_dir = get_hermes_home() / "logs"
+        logs_dir.mkdir(parents=True, exist_ok=True)
+        log_file = open(logs_dir / "update.log", "a", buffering=1, encoding="utf-8")
+        log_file.write(
+            f"\n=== hermes update started "
+            f"{_dt.datetime.now().isoformat(timespec='seconds')} ===\n"
+        )
+        sys.stdout = _UpdateOutputStream(state["prev_stdout"], log_file)
+        sys.stderr = _UpdateOutputStream(state["prev_stderr"], log_file)
+        state["log_file"] = log_file
+        state["installed"] = True
+    except Exception:
+        # Roll back: restore stdio and close the log handle so it isn't leaked.
+        sys.stdout, sys.stderr = state["prev_stdout"], state["prev_stderr"]
+        if log_file is not None:
+            try:
+                log_file.close()
+            except Exception:
+                pass
+
+    return state
+
+
+def _finalize_update_output(state):
+    """Restore stdio and close the update.log handle opened by ``_install_hangup_protection``.
+
+    ``state`` is the dict that function returned; a falsy value is a no-op.
+    """
+    if not state:
+        return
+    if state.get("installed"):
+        sys.stdout = state.get("prev_stdout", sys.stdout)
+        sys.stderr = state.get("prev_stderr", sys.stderr)
+    log_file = state.get("log_file")
+    if log_file is not None:
+        try:
+            try:
+                log_file.flush()
+            finally:
+                # Close even when the flush fails so the handle never leaks.
+                log_file.close()
+        except Exception:
+            pass
+
+
 def cmd_update(args):
-    """Update Hermes Agent to the latest version."""
+    """Update Hermes Agent to the latest version.
+
+    Thin wrapper around ``_cmd_update_impl``: installs hangup protection,
+    runs the update, then restores stdio on the way out (even on
+    ``sys.exit`` or unhandled exceptions).
+    """
     from hermes_cli.config import is_managed, managed_error
 
     if is_managed():
@@ -4994,6 +5177,20 @@ def cmd_update(args):
         return
 
     gateway_mode = getattr(args, "gateway", False)
+
+    # Protect against mid-update terminal disconnects (SIGHUP) and tolerate
+    # writes to a closed stdout.  No-op in gateway mode.  See
+    # _install_hangup_protection for rationale.
+    _update_io_state = _install_hangup_protection(gateway_mode=gateway_mode)
+    try:
+        _cmd_update_impl(args, gateway_mode=gateway_mode)
+    finally:
+        _finalize_update_output(_update_io_state)
+
+
+def _cmd_update_impl(args, gateway_mode: bool):
+    """Body of ``cmd_update`` — kept separate so the wrapper can always
+    restore stdio even on ``sys.exit``."""
     # In gateway mode, use file-based IPC for prompts instead of stdin
     gw_input_fn = (
         (lambda prompt, default="": _gateway_prompt(prompt, default))
@@ -6029,11 +6226,12 @@ def cmd_dashboard(args):
         import uvicorn  # noqa: F401
     except ImportError:
         print("Web UI dependencies not installed.")
-        print("Install them with:  pip install hermes-agent[web]")
+        print(f"Install them with:  {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'")
         sys.exit(1)
 
-    if not _build_web_ui(PROJECT_ROOT / "web", fatal=True):
-        sys.exit(1)
+    if "HERMES_WEB_DIST" not in os.environ:
+        if not _build_web_ui(PROJECT_ROOT / "web", fatal=True):
+            sys.exit(1)
 
     from hermes_cli.web_server import start_server
 
@@ -6804,6 +7002,13 @@ For more help on a command:
     wh_sub.add_argument(
         "--secret", default="", help="HMAC secret (auto-generated if omitted)"
     )
+    wh_sub.add_argument(
+        "--deliver-only",
+        action="store_true",
+        help="Skip the agent — deliver the rendered prompt directly as the "
+        "message. Zero LLM cost. Requires --deliver to be a real target "
+        "(not 'log').",
+    )
 
     webhook_subparsers.add_parser(
         "list", aliases=["ls"], help="List all dynamic subscriptions"
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index cbbeef62d44..a0d7c2220c1 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -133,8 +133,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
         "gemini-2.5-pro",
         "gemini-2.5-flash",
         "gemini-2.5-flash-lite",
-        # Gemma open models (also served via AI Studio)
-        "gemma-4-31b-it",
     ],
     "google-gemini-cli": [
         "gemini-2.5-pro",
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index 8770386b73e..f969bd4bd16 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -91,7 +91,6 @@ _DEFAULT_PROVIDER_MODELS = {
     "gemini": [
         "gemini-3.1-pro-preview", "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview",
         "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite",
-        "gemma-4-31b-it",
     ],
     "zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
     "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
@@ -1461,7 +1460,9 @@ def setup_agent_settings(config: dict):
     )
     print_info("Maximum tool-calling iterations per conversation.")
     print_info("Higher = more complex tasks, but costs more tokens.")
-    print_info("Default is 90, which works for most tasks. Use 150+ for open exploration.")
+    print_info(
+        f"Press Enter to keep {current_max}. Use 90 for most tasks or 150+ for open exploration."
+    )
 
     max_iter_str = prompt("Max iterations", current_max)
     try:
diff --git a/hermes_cli/uninstall.py b/hermes_cli/uninstall.py
index 8d8e3393b36..67cea418209 100644
--- a/hermes_cli/uninstall.py
+++ b/hermes_cli/uninstall.py
@@ -118,59 +118,166 @@ def remove_wrapper_script():
 
 
 def uninstall_gateway_service():
-    """Stop and uninstall the gateway service if running."""
+    """Stop and uninstall the gateway service (systemd, launchd) and kill any
+    standalone gateway processes.
+
+    Delegates to the gateway module which handles:
+    - Linux: user + system systemd services (with proper DBUS env setup)
+    - macOS: launchd plists
+    - All platforms: standalone ``hermes gateway run`` processes
+    - Termux/Android: skips systemd (no systemd on Android), still kills standalone processes
+    """
     import platform
-    
-    if platform.system() != "Linux":
-        return False
+    stopped_something = False
 
-    prefix = os.getenv("PREFIX", "")
-    if os.getenv("TERMUX_VERSION") or "com.termux/files/usr" in prefix:
-        return False
-    
+    # 1. Kill any standalone gateway processes (all platforms, including Termux)
     try:
-        from hermes_cli.gateway import get_service_name
-        svc_name = get_service_name()
-    except Exception:
-        svc_name = "hermes-gateway"
-
-    service_file = Path.home() / ".config" / "systemd" / "user" / f"{svc_name}.service"
-    
-    if not service_file.exists():
-        return False
-    
-    try:
-        # Stop the service
-        subprocess.run(
-            ["systemctl", "--user", "stop", svc_name],
-            capture_output=True,
-            check=False
-        )
-        
-        # Disable the service
-        subprocess.run(
-            ["systemctl", "--user", "disable", svc_name],
-            capture_output=True,
-            check=False
-        )
-        
-        # Remove service file
-        service_file.unlink()
-        
-        # Reload systemd
-        subprocess.run(
-            ["systemctl", "--user", "daemon-reload"],
-            capture_output=True,
-            check=False
-        )
-        
-        return True
-        
+        from hermes_cli.gateway import kill_gateway_processes, find_gateway_pids
+        pids = find_gateway_pids()
+        if pids:
+            killed = kill_gateway_processes()
+            if killed:
+                log_success(f"Killed {killed} running gateway process(es)")
+                stopped_something = True
     except Exception as e:
-        log_warn(f"Could not fully remove gateway service: {e}")
+        log_warn(f"Could not check for gateway processes: {e}")
+
+    system = platform.system()
+
+    # Termux/Android has no systemd and no launchd — nothing left to do.
+    prefix = os.getenv("PREFIX", "")
+    is_termux = bool(os.getenv("TERMUX_VERSION") or "com.termux/files/usr" in prefix)
+    if is_termux:
+        return stopped_something
+
+    # 2. Linux: uninstall systemd services (both user and system scopes)
+    if system == "Linux":
+        try:
+            from hermes_cli.gateway import (
+                get_systemd_unit_path,
+                get_service_name,
+                _systemctl_cmd,
+            )
+            svc_name = get_service_name()
+
+            for is_system in (False, True):
+                unit_path = get_systemd_unit_path(system=is_system)
+                if not unit_path.exists():
+                    continue
+
+                scope = "system" if is_system else "user"
+                try:
+                    if is_system and os.geteuid() != 0:
+                        log_warn(f"System gateway service exists at {unit_path} "
+                                 f"but needs sudo to remove")
+                        continue
+
+                    cmd = _systemctl_cmd(is_system)
+                    subprocess.run(cmd + ["stop", svc_name],
+                                   capture_output=True, check=False)
+                    subprocess.run(cmd + ["disable", svc_name],
+                                   capture_output=True, check=False)
+                    unit_path.unlink()
+                    subprocess.run(cmd + ["daemon-reload"],
+                                   capture_output=True, check=False)
+                    log_success(f"Removed {scope} gateway service ({unit_path})")
+                    stopped_something = True
+                except Exception as e:
+                    log_warn(f"Could not remove {scope} gateway service: {e}")
+        except Exception as e:
+            log_warn(f"Could not check systemd gateway services: {e}")
+
+    # 3. macOS: uninstall launchd plist
+    elif system == "Darwin":
+        try:
+            from hermes_cli.gateway import get_launchd_plist_path
+            plist_path = get_launchd_plist_path()
+            if plist_path.exists():
+                subprocess.run(["launchctl", "unload", str(plist_path)],
+                               capture_output=True, check=False)
+                plist_path.unlink()
+                log_success(f"Removed macOS gateway service ({plist_path})")
+                stopped_something = True
+        except Exception as e:
+            log_warn(f"Could not remove launchd gateway service: {e}")
+
+    return stopped_something
+
+
+def _is_default_hermes_home(hermes_home: Path) -> bool:
+    """Return True when ``hermes_home`` resolves to the default (non-profile) root; False on any error."""
+    try:
+        from hermes_constants import get_default_hermes_root
+        return hermes_home.resolve() == get_default_hermes_root().resolve()
+    except Exception:  # import or path resolution failed — treat as "not the default root"
         return False
 
 
+def _discover_named_profiles():
+    """Return every ``list_profiles()`` entry whose ``is_default`` is falsy.
+    Returns ``[]`` silently when ``hermes_cli.profiles`` cannot be imported,
+    and ``[]`` with a logged warning when enumeration itself raises."""
+    try:
+        from hermes_cli.profiles import list_profiles
+    except Exception:
+        return []
+    try:
+        return [p for p in list_profiles() if not getattr(p, "is_default", False)]
+    except Exception as e:
+        log_warn(f"Could not enumerate profiles: {e}")
+        return []
+
+
+def _uninstall_profile(profile) -> None:
+    """Uninstall one named profile: stop/uninstall its gateway service,
+    remove its alias wrapper, and delete its HERMES_HOME directory.
+
+    Gateway steps run ``python -m hermes_cli.main --profile <name> gateway
+    stop|uninstall`` in a subprocess because service/unit/plist paths derive
+    from HERMES_HOME. Best-effort: errors are logged, exit codes are ignored.
+    """
+    import sys as _sys
+    name = profile.name
+    profile_home = profile.path
+
+    log_info(f"Uninstalling profile '{name}'...")
+
+    # 1. Stop and remove this profile's gateway service.
+    #    Use `python -m hermes_cli.main` so we don't depend on a `hermes`
+    #    wrapper that may be half-removed mid-uninstall.
+    hermes_invocation = [_sys.executable, "-m", "hermes_cli.main", "--profile", name]
+    for subcmd in ("stop", "uninstall"):
+        try:
+            subprocess.run(
+                hermes_invocation + ["gateway", subcmd],
+                capture_output=True,
+                text=True,
+                timeout=60,
+                check=False,
+            )
+        except subprocess.TimeoutExpired:
+            log_warn(f"  Gateway {subcmd} timed out for '{name}'")
+        except Exception as e:
+            log_warn(f"  Could not run gateway {subcmd} for '{name}': {e}")
+
+    # 2. Remove the wrapper alias script at ~/.local/bin/<name> (if any).
+    alias_path = getattr(profile, "alias_path", None)
+    if alias_path and alias_path.exists():
+        try:
+            alias_path.unlink()
+            log_success(f"  Removed alias {alias_path}")
+        except Exception as e:
+            log_warn(f"  Could not remove alias {alias_path}: {e}")
+
+    # 3. Wipe the profile's HERMES_HOME directory.
+    try:
+        if profile_home.exists():
+            shutil.rmtree(profile_home)
+            log_success(f"  Removed {profile_home}")
+    except Exception as e:
+        log_warn(f"  Could not remove {profile_home}: {e}")
+
+
 def run_uninstall(args):
     """
     Run the uninstall process.
@@ -181,7 +288,13 @@ def run_uninstall(args):
     """
     project_root = get_project_root()
     hermes_home = get_hermes_home()
-    
+
+    # Detect named profiles when uninstalling from the default root —
+    # offer to clean them up too instead of leaving zombie HERMES_HOMEs
+    # and systemd units behind.
+    is_default_profile = _is_default_hermes_home(hermes_home)
+    named_profiles = _discover_named_profiles() if is_default_profile else []
+
     print()
     print(color("┌─────────────────────────────────────────────────────────┐", Colors.MAGENTA, Colors.BOLD))
     print(color("│            ⚕ Hermes Agent Uninstaller                  │", Colors.MAGENTA, Colors.BOLD))
@@ -195,6 +308,13 @@ def run_uninstall(args):
     print(f"  Secrets: {hermes_home / '.env'}")
     print(f"  Data:    {hermes_home / 'cron/'}, {hermes_home / 'sessions/'}, {hermes_home / 'logs/'}")
     print()
+
+    if named_profiles:
+        print(color("Other profiles detected:", Colors.CYAN, Colors.BOLD))
+        for p in named_profiles:
+            running = " (gateway running)" if getattr(p, "gateway_running", False) else ""
+            print(f"  • {p.name}{running}: {p.path}")
+        print()
     
     # Ask for confirmation
     print(color("Uninstall Options:", Colors.YELLOW, Colors.BOLD))
@@ -221,12 +341,40 @@ def run_uninstall(args):
         return
     
     full_uninstall = (choice == "2")
-    
+
+    # When doing a full uninstall from the default profile, also offer to
+    # remove any named profiles — stopping their gateway services, unlinking
+    # their alias wrappers, and wiping their HERMES_HOME dirs. Otherwise
+    # those leave zombie services and data behind.
+    remove_profiles = False
+    if full_uninstall and named_profiles:
+        print()
+        print(color("Other profiles will NOT be removed by default.", Colors.YELLOW))
+        print(f"Found {len(named_profiles)} named profile(s): " +
+              ", ".join(p.name for p in named_profiles))
+        print()
+        try:
+            resp = input(color(
+                f"Also stop and remove these {len(named_profiles)} profile(s)? [y/N]: ",
+                Colors.BOLD
+            )).strip().lower()
+        except (KeyboardInterrupt, EOFError):
+            print()
+            print("Cancelled.")
+            return
+        remove_profiles = resp in ("y", "yes")
+
     # Final confirmation
     print()
     if full_uninstall:
         print(color("⚠️  WARNING: This will permanently delete ALL Hermes data!", Colors.RED, Colors.BOLD))
         print(color("   Including: configs, API keys, sessions, scheduled jobs, logs", Colors.RED))
+        if remove_profiles:
+            print(color(
+                f"   Plus {len(named_profiles)} profile(s): " +
+                ", ".join(p.name for p in named_profiles),
+                Colors.RED
+            ))
     else:
         print("This will remove the Hermes code but keep your configuration and data.")
     
@@ -247,12 +395,10 @@ def run_uninstall(args):
     print(color("Uninstalling...", Colors.CYAN, Colors.BOLD))
     print()
     
-    # 1. Stop and uninstall gateway service
-    log_info("Checking for gateway service...")
-    if uninstall_gateway_service():
-        log_success("Gateway service stopped and removed")
-    else:
-        log_info("No gateway service found")
+    # 1. Stop and uninstall gateway service + kill standalone processes
+    log_info("Checking for running gateway...")
+    if not uninstall_gateway_service():
+        log_info("No gateway service or processes found")
     
     # 2. Remove PATH entries from shell configs
     log_info("Removing PATH entries from shell configs...")
@@ -291,8 +437,17 @@ def run_uninstall(args):
         log_warn(f"Could not fully remove {project_root}: {e}")
         log_info("You may need to manually remove it")
     
-    # 5. Optionally remove ~/.hermes/ data directory
+    # 5. Optionally remove ~/.hermes/ data directory (and named profiles)
     if full_uninstall:
+        # 5a. Stop and remove each named profile's gateway service and alias
+        #     wrapper; these live OUTSIDE the default root, so the rmtree below
+        #     (which sweeps ``<default>/profiles/<name>/``) cannot clean them up.
+        #     NOTE(review): step 4 has already removed project_root, so the
+        #     ``python -m hermes_cli.main`` subprocess may fail here — confirm order.
+        if remove_profiles and named_profiles:
+            for prof in named_profiles:
+                _uninstall_profile(prof)
+
         log_info("Removing configuration and data...")
         try:
             if hermes_home.exists():
diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py
index e5f2eb53767..110b81e4b5e 100644
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@@ -56,10 +56,10 @@ try:
 except ImportError:
     raise SystemExit(
         "Web UI requires fastapi and uvicorn.\n"
-        "Run 'hermes web' to auto-install, or: pip install hermes-agent[web]"
+        f"Install with: {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'"
     )
 
-WEB_DIST = Path(__file__).parent / "web_dist"
+WEB_DIST = Path(os.environ.get("HERMES_WEB_DIST") or Path(__file__).parent / "web_dist")  # unset or empty → bundled web_dist
 _log = logging.getLogger(__name__)
 
 app = FastAPI(title="Hermes Agent", version=__version__)
diff --git a/hermes_cli/webhook.py b/hermes_cli/webhook.py
index 8ff135e29e5..378f11b4a7e 100644
--- a/hermes_cli/webhook.py
+++ b/hermes_cli/webhook.py
@@ -155,6 +155,15 @@ def _cmd_subscribe(args):
         "created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
     }
 
+    if getattr(args, "deliver_only", False):
+        if route["deliver"] == "log":
+            print(
+                "Error: --deliver-only requires --deliver to be a real target "
+                "(telegram, discord, slack, github_comment, etc.) — not 'log'."
+            )
+            return
+        route["deliver_only"] = True
+
     if args.deliver_chat_id:
         route["deliver_extra"] = {"chat_id": args.deliver_chat_id}
 
@@ -172,9 +181,12 @@ def _cmd_subscribe(args):
     else:
         print("  Events: (all)")
     print(f"  Deliver: {route['deliver']}")
+    if route.get("deliver_only"):
+        print("  Mode: direct delivery (no agent, zero LLM cost)")
     if route.get("prompt"):
         prompt_preview = route["prompt"][:80] + ("..." if len(route["prompt"]) > 80 else "")
-        print(f"  Prompt: {prompt_preview}")
+        label = "Message" if route.get("deliver_only") else "Prompt"
+        print(f"  {label}: {prompt_preview}")
     print(f"\n  Configure your service to POST to the URL above.")
     print(f"  Use the secret for HMAC-SHA256 signature validation.")
     print(f"  The gateway must be running to receive events (hermes gateway run).\n")
@@ -192,6 +204,8 @@ def _cmd_list(args):
     for name, route in subs.items():
         events = ", ".join(route.get("events", [])) or "(all)"
         deliver = route.get("deliver", "log")
+        if route.get("deliver_only"):
+            deliver = f"{deliver} (direct — no agent)"
         desc = route.get("description", "")
         print(f"  ◆ {name}")
         if desc:
diff --git a/hermes_state.py b/hermes_state.py
index 5e563666e83..af97f7fbd89 100644
--- a/hermes_state.py
+++ b/hermes_state.py
@@ -987,6 +987,22 @@ class SessionDB:
 
         return sanitized.strip()
 
+
+    @staticmethod
+    def _contains_cjk(text: str) -> bool:
+        """Return True if ``text`` contains any CJK (Chinese/Japanese/Korean) character."""
+        cjk_ranges = (
+            (0x1100, 0x11FF),    # Hangul Jamo
+            (0x3000, 0x30FF),    # CJK Symbols, Hiragana, Katakana (contiguous blocks)
+            (0x3400, 0x4DBF),    # CJK Extension A
+            (0x4E00, 0x9FFF),    # CJK Unified Ideographs
+            (0xAC00, 0xD7AF),    # Hangul Syllables
+            (0xF900, 0xFAFF),    # CJK Compatibility Ideographs
+            (0xFF66, 0xFF9F),    # Halfwidth Katakana
+            (0x20000, 0x2EBEF),  # CJK Extensions B through F
+        )
+        return any(lo <= ord(ch) <= hi for ch in text for lo, hi in cjk_ranges)
+
     def search_messages(
         self,
         query: str,
@@ -1062,8 +1078,47 @@ class SessionDB:
                 cursor = self._conn.execute(sql, params)
             except sqlite3.OperationalError:
                 # FTS5 query syntax error despite sanitization — return empty
-                return []
-            matches = [dict(row) for row in cursor.fetchall()]
+                # unless query contains CJK (fall back to LIKE below)
+                if not self._contains_cjk(query):
+                    return []
+                matches = []
+            else:
+                matches = [dict(row) for row in cursor.fetchall()]
+
+        # LIKE fallback for CJK queries: FTS5 default tokenizer splits CJK
+        # characters individually, causing multi-character queries to fail.
+        if not matches and self._contains_cjk(query):
+            raw_query = query.strip('"').strip()
+            like_where = ["m.content LIKE ?"]
+            like_params: list = [f"%{raw_query}%"]
+            if source_filter is not None:
+                like_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
+                like_params.extend(source_filter)
+            if exclude_sources is not None:
+                like_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})")
+                like_params.extend(exclude_sources)
+            if role_filter:
+                like_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
+                like_params.extend(role_filter)
+            like_sql = f"""
+                SELECT m.id, m.session_id, m.role,
+                       substr(m.content,
+                              max(1, instr(m.content, ?) - 40),
+                              120) AS snippet,
+                       m.content, m.timestamp, m.tool_name,
+                       s.source, s.model, s.started_at AS session_started
+                FROM messages m
+                JOIN sessions s ON s.id = m.session_id
+                WHERE {' AND '.join(like_where)}
+                ORDER BY m.timestamp DESC
+                LIMIT ? OFFSET ?
+            """
+            like_params.extend([limit, offset])
+            # instr() parameter goes first in the bound list
+            like_params = [raw_query] + like_params
+            with self._lock:
+                like_cursor = self._conn.execute(like_sql, like_params)
+                matches = [dict(row) for row in like_cursor.fetchall()]
 
         # Add surrounding context (1 message before + after each match).
         # Done outside the lock so we don't hold it across N sequential queries.
diff --git a/mcp_serve.py b/mcp_serve.py
index e8294d1f91f..e0aeb706191 100644
--- a/mcp_serve.py
+++ b/mcp_serve.py
@@ -433,7 +433,7 @@ def create_mcp_server(event_bridge: Optional[EventBridge] = None) -> "FastMCP":
     if not _MCP_SERVER_AVAILABLE:
         raise ImportError(
             "MCP server requires the 'mcp' package. "
-            "Install with: pip install 'hermes-agent[mcp]'"
+            f"Install with: {sys.executable} -m pip install 'mcp'"
         )
 
     mcp = FastMCP(
@@ -838,7 +838,7 @@ def run_mcp_server(verbose: bool = False) -> None:
     if not _MCP_SERVER_AVAILABLE:
         print(
             "Error: MCP server requires the 'mcp' package.\n"
-            "Install with: pip install 'hermes-agent[mcp]'",
+            f"Install with: {sys.executable} -m pip install 'mcp'",
             file=sys.stderr,
         )
         sys.exit(1)
diff --git a/model_tools.py b/model_tools.py
index 801255b7978..5ec806e78bf 100644
--- a/model_tools.py
+++ b/model_tools.py
@@ -274,9 +274,9 @@ def get_tool_definitions(
     # execute_code" even when the API key isn't configured or the toolset is
     # disabled (#560-discord).
     if "execute_code" in available_tool_names:
-        from tools.code_execution_tool import SANDBOX_ALLOWED_TOOLS, build_execute_code_schema
+        from tools.code_execution_tool import SANDBOX_ALLOWED_TOOLS, build_execute_code_schema, _get_execution_mode
         sandbox_enabled = SANDBOX_ALLOWED_TOOLS & available_tool_names
-        dynamic_schema = build_execute_code_schema(sandbox_enabled)
+        dynamic_schema = build_execute_code_schema(sandbox_enabled, mode=_get_execution_mode())
         for i, td in enumerate(filtered_tools):
             if td.get("function", {}).get("name") == "execute_code":
                 filtered_tools[i] = {"type": "function", "function": dynamic_schema}
diff --git a/nix/checks.nix b/nix/checks.nix
index 55068a94f16..984016a4f47 100644
--- a/nix/checks.nix
+++ b/nix/checks.nix
@@ -37,7 +37,30 @@ json.dump(sorted(leaf_paths(DEFAULT_CONFIG)), sys.stdout, indent=2)
     in {
       packages.configKeys = configKeys;
 
-      checks = lib.optionalAttrs pkgs.stdenv.hostPlatform.isLinux {
+      checks = {
+        # Cross-platform evaluation — catches "not supported for interpreter"
+        # errors (e.g. sphinx dropping python311) without needing a darwin builder.
+        # Evaluation is pure and instant; it doesn't build anything.
+        cross-eval = let
+          targetSystems = builtins.filter
+            (s: inputs.self.packages ? ${s})
+            [ "x86_64-linux" "aarch64-linux" "aarch64-darwin" "x86_64-darwin" ];
+          tryEvalPkg = sys:
+            let pkg = inputs.self.packages.${sys}.default;
+            in builtins.tryEval (builtins.seq pkg.drvPath true);
+          results = map (sys: { inherit sys; result = tryEvalPkg sys; }) targetSystems;
+          failures = builtins.filter (r: !r.result.success) results;
+          failMsg = lib.concatMapStringsSep "\n" (r: "  - ${r.sys}") failures;
+        in pkgs.runCommand "hermes-cross-eval" { } (
+          if failures != [] then
+            builtins.throw "Package fails to evaluate on:\n${failMsg}"
+          else ''
+            echo "PASS: package evaluates on all ${toString (builtins.length targetSystems)} platforms"
+            mkdir -p $out
+            echo "ok" > $out/result
+          ''
+        );
+      } // lib.optionalAttrs pkgs.stdenv.hostPlatform.isLinux {
         # Verify binaries exist and are executable
         package-contents = pkgs.runCommand "hermes-package-contents" { } ''
           set -e
@@ -125,6 +148,29 @@ json.dump(sorted(leaf_paths(DEFAULT_CONFIG)), sys.stdout, indent=2)
           echo "ok" > $out/result
         '';
 
+        # Verify HERMES_NODE is set in wrapper and points to Node 20+
+        # (string-width uses the /v regex flag which requires Node 20+)
+        hermes-node = pkgs.runCommand "hermes-node-version" { } ''
+          set -e
+          echo "=== Checking HERMES_NODE in wrapper ==="
+          grep -q "HERMES_NODE" ${hermes-agent}/bin/hermes || \
+            (echo "FAIL: HERMES_NODE not set in wrapper"; exit 1)
+          echo "PASS: HERMES_NODE present in wrapper"
+
+          HERMES_NODE=$(sed -n "s/^export HERMES_NODE='\(.*\)'/\1/p" ${hermes-agent}/bin/hermes)
+          test -x "$HERMES_NODE" || (echo "FAIL: HERMES_NODE=$HERMES_NODE not executable"; exit 1)
+          echo "PASS: HERMES_NODE executable at $HERMES_NODE"
+
+          NODE_MAJOR=$("$HERMES_NODE" --version | sed 's/^v//' | cut -d. -f1)
+          test "$NODE_MAJOR" -ge 20 || \
+            (echo "FAIL: Node v$NODE_MAJOR < 20, TUI needs /v regex flag support"; exit 1)
+          echo "PASS: Node v$NODE_MAJOR >= 20"
+
+          echo "=== All HERMES_NODE checks passed ==="
+          mkdir -p $out
+          echo "ok" > $out/result
+        '';
+
         # Verify HERMES_MANAGED guard works on all mutation commands
         managed-guard = pkgs.runCommand "hermes-managed-guard" { } ''
           set -e
diff --git a/nix/devShell.nix b/nix/devShell.nix
index db39c9d9557..63edc59cf1e 100644
--- a/nix/devShell.nix
+++ b/nix/devShell.nix
@@ -12,7 +12,7 @@
       devShells.default = pkgs.mkShell {
         inputsFrom = packages;
         packages = with pkgs; [
-          python311 uv nodejs_22 ripgrep git openssh ffmpeg
+          python312 uv nodejs_22 ripgrep git openssh ffmpeg
         ];
 
         shellHook = let
diff --git a/nix/nixosModules.nix b/nix/nixosModules.nix
index 75b3dca31b2..3f2709f8145 100644
--- a/nix/nixosModules.nix
+++ b/nix/nixosModules.nix
@@ -121,11 +121,19 @@
       # ── Provision apt packages (first boot only, cached in writable layer) ──
       # sudo: agent self-modification
       # nodejs/npm: writable node so npm i -g works (nix store copies are read-only)
-      # curl: needed for uv installer
+      #   Node 22 via NodeSource — Ubuntu 24.04 ships Node 18 which is EOL.
+      # curl: needed for uv installer + NodeSource setup
       if [ ! -f /var/lib/hermes-tools-provisioned ] && command -v apt-get >/dev/null 2>&1; then
         echo "First boot: provisioning agent tools..."
         apt-get update -qq
-        apt-get install -y -qq sudo nodejs npm curl
+        apt-get install -y -qq sudo curl ca-certificates gnupg
+        mkdir -p /etc/apt/keyrings
+        curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \
+          | gpg --batch --yes --dearmor -o /etc/apt/keyrings/nodesource.gpg  # --yes: overwrite keyring on re-run
+        echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_22.x nodistro main" \
+          > /etc/apt/sources.list.d/nodesource.list
+        apt-get update -qq  # refresh indexes so the NodeSource repo added above is visible
+        apt-get install -y -qq nodejs
         touch /var/lib/hermes-tools-provisioned
       fi
 
@@ -140,15 +148,14 @@
         su -s /bin/sh "$TARGET_USER" -c 'curl -LsSf https://astral.sh/uv/install.sh | sh' || true
       fi
 
-      # Python 3.11 venv — gives the agent a writable Python with pip.
-      # Uses uv to install Python 3.11 (Ubuntu 24.04 ships 3.12).
+      # Python 3.12 venv — gives the agent a writable Python with pip.
       # --seed includes pip/setuptools so bare `pip install` works.
       _UV_BIN="$TARGET_HOME/.local/bin/uv"
       if [ ! -d "$TARGET_HOME/.venv" ] && [ -x "$_UV_BIN" ]; then
         su -s /bin/sh "$TARGET_USER" -c "
           export PATH=\"\$HOME/.local/bin:\$PATH\"
-          uv python install 3.11
-          uv venv --python 3.11 --seed \"\$HOME/.venv\"
+          uv python install 3.12
+          uv venv --python 3.12 --seed \"\$HOME/.venv\"
         " || true
       fi
 
@@ -171,7 +178,7 @@
     # Package and entrypoint use stable symlinks (current-package, current-entrypoint)
     # so they can update without recreation. Env vars go through $HERMES_HOME/.env.
     containerIdentity = builtins.hashString "sha256" (builtins.toJSON {
-      schema = 3; # bump when identity inputs change
+      schema = 4; # bump when identity inputs change (4: Node 18→22 via NodeSource)
       image = cfg.container.image;
       extraVolumes = cfg.container.extraVolumes;
       extraOptions = cfg.container.extraOptions;
diff --git a/nix/packages.nix b/nix/packages.nix
index f39d9d0b2be..912be7843bd 100644
--- a/nix/packages.nix
+++ b/nix/packages.nix
@@ -18,6 +18,10 @@
         filter = path: _type: !(pkgs.lib.hasInfix "/index-cache/" path);
       };
 
+      hermesWeb = pkgs.callPackage ./web.nix {
+        npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default;
+      };
+
       runtimeDeps = with pkgs; [
         nodejs_22
         ripgrep
@@ -52,6 +56,7 @@
 
             mkdir -p $out/share/hermes-agent $out/bin
             cp -r ${bundledSkills} $out/share/hermes-agent/skills
+            cp -r ${hermesWeb} $out/share/hermes-agent/web_dist
 
             # copy pre-built TUI (same layout as dev: ui-tui/dist/ + node_modules/)
             mkdir -p $out/ui-tui
@@ -62,8 +67,10 @@
                 makeWrapper ${hermesVenv}/bin/${name} $out/bin/${name} \
                   --suffix PATH : "${runtimePath}" \
                   --set HERMES_BUNDLED_SKILLS $out/share/hermes-agent/skills \
+                  --set HERMES_WEB_DIST $out/share/hermes-agent/web_dist \
                   --set HERMES_TUI_DIR $out/ui-tui \
-                  --set HERMES_PYTHON ${hermesVenv}/bin/python3
+                  --set HERMES_PYTHON ${hermesVenv}/bin/python3 \
+                  --set HERMES_NODE ${pkgs.nodejs_22}/bin/node
               '')
               [
                 "hermes"
@@ -80,7 +87,7 @@
             STAMP_VALUE="${pyprojectHash}:${uvLockHash}"
             if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
               echo "hermes-agent: installing Python dependencies..."
-              uv venv .venv --python ${pkgs.python311}/bin/python3 2>/dev/null || true
+              uv venv .venv --python ${pkgs.python312}/bin/python3 2>/dev/null || true
               source .venv/bin/activate
               uv pip install -e ".[all]"
               [ -d mini-swe-agent ] && uv pip install -e ./mini-swe-agent 2>/dev/null || true
@@ -103,6 +110,7 @@
         };
 
         tui = hermesTui;
+        web = hermesWeb;
       };
     };
 }
diff --git a/nix/python.nix b/nix/python.nix
index 160b4ee790b..0bcd017e76d 100644
--- a/nix/python.nix
+++ b/nix/python.nix
@@ -1,6 +1,6 @@
 # nix/python.nix — uv2nix virtual environment builder
 {
-  python311,
+  python312,
   lib,
   callPackage,
   uv2nix,
@@ -35,30 +35,46 @@ let
       };
     };
 
+  # Legacy alibabacloud packages ship only sdists with setup.py/setup.cfg
+  # and no pyproject.toml, so setuptools isn't declared as a build dep.
+  buildSystemOverrides = final: prev: builtins.mapAttrs
+    (name: _: prev.${name}.overrideAttrs (old: {
+      nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [ final.setuptools ];
+    }))
+    (lib.genAttrs [
+      "alibabacloud-credentials-api"
+      "alibabacloud-endpoint-util"
+      "alibabacloud-gateway-dingtalk"
+      "alibabacloud-gateway-spi"
+      "alibabacloud-tea"
+    ] (_: null));
+
   pythonPackageOverrides = final: _prev:
     if isAarch64Darwin then {
-      numpy = mkPrebuiltOverride final python311.pkgs.numpy { };
+      numpy = mkPrebuiltOverride final python312.pkgs.numpy { };
 
-      av = mkPrebuiltOverride final python311.pkgs.av { };
+      pyarrow = mkPrebuiltOverride final python312.pkgs.pyarrow { };
 
-      humanfriendly = mkPrebuiltOverride final python311.pkgs.humanfriendly { };
+      av = mkPrebuiltOverride final python312.pkgs.av { };
 
-      coloredlogs = mkPrebuiltOverride final python311.pkgs.coloredlogs {
+      humanfriendly = mkPrebuiltOverride final python312.pkgs.humanfriendly { };
+
+      coloredlogs = mkPrebuiltOverride final python312.pkgs.coloredlogs {
         humanfriendly = [ ];
       };
 
-      onnxruntime = mkPrebuiltOverride final python311.pkgs.onnxruntime {
+      onnxruntime = mkPrebuiltOverride final python312.pkgs.onnxruntime {
         coloredlogs = [ ];
         numpy = [ ];
         packaging = [ ];
       };
 
-      ctranslate2 = mkPrebuiltOverride final python311.pkgs.ctranslate2 {
+      ctranslate2 = mkPrebuiltOverride final python312.pkgs.ctranslate2 {
         numpy = [ ];
         pyyaml = [ ];
       };
 
-      faster-whisper = mkPrebuiltOverride final python311.pkgs.faster-whisper {
+      faster-whisper = mkPrebuiltOverride final python312.pkgs.faster-whisper {
         av = [ ];
         ctranslate2 = [ ];
         huggingface-hub = [ ];
@@ -70,11 +86,12 @@ let
 
   pythonSet =
     (callPackage pyproject-nix.build.packages {
-      python = python311;
+      python = python312;
     }).overrideScope
       (lib.composeManyExtensions [
         pyproject-build-systems.overlays.default
         overlay
+        buildSystemOverrides
         pythonPackageOverrides
       ]);
 in
diff --git a/nix/tui.nix b/nix/tui.nix
index 70eb67f949a..7303edecb9f 100644
--- a/nix/tui.nix
+++ b/nix/tui.nix
@@ -4,7 +4,7 @@ let
   src = ../ui-tui;
   npmDeps = pkgs.fetchNpmDeps {
     inherit src;
-    hash = "sha256-zsUPmbC6oMUO10EhS3ptvDjwlfpCSEmrkjyeORw7fac=";
+    hash = "sha256-mG3vpgGi4ljt4X3XIf3I/5mIcm+rVTUAmx2DQ6YVA90=";
   };
 
   packageJson = builtins.fromJSON (builtins.readFile (src + "/package.json"));
@@ -18,11 +18,6 @@ pkgs.buildNpmPackage {
 
   doCheck = false;
 
-  postPatch = ''
-    # fetchNpmDeps strips the trailing newline; match it so the diff passes
-    sed -i -z 's/\n$//' package-lock.json
-  '';
-
   installPhase = ''
     runHook preInstall
 
diff --git a/nix/web.nix b/nix/web.nix
new file mode 100644
index 00000000000..247889753f6
--- /dev/null
+++ b/nix/web.nix
@@ -0,0 +1,76 @@
+# nix/web.nix — Hermes Web Dashboard (Vite/React) frontend build
+#
+# Builds ../web with buildNpmPackage and installs the Vite "dist" output as $out.
+# Also carries an update_web_lockfile helper script and a dev-shell hook that
+# re-runs "npm install" whenever web/package-lock.json changes.
+{ pkgs, npm-lockfile-fix, ... }:
+let
+  src = ../web;
+  npmDeps = pkgs.fetchNpmDeps {
+    inherit src;
+    hash = "sha256-Y0pOzdFG8BLjfvCLmsvqYpjxFjAQabXp1i7X9W/cCU4=";
+  };
+
+  # Hash of the lockfile contents; the dev-shell hook compares it against a
+  # stamp file to decide whether dependencies must be reinstalled.
+  npmLockHash = builtins.hashString "sha256" (builtins.readFile ../web/package-lock.json);
+in
+pkgs.buildNpmPackage {
+  pname = "hermes-web";
+  version = "0.0.0";
+  inherit src npmDeps;
+
+  doCheck = false;
+
+  buildPhase = ''
+    runHook preBuild
+    npx tsc -b
+    npx vite build --outDir dist
+    runHook postBuild
+  '';
+
+  installPhase = ''
+    runHook preInstall
+    cp -r dist $out
+    runHook postInstall
+  '';
+
+  nativeBuildInputs = [
+    # Regenerates web/package-lock.json and rewrites the fetchNpmDeps hash in
+    # this file from the "got:" line of a deliberately failing build.
+    (pkgs.writeShellScriptBin "update_web_lockfile" ''
+      set -euox pipefail
+
+      REPO_ROOT=$(git rev-parse --show-toplevel)
+
+      cd "$REPO_ROOT/web"
+      rm -rf node_modules/
+      npm cache clean --force
+      CI=true npm install
+      ${pkgs.lib.getExe npm-lockfile-fix} ./package-lock.json
+
+      NIX_FILE="$REPO_ROOT/nix/web.nix"
+      # Blank the hash so the next build fails and reports the expected one.
+      sed -i "s/hash = \"[^\"]*\";/hash = \"\";/" "$NIX_FILE"
+      NIX_OUTPUT=$(nix build .#web 2>&1 || true)
+      NEW_HASH=$(echo "$NIX_OUTPUT" | grep 'got:' | awk '{print $2}')
+      echo got new hash $NEW_HASH
+      sed -i "s|hash = \"[^\"]*\";|hash = \"$NEW_HASH\";|" "$NIX_FILE"
+      nix build .#web
+      echo "Updated npm hash in $NIX_FILE to $NEW_HASH"
+    '')
+  ];
+
+  passthru.devShellHook = ''
+    STAMP=".nix-stamps/hermes-web"
+    STAMP_VALUE="${npmLockHash}"
+    if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
+      echo "hermes-web: installing npm dependencies..."
+      # Install in a subshell so a failure cannot leave the shell inside web/,
+      # and only write the stamp when the install succeeded.
+      (cd web && CI=true npm install --silent --no-fund --no-audit 2>/dev/null) &&
+        mkdir -p .nix-stamps &&
+        echo "$STAMP_VALUE" > "$STAMP"
+    fi
+  '';
+}
diff --git a/optional-skills/autonomous-ai-agents/honcho/SKILL.md b/optional-skills/autonomous-ai-agents/honcho/SKILL.md
index c60d2c63561..1c099ca605f 100644
--- a/optional-skills/autonomous-ai-agents/honcho/SKILL.md
+++ b/optional-skills/autonomous-ai-agents/honcho/SKILL.md
@@ -145,10 +145,10 @@ Controls **how often** dialectic and context calls happen.
 | Key | Default | Description |
 |-----|---------|-------------|
 | `contextCadence` | `1` | Min turns between context API calls |
-| `dialecticCadence` | `3` | Min turns between dialectic API calls |
+| `dialecticCadence` | `2` | Min turns between dialectic API calls. Recommended 1–5 |
 | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` for base context injection |
 
-Higher cadence values reduce API calls and cost. `dialecticCadence: 3` (default) means the dialectic engine fires at most every 3rd turn.
+Higher cadence values fire the dialectic LLM less often. `dialecticCadence: 2` means the engine fires at most every other turn. Setting it to `1` allows it to fire every turn.
 
 ### Depth (how many)
 
@@ -180,6 +180,8 @@ If `dialecticDepthLevels` is omitted, rounds use **proportional levels** derived
 
 This keeps earlier passes cheap while using full depth on the final synthesis.
 
+**Depth at session start.** The session-start prewarm runs the full configured `dialecticDepth` in the background before turn 1. A single-pass prewarm on a cold peer often returns thin output — multi-pass depth runs the audit/reconcile cycle before the user ever speaks. Turn 1 consumes the prewarm result directly; if prewarm hasn't landed in time, turn 1 falls back to a synchronous call with a bounded timeout.
+
 ### Level (how hard)
 
 Controls the **intensity** of each dialectic reasoning round.
@@ -368,7 +370,7 @@ Config file: `$HERMES_HOME/honcho.json` (profile-local) or `~/.honcho/config.jso
 | `contextTokens` | uncapped | Max tokens for the combined base context injection (summary + representation + card). Opt-in cap — omit to leave uncapped, set to an integer to bound injection size. |
 | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` |
 | `contextCadence` | `1` | Min turns between context API calls |
-| `dialecticCadence` | `3` | Min turns between dialectic LLM calls |
+| `dialecticCadence` | `2` | Min turns between dialectic LLM calls (recommended 1–5) |
 
 The `contextTokens` budget is enforced at injection time. If the session summary + representation + card exceed the budget, Honcho trims the summary first, then the representation, preserving the card. This prevents context blowup in long sessions.
 
diff --git a/optional-skills/creative/touchdesigner-mcp/SKILL.md b/optional-skills/creative/touchdesigner-mcp/SKILL.md
new file mode 100644
index 00000000000..d0bd348afc4
--- /dev/null
+++ b/optional-skills/creative/touchdesigner-mcp/SKILL.md
@@ -0,0 +1,339 @@
+---
+name: touchdesigner-mcp
+description: "Control a running TouchDesigner instance via twozero MCP — create operators, set parameters, wire connections, execute Python, build real-time visuals. 36 native tools."
+version: 1.0.0
+author: kshitijk4poor
+license: MIT
+metadata:
+  hermes:
+    tags: [TouchDesigner, MCP, twozero, creative-coding, real-time-visuals, generative-art, audio-reactive, VJ, installation, GLSL]
+    related_skills: [native-mcp, ascii-video, manim-video, hermes-video]
+
+---
+
+# TouchDesigner Integration (twozero MCP)
+
+## CRITICAL RULES
+
+1. **NEVER guess parameter names.** Call `td_get_par_info` for the op type FIRST. Your training data is wrong for TD 2025.32.
+2. **If `tdAttributeError` fires, STOP.** Call `td_get_operator_info` on the failing node before continuing.
+3. **NEVER hardcode absolute paths** in script callbacks. Use `me.parent()` / `scriptOp.parent()`.
+4. **Prefer native MCP tools over td_execute_python.** Use `td_create_operator`, `td_set_operator_pars`, `td_get_errors` etc. Only fall back to `td_execute_python` for complex multi-step logic.
+5. **Call `td_get_hints` before building.** It returns patterns specific to the op type you're working with.
+
+## Architecture
+
+```
+Hermes Agent -> MCP (Streamable HTTP) -> twozero.tox (port 40404) -> TD Python
+```
+
+36 native tools. Free plugin (no payment/license — confirmed April 2026).
+Context-aware (knows selected OP, current network).
+Hub health check: `GET http://localhost:40404/mcp` returns JSON with instance PID, project name, TD version.
+
+## Setup (Automated)
+
+Run the setup script to handle everything:
+
+```bash
+bash "${HERMES_HOME:-$HOME/.hermes}/skills/creative/touchdesigner-mcp/scripts/setup.sh"
+```
+
+The script will:
+1. Check if TD is running
+2. Download twozero.tox if not already cached
+3. Add `twozero_td` MCP server to Hermes config (if missing)
+4. Test the MCP connection on port 40404
+5. Report what manual steps remain (drag .tox into TD, enable MCP toggle)
+
+### Manual steps (one-time, cannot be automated)
+
+1. **Drag `~/Downloads/twozero.tox` into the TD network editor** → click Install
+2. **Enable MCP:** click twozero icon → Settings → mcp → "auto start MCP" → Yes
+3. **Restart Hermes session** to pick up the new MCP server
+
+After setup, verify:
+```bash
+nc -z 127.0.0.1 40404 && echo "twozero MCP: READY"
+```
+
+## Environment Notes
+
+- **Non-Commercial TD** caps resolution at 1280×1280. Use `outputresolution = 'custom'` and set width/height explicitly.
+- **Codecs:** `prores` (preferred on macOS) or `mjpa` as fallback. H.264/H.265/AV1 require a Commercial license.
+- Always call `td_get_par_info` before setting params — names vary by TD version (see CRITICAL RULES #1).
+
+## Workflow
+
+### Step 0: Discover (before building anything)
+
+```
+Call td_get_par_info with op_type for each type you plan to use.
+Call td_get_hints with the topic you're building (e.g. "glsl", "audio reactive", "feedback").
+Call td_get_focus to see where the user is and what's selected.
+Call td_get_network to see what already exists.
+```
+
+No temp nodes, no cleanup. This replaces the old discovery dance entirely.
+
+### Step 1: Clean + Build
+
+**IMPORTANT: Split cleanup and creation into SEPARATE MCP calls.** Destroying and recreating same-named nodes in one `td_execute_python` script causes "Invalid OP object" errors. See pitfalls #11b.
+
+Use `td_create_operator` for each node (handles viewport positioning automatically):
+
+```
+td_create_operator(type="noiseTOP", parent="/project1", name="bg", parameters={"resolutionw": 1280, "resolutionh": 720})
+td_create_operator(type="levelTOP", parent="/project1", name="brightness")
+td_create_operator(type="nullTOP", parent="/project1", name="out")
+```
+
+For bulk creation or wiring, use `td_execute_python`:
+
+```python
+# td_execute_python script:
+root = op('/project1')
+nodes = []
+for name, optype in [('bg', noiseTOP), ('fx', levelTOP), ('out', nullTOP)]:
+    n = root.create(optype, name)
+    nodes.append(n.path)
+# Wire chain
+for i in range(len(nodes)-1):
+    op(nodes[i]).outputConnectors[0].connect(op(nodes[i+1]).inputConnectors[0])
+result = {'created': nodes}
+```
+
+### Step 2: Set Parameters
+
+Prefer the native tool (validates params, won't crash):
+
+```
+td_set_operator_pars(path="/project1/bg", parameters={"roughness": 0.6, "monochrome": true})
+```
+
+For expressions or modes, use `td_execute_python`:
+
+```python
+op('/project1/time_driver').par.colorr.expr = "absTime.seconds % 1000.0"
+```
+
+### Step 3: Wire
+
+Use `td_execute_python` — no native wire tool exists:
+
+```python
+op('/project1/bg').outputConnectors[0].connect(op('/project1/fx').inputConnectors[0])
+```
+
+### Step 4: Verify
+
+```
+td_get_errors(path="/project1", recursive=true)
+td_get_perf()
+td_get_operator_info(path="/project1/out", detail="full")
+```
+
+### Step 5: Display / Capture
+
+```
+td_get_screenshot(path="/project1/out")
+```
+
+Or open a window via script:
+
+```python
+win = op('/project1').create(windowCOMP, 'display')
+win.par.winop = op('/project1/out').path
+win.par.winw = 1280; win.par.winh = 720
+win.par.winopen.pulse()
+```
+
+## MCP Tool Quick Reference
+
+**Core (use these most):**
+| Tool | What |
+|------|------|
+| `td_execute_python` | Run arbitrary Python in TD. Full API access. |
+| `td_create_operator` | Create node with params + auto-positioning |
+| `td_set_operator_pars` | Set params safely (validates, won't crash) |
+| `td_get_operator_info` | Inspect one node: connections, params, errors |
+| `td_get_operators_info` | Inspect multiple nodes in one call |
+| `td_get_network` | See network structure at a path |
+| `td_get_errors` | Find errors/warnings recursively |
+| `td_get_par_info` | Get param names for an OP type (replaces discovery) |
+| `td_get_hints` | Get patterns/tips before building |
+| `td_get_focus` | What network is open, what's selected |
+
+**Read/Write:**
+| Tool | What |
+|------|------|
+| `td_read_dat` | Read DAT text content |
+| `td_write_dat` | Write/patch DAT content |
+| `td_read_chop` | Read CHOP channel values |
+| `td_read_textport` | Read TD console output |
+
+**Visual:**
+| Tool | What |
+|------|------|
+| `td_get_screenshot` | Capture one OP viewer to file |
+| `td_get_screenshots` | Capture multiple OPs at once |
+| `td_get_screen_screenshot` | Capture actual screen via TD |
+| `td_navigate_to` | Jump network editor to an OP |
+
+**Search:**
+| Tool | What |
+|------|------|
+| `td_find_op` | Find ops by name/type across project |
+| `td_search` | Search code, expressions, string params |
+
+**System:**
+| Tool | What |
+|------|------|
+| `td_get_perf` | Performance profiling (FPS, slow ops) |
+| `td_list_instances` | List all running TD instances |
+| `td_get_docs` | In-depth docs on a TD topic |
+| `td_agents_md` | Read/write per-COMP markdown docs |
+| `td_reinit_extension` | Reload extension after code edit |
+| `td_clear_textport` | Clear console before debug session |
+
+**Input Automation:**
+| Tool | What |
+|------|------|
+| `td_input_execute` | Send mouse/keyboard to TD |
+| `td_input_status` | Poll input queue status |
+| `td_input_clear` | Stop input automation |
+| `td_op_screen_rect` | Get screen coords of a node |
+| `td_click_screen_point` | Click a point in a screenshot |
+
+See `references/mcp-tools.md` for full parameter schemas.
+
+## Key Implementation Rules
+
+**GLSL time:** No `uTDCurrentTime` in GLSL TOP. Use the Values page:
+```python
+# Call td_get_par_info(op_type="glslTOP") first to confirm param names
+td_set_operator_pars(path="/project1/shader", parameters={"value0name": "uTime"})
+# Then set expression via script:
+# op('/project1/shader').par.value0.expr = "absTime.seconds"
+# In GLSL: uniform float uTime;
+```
+
+Fallback: feed time in through a Constant TOP set to `rgba32float` format (an 8-bit format clamps values to 0-1, which freezes the shader).
+
+**Feedback TOP:** Use `top` parameter reference, not direct input wire. "Not enough sources" resolves after first cook. "Cook dependency loop" warning is expected.
+
+**Resolution:** Non-Commercial caps at 1280×1280. Use `outputresolution = 'custom'`.
+
+**Large shaders:** Write GLSL to `/tmp/file.glsl`, then use `td_write_dat` or `td_execute_python` to load.
+
+**Vertex/Point access (TD 2025.32):** `point.P[0]`, `point.P[1]`, `point.P[2]` — NOT `.x`, `.y`, `.z`.
+
+**Extensions:** `ext0object` format is `"op('./datName').module.ClassName(me)"` in CONSTANT mode. After editing extension code with `td_write_dat`, call `td_reinit_extension`.
+
+**Script callbacks:** ALWAYS use relative paths via `me.parent()` / `scriptOp.parent()`.
+
+**Cleaning nodes:** Always take a snapshot with `list(root.children)` before iterating, and check `child.valid` on each child.
+
+## Recording / Exporting Video
+
+```python
+# via td_execute_python:
+root = op('/project1')
+rec = root.create(moviefileoutTOP, 'recorder')
+op('/project1/out').outputConnectors[0].connect(rec.inputConnectors[0])
+rec.par.type = 'movie'
+rec.par.file = '/tmp/output.mov'
+rec.par.videocodec = 'prores'  # Apple ProRes — NOT license-restricted on macOS
+rec.par.record = True   # start (safer in a separate call once the path is set; setting both in one script can race)
+# rec.par.record = False  # stop (call separately later)
+```
+
+H.264/H.265/AV1 need Commercial license. Use `prores` on macOS or `mjpa` as fallback.
+Extract frames (ffmpeg does not create the output directory): `mkdir -p /tmp/frames && ffmpeg -i /tmp/output.mov -vframes 120 /tmp/frames/frame_%06d.png`
+
+**TOP.save() is useless for animation** — captures same GPU texture every time. Always use MovieFileOut.
+
+### Before Recording: Checklist
+
+1. **Verify FPS > 0** via `td_get_perf`. If FPS=0 the recording will be empty. See pitfalls #38-39.
+2. **Verify shader output is not black** via `td_get_screenshot`. Black output = shader error or missing input. See pitfalls #8, #40.
+3. **If recording with audio:** cue audio to start first, then delay recording by 3 frames. See pitfalls #19.
+4. **Set output path before starting record** — setting both in the same script can race.
+
+## Audio-Reactive GLSL (Proven Recipe)
+
+### Correct signal chain (tested April 2026)
+
+```
+AudioFileIn CHOP (playmode=sequential)
+  → AudioSpectrum CHOP (FFT=512, outputmenu=setmanually, outlength=256, timeslice=ON)
+  → Math CHOP (gain=10)
+  → CHOP to TOP (dataformat=r, layout=rowscropped)
+  → GLSL TOP input 1 (spectrum texture, 256x2)
+
+Constant TOP (rgba32float, time) → GLSL TOP input 0
+GLSL TOP → Null TOP → MovieFileOut
+```
+
+### Critical audio-reactive rules (empirically verified)
+
+1. **TimeSlice must stay ON** for AudioSpectrum. OFF = processes entire audio file → 24000+ samples → CHOP to TOP overflow.
+2. **Set Output Length manually** to 256 via `outputmenu='setmanually'` and `outlength=256`. Default outputs 22050 samples.
+3. **DO NOT use Lag CHOP for spectrum smoothing.** Lag CHOP operates in timeslice mode and expands 256 samples to 2400+, averaging all values to near-zero (~1e-06). The shader receives no usable data. This was the #1 audio sync failure in testing.
+4. **DO NOT use Filter CHOP either** — same timeslice expansion problem with spectrum data.
+5. **Smoothing belongs in the GLSL shader** if needed, via temporal lerp with a feedback texture: `mix(prevValue, newValue, 0.3)`. This gives frame-perfect sync with zero pipeline latency.
+6. **CHOP to TOP dataformat = 'r'**, layout = 'rowscropped'. Spectrum output is 256x2 (stereo). Sample at y=0.25 for first channel.
+7. **Math gain = 10** (not 5). Raw spectrum values are ~0.19 in bass range. Gain of 10 gives usable ~5.0 for the shader.
+8. **No Resample CHOP needed.** Control output size via AudioSpectrum's `outlength` param directly.
+
+### GLSL spectrum sampling
+
+```glsl
+// Input 0 = time (1x1 rgba32float), Input 1 = spectrum (256x2)
+float iTime = texture(sTD2DInputs[0], vec2(0.5)).r;
+
+// Sample multiple points per band and average for stability:
+// NOTE: y=0.25 for first channel (stereo texture is 256x2, first row center is 0.25)
+float bass = (texture(sTD2DInputs[1], vec2(0.02, 0.25)).r +
+              texture(sTD2DInputs[1], vec2(0.05, 0.25)).r) / 2.0;
+float mid  = (texture(sTD2DInputs[1], vec2(0.2, 0.25)).r +
+              texture(sTD2DInputs[1], vec2(0.35, 0.25)).r) / 2.0;
+float hi   = (texture(sTD2DInputs[1], vec2(0.6, 0.25)).r +
+              texture(sTD2DInputs[1], vec2(0.8, 0.25)).r) / 2.0;
+```
+
+See `references/network-patterns.md` for complete build scripts + shader code.
+
+## Operator Quick Reference
+
+| Family | Color | Python class / MCP type | Suffix |
+|--------|-------|-------------|--------|
+| TOP | Purple | noiseTOP, glslTOP, compositeTOP, levelTOP, blurTOP, textTOP, nullTOP | TOP |
+| CHOP | Green | audiofileinCHOP, audiospectrumCHOP, mathCHOP, lfoCHOP, constantCHOP | CHOP |
+| SOP | Blue | gridSOP, sphereSOP, transformSOP, noiseSOP | SOP |
+| DAT | White | textDAT, tableDAT, scriptDAT, webserverDAT | DAT |
+| MAT | Yellow | phongMAT, pbrMAT, glslMAT, constMAT | MAT |
+| COMP | Gray | geometryCOMP, containerCOMP, cameraCOMP, lightCOMP, windowCOMP | COMP |
+
+## Security Notes
+
+- MCP runs on localhost only (port 40404). No authentication — any local process can send commands.
+- `td_execute_python` has unrestricted access to the TD Python environment and filesystem as the TD process user.
+- `setup.sh` downloads twozero.tox from the official 404zero.com URL. Verify the download if concerned.
+- The skill never sends data outside localhost. All MCP communication is local.
+
+## References
+
+| File | What |
+|------|------|
+| `references/pitfalls.md` | Hard-won lessons from real sessions |
+| `references/operators.md` | All operator families with params and use cases |
+| `references/network-patterns.md` | Recipes: audio-reactive, generative, GLSL, instancing |
+| `references/mcp-tools.md` | Full twozero MCP tool parameter schemas |
+| `references/python-api.md` | TD Python: op(), scripting, extensions |
+| `references/troubleshooting.md` | Connection diagnostics, debugging |
+| `scripts/setup.sh` | Automated setup script |
+
+---
+
+> You're not writing code. You're conducting light.
diff --git a/optional-skills/creative/touchdesigner-mcp/references/mcp-tools.md b/optional-skills/creative/touchdesigner-mcp/references/mcp-tools.md
new file mode 100644
index 00000000000..ec90076cb2b
--- /dev/null
+++ b/optional-skills/creative/touchdesigner-mcp/references/mcp-tools.md
@@ -0,0 +1,382 @@
+# twozero MCP Tools Reference
+
+36 tools from twozero MCP v2.774+ (April 2026).
+All tools accept an optional `target_instance` param for multi-TD-instance scenarios.
+
+## Execution & Scripting
+
+### td_execute_python
+
+Execute Python code inside TouchDesigner and return the result. Has full access to TD Python API (op, project, app, etc). Print statements and the last expression value are captured. Best for: wiring connections (inputConnectors), setting expressions (par.X.expr/mode), querying parameter names, and batch creation scripts (5+ operators). For creating 1-4 operators, prefer td_create_operator instead.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `code` | string | yes | Python code to execute in TouchDesigner |
+
+## Network & Structure
+
+### td_get_network
+
+Get the operator network structure in TouchDesigner (TD) at a given path. Returns compact list: name OPType flags. First line is full path of queried op. Flags: ch:N=children count, !cook=allowCooking off, bypass, private=isPrivate, blocked:reason, "comment text". depth=0 (default) = current level only. depth=1 = one level of children (indented). To explore deeper, call again on a specific COMP path. System operators (/ui, /sys) are hidden by default.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | no | Network path to inspect, e.g. '/' or '/project1' |
+| `depth` | integer | no | How many levels deep to recurse. 0=current level only (recommended), 1=include direct children of COMPs |
+| `includeSystem` | boolean | no | Include system operators (/ui, /sys). Default false. |
+| `nodeXY` | boolean | no | Include nodeX,nodeY coordinates. Default false. |
+
+### td_create_operator
+
+Create a new operator (node) in TouchDesigner (TD). Preferred way to create operators — handles viewport positioning, viewer flag, and docked ops automatically. For batch creation (5+ ops), you may use td_execute_python with a script instead, but then call td_get_hints('construction') first for correct parameter names and layout rules. Supports all TD operator types: TOP, CHOP, SOP, DAT, COMP, MAT. If parent is omitted, creates in the currently open network at the user's viewport position. When building a container: first create baseCOMP (no parent), then create children with parent=compPath.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `type` | string | yes | Operator type, e.g. 'textDAT', 'constantCHOP', 'noiseTOP', 'transformTOP', 'baseCOMP' |
+| `parent` | string | no | Path to the parent operator. If omitted, uses the currently open network in TD. |
+| `name` | string | no | Name for the new operator (optional, TD auto-names if omitted) |
+| `parameters` | object | no | Key-value pairs of parameters to set on the created operator |
+
+### td_find_op
+
+Find operators by name and/or type across the project. Returns TSV: path, OPType, flags. Flags: bypass, !cook, private, blocked:reason. Use td_search to search inside code/expressions; use td_find_op to find operators themselves.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `name` | string | no | Substring to match in operator name (case-insensitive). E.g. 'noise' finds noise1, noise2, myNoise. |
+| `type` | string | no | Substring to match in OPType (case-insensitive). E.g. 'noiseTOP', 'baseCOMP', 'CHOP'. Use exact type for precision or partial for broader matches. |
+| `root` | string | no | Root operator path to search from. Default '/project1'. |
+| `max_results` | number | no | Maximum results to return. Default 50. |
+| `max_depth` | number | no | Max recursion depth from root. Default unlimited. |
+| `detail` | `basic` / `summary` | no | Result detail level. 'basic' = name/path/type (fast). 'summary' = + connections, non-default pars, expressions. Default 'basic'. |
+
+### td_search
+
+Search for text across all code (DAT scripts), parameter expressions, and string parameter values in the TD project. Returns TSV: path, kind (code/expression/parameter/ref), line, text. JSON when context>0. Words are OR-matched. Use quotes for exact phrases: 'GetLogin "op('login')"'. Use count_only=true to quickly check if something is referenced without fetching full results.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `query` | string | yes | Search query. Multiple words = OR (any match). Wrap in quotes for exact phrase. Example: 'GetLogin getLogin' finds either. |
+| `root` | string | no | Root operator path to search from. Default '/project1'. |
+| `scope` | `all` / `code` / `editable` / `expressions` / `parameters` | no | What to search. 'code' = DAT scripts only (fast, ~0.05s). 'editable' = only editable code (skips inherited/ref DATs). 'expressions' = parameter expressions only. 'parameters' = string parameter values only. 'all' = everything (slow, ~1.5s due to parameter scan). Default 'all'. |
+| `case_sensitive` | boolean | no | Case-sensitive matching. Default false. |
+| `max_results` | number | no | Maximum results to return. Default 50. |
+| `context` | number | no | Lines to show before/after each code match. Saves td_read_dat calls. Default 0. |
+| `count_only` | boolean | no | Return only match count, not results. Fast existence check. |
+| `max_depth` | number | no | Max recursion depth from root. Default unlimited. |
+
+### td_navigate_to
+
+Navigate the TouchDesigner Network Editor viewport to show a specific operator. Opens the operator's parent network and centers the view on it. Use this to show the user where a problem is, or to navigate to an operator before modifying it.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Path to the operator to navigate to, e.g. '/project1/noise1' |
+
+## Operator Inspection
+
+### td_get_operator_info
+
+Get information about a specific operator (node) in TouchDesigner (TD). detail='summary': connections, non-default pars, expressions, CHOP channels (compact). detail='full': all of the above PLUS every parameter with value/default/label.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Full path to the operator, e.g. '/project1/noise1' |
+| `detail` | `summary` / `full` | no | Level of detail. 'summary' = connections, expressions, non-default pars, custom pars (pulse marked), CHOP channels. 'full' = summary + all parameters. Default 'full'. |
+
+### td_get_operators_info
+
+Get information about multiple operators in one call. Returns an array of operator info objects. Use instead of calling td_get_operator_info multiple times.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `paths` | array | yes | Array of full operator paths, e.g. ['/project1/null1', '/project1/null2'] |
+| `detail` | `summary` / `full` | no | Level of detail. Default 'summary'. |
+
+### td_get_par_info
+
+Get parameter names and details for a TouchDesigner operator type. Without specific pars: returns compact list of all parameters with their names, types, and menu options. With pars: returns full details (help text, menu values, style) for specific parameters. Use this when you need to know exact parameter names before setting them.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `op_type` | string | yes | TD operator type name, e.g. 'noiseTOP', 'blurTOP', 'lfoCHOP', 'compositeTOP' |
+| `pars` | array | no | Optional list of specific parameter names to get full details for |
+
+## Parameter Setting
+
+### td_set_operator_pars
+
+Set parameters and flags on an operator in TouchDesigner (TD). Safer than td_execute_python for simple parameter changes. Can set values, toggle bypass/viewer, without writing Python code.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Path to the operator |
+| `parameters` | object | no | Key-value pairs of parameters to set |
+| `bypass` | boolean | no | Set bypass state of the operator (not available on COMPs) |
+| `viewer` | boolean | no | Set viewer state of the operator |
+| `allowCooking` | boolean | no | Set cooking flag on a COMP. When False, internal network stops cooking (0 CPU). COMP-only. |
+
+## Data Read/Write
+
+### td_read_dat
+
+Read the text content of a DAT operator in TouchDesigner (TD). Returns content with line numbers. Use to read scripts, extensions, GLSL shaders, table data.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Path to the DAT operator |
+| `start_line` | integer | no | Start line (1-based). Omit to read from beginning. |
+| `end_line` | integer | no | End line (inclusive). Omit to read to end. |
+
+### td_write_dat
+
+Write or patch text content of a DAT operator in TouchDesigner (TD). Can do full replacement or StrReplace-style patching (old_text -> new_text). Use for editing scripts, extensions, shaders. Does NOT reinit extensions automatically.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Path to the DAT operator |
+| `text` | string | no | Full replacement text. Use this OR old_text+new_text, not both. |
+| `old_text` | string | no | Text to find and replace (must be unique in the DAT) |
+| `new_text` | string | no | Replacement text |
+| `replace_all` | boolean | no | If true, replaces ALL occurrences of old_text (default: false, requires unique match) |
+
+### td_read_chop
+
+Read CHOP channel sample data. Returns channel values as arrays. Use when you need the actual sample values (animation curves, lookup tables, waveforms), not just the summary from td_get_operator_info.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Path to the CHOP operator |
+| `channels` | array | no | Channel names to read. Omit to read all channels. |
+| `start` | integer | no | Start sample index (0-based). Omit to read from beginning. |
+| `end` | integer | no | End sample index (inclusive). Omit to read to end. |
+
+### td_read_textport
+
+Read the last N lines from the TouchDesigner (TD) log/textport (console output). Use this to see errors, warnings and print output from TD.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `lines` | integer | no | Number of recent lines to return |
+
+### td_clear_textport
+
+Clear the MCP textport log buffer. Use this before starting a debug session or an edit-run-check loop to keep td_read_textport output focused and minimal.
+
+No parameters (other than optional `target_instance`).
+
+## Visual Capture
+
+### td_get_screenshot
+
+Get a screenshot of an operator's viewer in TouchDesigner (TD). Saves the image to a file and returns the file path. Use your file-reading tool to view the image. Shows what the operator looks like in its viewer (TOP output, CHOP waveform graph, SOP geometry, DAT table, parameter UI, etc). Use this to visually inspect any operator, or to generate images via TD for use in your project. TWO-STEP ASYNC USAGE: Step 1 — call with 'path' to start: returns {'status': 'pending', 'requestId': '...'}. Step 2 — call with 'request_id' to retrieve: returns {'file': '/tmp/.../opname_id.jpg'}. Then read the file to see the image. If step 2 still returns pending, make one other tool call then retry.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | no | Full operator path to screenshot, e.g. '/project1/noise1'. Required for step 1. |
+| `request_id` | string | no | Request ID from step 1 to retrieve the completed screenshot. |
+| `max_size` | integer | no | Max pixel size for the longer side (default 512). Use 0 for original operator resolution (useful for pixel-accurate UI work). Higher values (e.g. 1024) for more detail. |
+| `output_path` | string | no | Optional absolute path where the image should be saved (e.g. '/Users/me/project/render.png'). If omitted, saved to /tmp/pisang_mcp/screenshots/. Use absolute paths — TD's working directory may differ from the agent's. |
+| `as_top` | boolean | no | If true, captures the operator directly as a TOP (bypasses the viewer renderer), preserving alpha/transparency. Only works for TOP operators — if the target is not a TOP, falls back to the viewer automatically. Use this when you need a clean PNG with alpha, e.g. to save a generated image for use in another project. |
+| `format` | `auto` / `jpg` / `png` | no | Image format. 'auto' (default): JPEG for viewer mode, PNG for as_top=true. 'jpg': always JPEG (smaller). 'png': always PNG (lossless). |
+
+### td_get_screenshots
+
+Get screenshots of multiple operators in one batch. Saves images to files and returns file paths. Use your file-reading tool to view images. TWO-STEP ASYNC USAGE: Step 1 — call with 'paths' array to start: returns {'status': 'pending', 'batchId': '...', 'total': N}. Step 2 — call with 'batch_id' to retrieve: returns {'files': [{op, file}, ...]}. Then read the files to see the images. If the batch is still processing, step 2 instead returns {'status': 'pending', 'ready': K, 'total': N}.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `paths` | array | no | List of full operator paths to screenshot. Required for step 1. |
+| `batch_id` | string | no | Batch ID from step 1 to retrieve completed screenshots. |
+| `max_size` | integer | no | Max pixel size for longer side (default 512). Use 0 for original resolution. |
+| `as_top` | boolean | no | If true, captures TOP operators directly (preserves alpha). Non-TOP operators fall back to viewer. |
+| `output_dir` | string | no | Optional absolute path to a directory. Each screenshot saved as <opname>.jpg or .png inside it and kept on disk. |
+| `format` | `auto` / `jpg` / `png` | no | Image format. 'auto' (default): JPEG for viewer mode, PNG for as_top=true. 'jpg': always JPEG (smaller). 'png': always PNG (lossless). |
+
+### td_get_screen_screenshot
+
+Capture a screenshot of the actual screen via TD's screenGrabTOP. Saves the image to a file and returns the file path. Use your file-reading tool to view the image. Unlike td_get_screenshot (operator viewer), this shows what the user literally sees on their monitor — TD windows, UI panels, everything. Use when simulating mouse/keyboard input to verify what happened on screen. Workflow: td_get_screen_screenshot → read file → td_input_execute → wait idle → td_get_screen_screenshot again. TWO-STEP ASYNC: Step 1 — call without request_id: returns {'status':'pending','requestId':'...'}. Step 2 — call with request_id: returns {'file': '/tmp/.../screen_id.jpg', 'info': '...metadata...'}. Then read the file to see the image. The requestId also stays usable with td_screen_point_to_global for later coordinate lookup. crop_x/y/w/h are in ACTUAL SCREEN PIXELS (not image pixels). Crops exceeding screen bounds are auto-clamped. SMART DEFAULTS: max_size is auto when omitted — 1920 for full screen (good overview), max(crop_w,crop_h) for cropped (guarantees 1:1 scale). At 1:1 scale: screen_coord = crop_origin + image_pixel. Otherwise use the formula from metadata.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `request_id` | string | no | Request ID from step 1 to retrieve the completed screenshot. |
+| `max_size` | integer | no | Max pixel size for the longer side. Auto when omitted: 1920 for full screen, max(crop_w,crop_h) for cropped (1:1). Set explicitly to override. |
+| `crop_x` | integer | no | Left edge in screen pixels. |
+| `crop_y` | integer | no | Top edge in screen pixels (y=0 at top of screen). |
+| `crop_w` | integer | no | Width in pixels. |
+| `crop_h` | integer | no | Height in pixels. |
+| `display` | integer | no | Screen index (default 0 = primary display). |
+
+## Context & Focus
+
+### td_get_focus
+
+Get the current user focus in TouchDesigner (TD): which network is open, selected operators, current operator, and rollover (what is under the mouse cursor). IMPORTANT: when the user says 'this operator' or 'вот этот', they mean the SELECTED/CURRENT operator, NOT the rollover. Rollover is just incidental mouse position and should be ignored for intent. Pass screenshots=true to immediately start a screenshot batch for all selected operators — response includes a 'screenshots' field with batchId; retrieve with td_get_screenshots(batch_id=...).
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `screenshots` | boolean | no | If true, start a screenshot batch for all selected operators. Retrieve with td_get_screenshots(batch_id=...). |
+| `max_size` | integer | no | Max screenshot size when screenshots=true (default 512). |
+| `as_top` | boolean | no | Passed to the screenshot batch when screenshots=true. |
+
+### td_get_errors
+
+Find errors and warnings in TouchDesigner (TD) operators. Checks operator errors, warnings, AND broken parameter expressions (missing channels, bad references, etc). Also includes recent script errors from the log (tracebacks), grouped and deduplicated — e.g. 1000 identical mouse-move errors shown as ×1000 with one entry. If path is given, checks that operator and its children. If no path, checks the currently open network. Use '/' for entire project. Use when user says something is broken, has errors, red nodes, горит ошибка, etc. TIP: call td_clear_textport before reproducing an error to keep log focused. TIP: combine with td_get_perf when user says 'тупит/лагает' to check both errors and performance.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | no | Path to check. If omitted, checks the current network. Use '/' to scan entire project. |
+| `recursive` | boolean | no | Check children recursively (default true) |
+| `include_log` | boolean | no | Include recent script errors from log, grouped by unique signature (default true). Use td_clear_textport before reproducing an error to keep results focused. |
+
+### td_get_perf
+
+Get performance data from TouchDesigner (TD). Returns TSV: header with fps/budget/memory summary, then slowest operators sorted by cook time. Columns: path, OPType, cpu/cook(ms), gpu/cook(ms), cpu/s, gpu/s, rate, flags. Use when user reports lag, low FPS, slow performance, тупит, тормозит.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | no | Path to profile. If omitted, profiles the current network. Use '/' for entire project. |
+| `top` | integer | no | Number of slowest operators to return |
+
+## Documentation
+
+### td_get_docs
+
+Get comprehensive documentation on a TouchDesigner topic. Unlike td_get_hints (compact tips), this returns in-depth reference material. Call without arguments to see available topics with descriptions. Call with a topic name to get the full documentation.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `topic` | string | no | Topic to get docs for. Omit to list available topics. |
+
+### td_get_hints
+
+Get TouchDesigner tips and common patterns for a topic. Call this BEFORE creating operators or writing TD Python code to learn correct parameter names, expressions, and idiomatic approaches. Available topics: animation, noise, connections, parameters, scripting, construction, ui_analysis, panel_layout, screenshots, input_simulation, undo, networking, all. IMPORTANT: always call with topic='construction' before building multi-operator setups to get correct TOP/CHOP parameter names, compositeTOP input ordering, and layout guidelines. IMPORTANT: always call with topic='input_simulation' before using td_input_execute to learn focus recovery, coordinate systems, and testing workflow.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `topic` | string | yes | Topic to get hints for. Available: 'animation', 'noise', 'connections', 'parameters', 'scripting', 'construction', 'ui_analysis', 'panel_layout', 'screenshots', 'input_simulation', 'undo', 'networking', 'all' |
+
+### td_agents_md
+
+Read, write, or update the agents_md documentation inside a COMP container. agents_md is a Markdown textDAT describing the container's purpose, structure, and conventions. action='read': returns content + staleness check (compares documented children vs live state). action='update': refreshes auto-generated sections (children list, connections) from live state, preserves human-written sections. action='write': sets full content, creates the DAT if missing.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Path to the COMP container |
+| `action` | `read` / `update` / `write` | yes | read=get content+staleness, update=refresh auto sections, write=set content |
+| `content` | string | no | Markdown content (only for action='write') |
+
+## Input Automation
+
+### td_input_execute
+
+Send a sequence of mouse/keyboard commands to TouchDesigner. Commands execute sequentially with smooth bezier movement. Returns immediately — poll td_input_status() until status='idle' before proceeding. Command types: 'focus' — bring TD to foreground. 'move' — smooth mouse move: {type,x,y,duration,easing}. 'click' — click: {type,x,y,button,hold,duration,easing}. hold=seconds to hold down. duration=smooth move before click. 'dblclick' — double click: {type,x,y,duration}. 'mousedown'/'mouseup' — {type,x,y,button}. 'key' — keystroke: {type,keys} e.g. 'ctrl+z','tab','escape','shift+f5'. Requires Accessibility permission on Mac. 'type' — human-like typing: {type,text,wpm,variance} — layout-independent Unicode, variable timing. 'wait' — pause: {type,duration}. 'scroll' — {type,x,y,dx,dy,steps} — human-like scroll: moves mouse to (x,y) first, then sends dy (vertical, +up) and dx (horizontal, +right) as multiple ticks with natural timing. steps=4 by default. Mouse commands may include coord_space='logical' (default) or coord_space='physical'. On macOS, 'physical' means actual screen pixels from td_get_screen_screenshot and is converted to CGEvent logical coords automatically. Top-level coord_space applies to commands that do not override it. on_error: 'stop' (default) clears queue on error; 'continue' skips failed command. IMPORTANT: call td_get_hints('input_simulation') before first use to learn focus recovery, coordinate systems, and testing workflow.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `commands` | array | yes | List of command dicts to execute in sequence. |
+| `coord_space` | `logical` / `physical` | no | Default coordinate space for mouse commands that do not specify their own coord_space. 'logical' uses CGEvent coords directly. 'physical' uses actual screen pixels from td_get_screen_screenshot and is auto-converted on macOS. |
+| `on_error` | `stop` / `continue` | no | What to do on error. Default 'stop'. |
+
+### td_input_status
+
+Get current status of the td_input command queue. Poll this after td_input_execute until status='idle'. Returns: status ('idle'/'running'), current command, queue_remaining, last error.
+
+No parameters (other than optional `target_instance`).
+
+### td_input_clear
+
+Clear the td_input command queue and stop current execution immediately.
+
+No parameters (other than optional `target_instance`).
+
+### td_op_screen_rect
+
+Get the screen coordinates of an operator node in the network editor. Returns {x,y,w,h,cx,cy} where cx,cy is the center for clicking. Use this to find where to click on a specific operator. Only works if the operator's parent network is currently open in a network editor pane.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Full path to the operator, e.g. '/project1/myComp/noise1' |
+
+### td_click_screen_point
+
+Resolve a point inside a previous td_get_screen_screenshot result and click it. Pass the screenshot request_id plus either normalized u/v or image_x/image_y. Queues a td_input click using physical screen coordinates, so it works directly with screenshot-derived points. Use duration/easing to control the cursor travel before the click.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `request_id` | string | yes | Request ID originally returned by td_get_screen_screenshot. |
+| `u` | number | no | Normalized horizontal position inside the screenshot region (0=left, 1=right). Use with v. |
+| `v` | number | no | Normalized vertical position inside the screenshot region (0=top, 1=bottom). Use with u. |
+| `image_x` | number | no | Horizontal pixel coordinate inside the returned screenshot image. Use with image_y. |
+| `image_y` | number | no | Vertical pixel coordinate inside the returned screenshot image. Use with image_x. |
+| `button` | `left` / `right` / `middle` | no | Mouse button to click. Default left. |
+| `hold` | number | no | Seconds to hold the mouse button down before releasing. |
+| `duration` | number | no | Seconds for the cursor to travel to the target before clicking. |
+| `easing` | `linear` / `ease-in` / `ease-out` / `ease-in-out` | no | Cursor movement easing for the pre-click travel. |
+| `focus` | boolean | no | If true, bring TD to the front before clicking and wait briefly for focus to settle. |
+
+### td_screen_point_to_global
+
+Convert a point inside a previous td_get_screen_screenshot result into absolute screen coordinates. Pass the screenshot request_id plus either normalized u/v (0..1 inside that screenshot region) or image_x/image_y in returned image pixels. Returns absolute physical screen coordinates, logical coordinates, and a ready-to-use td_input_execute payload. Metadata is kept for the most recent screen screenshots so multiple agents can resolve points later by request_id.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `request_id` | string | yes | Request ID originally returned by td_get_screen_screenshot. |
+| `u` | number | no | Normalized horizontal position inside the screenshot region (0=left, 1=right). Use with v. |
+| `v` | number | no | Normalized vertical position inside the screenshot region (0=top, 1=bottom). Use with u. |
+| `image_x` | number | no | Horizontal pixel coordinate inside the returned screenshot image. Use with image_y. |
+| `image_y` | number | no | Vertical pixel coordinate inside the returned screenshot image. Use with image_x. |
+
+## System
+
+### td_list_instances
+
+List all running TouchDesigner (TD) instances with active MCP servers. Returns port, project name, PID, and instanceId for each instance. Call this at the start of every conversation to discover available instances and choose which one to work with. instanceId is stable for the lifetime of a TD process and is used as target_instance in all other tool calls.
+
+No parameters (other than optional `target_instance`).
+
+### td_project_quit
+
+Save and/or close the current TouchDesigner (TD) project. Can save before closing. Reports if project has unsaved changes. To close a different instance, pass target_instance=instanceId. WARNING: this will shut down the MCP server on that instance.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `save` | boolean | no | Save the project before closing. Default true. |
+| `force` | boolean | no | Force close without save dialog. Default false. |
+
+### td_reinit_extension
+
+Reinitialize an extension on a COMP in TouchDesigner (TD). Call this AFTER finishing all code edits via td_write_dat to apply changes. Do NOT call after every small edit - batch your changes first.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Path to the COMP with the extension |
+
+### td_dev_log
+
+Read the last N entries from the MCP dev log. Only available when Devmode is enabled. Shows request/response history.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `count` | integer | no | Number of recent log entries to return |
+
+### td_clear_dev_log
+
+Clear the current MCP dev log by closing the old file and starting a fresh one. Only available when Devmode is enabled.
+
+No parameters (other than optional `target_instance`).
+
+### td_test_session
+
+Manage test sessions, bug reports, and conversation export. IMPORTANT: Do NOT proactively suggest exporting chat or submitting reports. These are tools for specific situations: - export_chat / submit_report: ONLY when the user encounters a BUG with the plugin or TouchDesigner and wants to report it, or when the user explicitly asks to export the conversation. Never suggest this at session end or as routine action. USER PHRASES → ACTIONS: 'разбор тестовых сессий' / 'analyze test sessions' → list, then pull, read meta.json → index.jsonl → calls/. 'разбор репортов' / 'analyze user reports' → list with session='user', then pull by name. 'экспортируй чат' / 'export chat' → (1) export_chat_id → marker, (2) export_chat with session=marker. 'сообщи о проблеме' / 'report bug' → export chat, review for privacy, then submit_report with summary + tags + result_op=file_path. ACTIONS: export_chat_id | export_chat | submit_report | start | note | import_chat | end | list | pull. list: default=auto-detect repo. session='user' for user_reports (dev only). pull: auto-searches both repos. Auto-detects dev vs user Hub access.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `action` | `export_chat_id` / `export_chat` / `submit_report` / `start` / `note` / `import_chat` / `end` / `list` / `pull` | yes | Action: export_chat_id / export_chat / submit_report / start / note / import_chat / end / list / pull |
+| `prompt` | string | no | (start) The test prompt/task description |
+| `tags` | array | no | (start, submit_report) Tags for categorization, e.g. ['ui', 'layout'] |
+| `text` | string | no | (note) Observation text. (import_chat) Full conversation text. |
+| `outcome` | `success` / `partial` / `failure` | no | (end) Result: success / partial / failure |
+| `summary` | string | no | (end, submit_report) Brief summary of what happened |
+| `result_op` | string | no | (end) Path to operator to save as result.tox. (submit_report) File path to include with the report. |
+| `session` | string | no | (pull) Session name or substring to download. (list) Pass 'user' to list user reports. (export_chat) Marker returned by export_chat_id. |
diff --git a/optional-skills/creative/touchdesigner-mcp/references/network-patterns.md b/optional-skills/creative/touchdesigner-mcp/references/network-patterns.md
new file mode 100644
index 00000000000..cb04fd54d57
--- /dev/null
+++ b/optional-skills/creative/touchdesigner-mcp/references/network-patterns.md
@@ -0,0 +1,966 @@
+# TouchDesigner Network Patterns
+
+Complete network recipes for common creative coding tasks. Each pattern shows the operator chain, MCP tool calls to build it, and key parameter settings.
+
+## Audio-Reactive Visuals
+
+### Pattern 1: Audio Spectrum -> Noise Displacement
+
+Audio drives noise parameters for organic, music-responsive textures.
+
+```
+Audio File In CHOP -> Audio Spectrum CHOP -> Math CHOP (scale)
+                                                |
+                                                v (export to noise params)
+                          Noise TOP -> Level TOP -> Feedback TOP -> Composite TOP -> Null TOP (out)
+                                                        ^                |
+                                                        |________________|
+```
+
+**MCP Build Sequence:**
+
+```
+1. td_create_operator(parent="/project1", type="audiofileinChop", name="audio_in")
+2. td_create_operator(parent="/project1", type="audiospectrumChop", name="spectrum")
+3. td_create_operator(parent="/project1", type="mathChop", name="spectrum_scale")
+4. td_create_operator(parent="/project1", type="noiseTop", name="noise1")
+5. td_create_operator(parent="/project1", type="levelTop", name="level1")
+6. td_create_operator(parent="/project1", type="feedbackTop", name="feedback1")
+7. td_create_operator(parent="/project1", type="compositeTop", name="comp1")
+8. td_create_operator(parent="/project1", type="nullTop", name="out")
+
+9. td_set_operator_pars(path="/project1/audio_in",
+     properties={"file": "/path/to/music.wav", "play": true})
+10. td_set_operator_pars(path="/project1/spectrum",
+     properties={"size": 512})
+11. td_set_operator_pars(path="/project1/spectrum_scale",
+     properties={"gain": 2.0, "postoff": 0.0})
+12. td_set_operator_pars(path="/project1/noise1",
+     properties={"type": 1, "monochrome": false, "resolutionw": 1280, "resolutionh": 720,
+                  "period": 4.0, "harmonics": 3, "amp": 1.0})
+13. td_set_operator_pars(path="/project1/level1",
+     properties={"opacity": 0.95, "gamma1": 0.75})
+14. td_set_operator_pars(path="/project1/feedback1",
+     properties={"top": "/project1/comp1"})
+15. td_set_operator_pars(path="/project1/comp1",
+     properties={"operand": 0})
+
+16. td_execute_python: """
+op('/project1/audio_in').outputConnectors[0].connect(op('/project1/spectrum'))
+op('/project1/spectrum').outputConnectors[0].connect(op('/project1/spectrum_scale'))
+op('/project1/noise1').outputConnectors[0].connect(op('/project1/level1'))
+op('/project1/level1').outputConnectors[0].connect(op('/project1/comp1').inputConnectors[0])
+op('/project1/feedback1').outputConnectors[0].connect(op('/project1/comp1').inputConnectors[1])
+op('/project1/comp1').outputConnectors[0].connect(op('/project1/out'))
+"""
+
+17. td_execute_python: """
+# Export spectrum values to drive noise parameters
+# This makes the noise react to audio frequencies
+op('/project1/noise1').par.seed.expr = "op('/project1/spectrum_scale')['chan1']"
+op('/project1/noise1').par.period.expr = "tdu.remap(op('/project1/spectrum_scale')['chan1'].eval(), 0, 1, 1, 8)"
+"""
+```
+
+### Pattern 2: Beat Detection -> Visual Pulses
+
+Detect beats from audio and trigger visual events.
+
+```
+Audio Device In CHOP -> Audio Spectrum CHOP -> Math CHOP (isolate bass)
+                                                    |
+                                              Trigger CHOP (envelope)
+                                                    |
+                                              [export to visual params]
+```
+
+**Key parameter settings:**
+
+```
+# Isolate bass frequencies (20-200 Hz)
+Math CHOP: chanop=1 (Add channels), range1low=0, range1high=10
+           (first 10 FFT bins = bass frequencies with 512 FFT at 44100Hz)
+
+# ADSR envelope on each beat
+Trigger CHOP: attack=0.02, peak=1.0, decay=0.3, sustain=0.0, release=0.1
+
+# Export to visual: Scale, brightness, or color intensity
+td_execute_python: "op('/project1/level1').par.brightness1.expr = \"1.0 + op('/project1/trigger1')['chan1'] * 0.5\""
+```
+
+### Pattern 3: Multi-Band Audio -> Multi-Layer Visuals
+
+Split audio into frequency bands, drive different visual layers per band.
+
+```
+Audio In -> Spectrum -> Audio Band EQ (3 bands: bass, mid, treble)
+                              |
+                    +---------+---------+
+                    |         |         |
+                 Bass      Mids     Treble
+                  |          |         |
+           Noise TOP   Circle TOP  Text TOP
+           (slow,dark) (mid,warm)  (fast,bright)
+                  |          |         |
+                  +-----+----+----+----+
+                        |         |
+                   Composite  Composite
+                        |
+                       Out
+```
+
+### Pattern 3b: Audio-Reactive GLSL Fractal (Proven Recipe)
+
+Complete working recipe. Plays an MP3, runs FFT, feeds spectrum as a texture into a GLSL shader where inner fractal reacts to bass, outer to treble.
+
+**Network:**
+```
+AudioFileIn CHOP → AudioSpectrum CHOP (FFT=512, outlength=256)
+    → Math CHOP (gain=5) → Resample CHOP (rate=256) → CHOP To TOP (256x2 spectrum texture, dataformat=r)
+                                                                   ↓
+Constant TOP (time, rgba32float) → GLSL TOP (input 0=time, input 1=spectrum) → Null → MovieFileOut
+                                                                                        ↓
+AudioFileIn CHOP → Audio Device Out CHOP                                          Record to .mov
+```
+
+**Build via td_execute_python (one call per step for reliability):**
+
+```python
+# Step 1: Audio chain
+# td_execute_python script:
+td_execute_python(code="""
+root = op('/project1')
+audio = root.create(audiofileinCHOP, 'audio_in')
+audio.par.file = '/path/to/music.mp3'
+audio.par.playmode = 0  # Locked to timeline
+audio.par.volume = 0.5
+
+spec = root.create(audiospectrumCHOP, 'spectrum')
+audio.outputConnectors[0].connect(spec.inputConnectors[0])
+
+math_n = root.create(mathCHOP, 'math_norm')
+spec.outputConnectors[0].connect(math_n.inputConnectors[0])
+math_n.par.gain = 5  # boost signal
+
+resamp = root.create(resampleCHOP, 'resample_spec')
+math_n.outputConnectors[0].connect(resamp.inputConnectors[0])
+resamp.par.timeslice = True
+resamp.par.rate = 256
+
+chop2top = root.create(choptoTOP, 'spectrum_tex')
+chop2top.par.chop = resamp  # CHOP To TOP has NO input connectors — use par.chop reference
+
+# Audio output (hear the music)
+aout = root.create(audiodeviceoutCHOP, 'audio_out')
+audio.outputConnectors[0].connect(aout.inputConnectors[0])
+result = 'audio chain ok'
+""")
+
+# Step 2: Time driver (MUST be rgba32float — see pitfalls #6)
+# td_execute_python script:
+td_execute_python(code="""
+root = op('/project1')
+td = root.create(constantTOP, 'time_driver')
+td.par.format = 'rgba32float'
+td.par.outputresolution = 'custom'
+td.par.resolutionw = 1
+td.par.resolutionh = 1
+td.par.colorr.expr = "absTime.seconds % 1000.0"
+td.par.colorg.expr = "int(absTime.seconds / 1000.0)"
+result = 'time ok'
+""")
+
+# Step 3: GLSL shader (write to /tmp, load from file)
+# td_execute_python script:
+td_execute_python(code="""
+root = op('/project1')
+glsl = root.create(glslTOP, 'audio_shader')
+glsl.par.outputresolution = 'custom'
+glsl.par.resolutionw = 1280
+glsl.par.resolutionh = 720
+
+sd = root.create(textDAT, 'shader_code')
+sd.text = open('/tmp/my_shader.glsl').read()
+glsl.par.pixeldat = sd
+
+# Wire: input 0 = time, input 1 = spectrum texture
+op('/project1/time_driver').outputConnectors[0].connect(glsl.inputConnectors[0])
+op('/project1/spectrum_tex').outputConnectors[0].connect(glsl.inputConnectors[1])
+result = 'glsl ok'
+""")
+
+# Step 4: Output + recorder
+# td_execute_python script:
+td_execute_python(code="""
+root = op('/project1')
+out = root.create(nullTOP, 'output')
+op('/project1/audio_shader').outputConnectors[0].connect(out.inputConnectors[0])
+
+rec = root.create(moviefileoutTOP, 'recorder')
+out.outputConnectors[0].connect(rec.inputConnectors[0])
+rec.par.type = 'movie'
+rec.par.file = '/tmp/output.mov'
+rec.par.videocodec = 'mjpa'
+result = 'output ok'
+""")
+```
+
+**GLSL shader pattern (audio-reactive fractal):**
+```glsl
+out vec4 fragColor;
+
+vec3 palette(float t) {
+    vec3 a = vec3(0.5); vec3 b = vec3(0.5);
+    vec3 c = vec3(1.0); vec3 d = vec3(0.263, 0.416, 0.557);
+    return a + b * cos(6.28318 * (c * t + d));
+}
+
+void main() {
+    // Input 0 = time (1x1 rgba32float constant)
+    // Input 1 = audio spectrum (256x2 CHOP To TOP, stereo — sample at y=0.25 for first channel)
+    vec4 td = texture(sTD2DInputs[0], vec2(0.5));
+    float t = td.r + td.g * 1000.0;
+
+    vec2 res = uTDOutputInfo.res.zw;
+    vec2 uv = (gl_FragCoord.xy * 2.0 - res) / min(res.x, res.y);
+    vec2 uv0 = uv;
+    vec3 finalColor = vec3(0.0);
+
+    float bass = texture(sTD2DInputs[1], vec2(0.05, 0.25)).r;
+    float mids = texture(sTD2DInputs[1], vec2(0.25, 0.25)).r;
+
+    for (float i = 0.0; i < 4.0; i++) {
+        uv = fract(uv * (1.4 + bass * 0.3)) - 0.5;
+        float d = length(uv) * exp(-length(uv0));
+
+        // Sample spectrum at distance: inner=bass, outer=treble
+        float freq = texture(sTD2DInputs[1], vec2(clamp(d * 0.5, 0.0, 1.0), 0.25)).r;
+
+        vec3 col = palette(length(uv0) + i * 0.4 + t * 0.35);
+        d = sin(d * (7.0 + bass * 4.0) + t * 1.5) / 8.0;
+        d = abs(d);
+        d = pow(0.012 / d, 1.2 + freq * 0.8 + bass * 0.5);
+        finalColor += col * d;
+    }
+
+    // Tone mapping
+    finalColor = finalColor / (finalColor + vec3(1.0));
+    fragColor = TDOutputSwizzle(vec4(finalColor, 1.0));
+}
+```
+
+**Key insights from testing:**
+- `spectrum_tex` (CHOP To TOP) produces a 256x2 texture — x position = frequency, y=0.25 for first channel
+- Sampling at `vec2(0.05, 0.25)` gets bass, `vec2(0.65, 0.25)` gets treble (y=0.25 selects the first channel row)
+- Sampling based on pixel distance (`d * 0.5`) makes inner fractal react to bass, outer to treble
+- `bass * 0.3` in the `fract()` zoom makes the fractal breathe with kicks
+- Math CHOP gain of 5 is needed because raw spectrum values are very small
+
+## Generative Art
+
+### Pattern 4: Feedback Loop with Transform
+
+Classic generative technique — texture evolves through recursive transformation.
+
+```
+Noise TOP -> Composite TOP -> Level TOP -> Null TOP (out)
+                  ^      |
+                  |      v
+            Transform TOP <- Feedback TOP
+```
+
+**MCP Build Sequence:**
+
+```
+1. td_create_operator(parent="/project1", type="noiseTop", name="seed_noise")
+2. td_create_operator(parent="/project1", type="compositeTop", name="mix")
+3. td_create_operator(parent="/project1", type="transformTop", name="evolve")
+4. td_create_operator(parent="/project1", type="feedbackTop", name="fb")
+5. td_create_operator(parent="/project1", type="levelTop", name="color_correct")
+6. td_create_operator(parent="/project1", type="nullTop", name="out")
+
+7. td_set_operator_pars(path="/project1/seed_noise",
+     properties={"type": 1, "monochrome": false, "period": 2.0, "amp": 0.3,
+                  "resolutionw": 1280, "resolutionh": 720})
+8. td_set_operator_pars(path="/project1/mix",
+     properties={"operand": 27})  # 27 = Screen blend
+9. td_set_operator_pars(path="/project1/evolve",
+     properties={"sx": 1.003, "sy": 1.003, "rz": 0.5, "extend": 2})  # slight zoom + rotate, repeat edges
+10. td_set_operator_pars(path="/project1/fb",
+     properties={"top": "/project1/mix"})
+11. td_set_operator_pars(path="/project1/color_correct",
+     properties={"opacity": 0.98, "gamma1": 0.85})
+
+12. td_execute_python: """
+op('/project1/seed_noise').outputConnectors[0].connect(op('/project1/mix').inputConnectors[0])
+op('/project1/fb').outputConnectors[0].connect(op('/project1/evolve'))
+op('/project1/evolve').outputConnectors[0].connect(op('/project1/mix').inputConnectors[1])
+op('/project1/mix').outputConnectors[0].connect(op('/project1/color_correct'))
+op('/project1/color_correct').outputConnectors[0].connect(op('/project1/out'))
+"""
+```
+
+**Variations:**
+- Change Transform: `rz` (rotation), `sx/sy` (zoom), `tx/ty` (drift)
+- Change Composite operand: Screen (glow), Add (bright), Multiply (dark)
+- Add HSV Adjust in the feedback loop for color evolution
+- Add Blur for dreamlike softness
+- Replace Noise with a GLSL TOP for custom seed patterns
+
+### Pattern 5: Instancing (Particle-Like Systems)
+
+Render thousands of copies of geometry, each with unique position/rotation/scale driven by CHOP data or DATs.
+
+```
+Table DAT (instance data) -> DAT to CHOP -> Geometry COMP (instancing on) -> Render TOP
+                                              + Sphere SOP (template geometry)
+                                              + Constant MAT (material)
+                                              + Camera COMP
+                                              + Light COMP
+```
+
+**MCP Build Sequence:**
+
+```
+1. td_create_operator(parent="/project1", type="tableDat", name="instance_data")
+2. td_create_operator(parent="/project1", type="geometryComp", name="geo1")
+3. td_create_operator(parent="/project1/geo1", type="sphereSop", name="sphere")
+4. td_create_operator(parent="/project1", type="constMat", name="mat1")
+5. td_create_operator(parent="/project1", type="cameraComp", name="cam1")
+6. td_create_operator(parent="/project1", type="lightComp", name="light1")
+7. td_create_operator(parent="/project1", type="renderTop", name="render1")
+
+8. td_execute_python: """
+import random, math
+dat = op('/project1/instance_data')
+dat.clear()
+dat.appendRow(['tx', 'ty', 'tz', 'sx', 'sy', 'sz', 'cr', 'cg', 'cb'])
+for i in range(500):
+    angle = i * 0.1
+    r = 2 + i * 0.01
+    dat.appendRow([
+        str(math.cos(angle) * r),
+        str(math.sin(angle) * r),
+        str((i - 250) * 0.02),
+        '0.05', '0.05', '0.05',
+        str(random.random()),
+        str(random.random()),
+        str(random.random())
+    ])
+"""
+
+9. td_set_operator_pars(path="/project1/geo1",
+     properties={"instancing": true, "instancechop": "",
+                  "instancedat": "/project1/instance_data",
+                  "material": "/project1/mat1"})
+10. td_set_operator_pars(path="/project1/render1",
+     properties={"camera": "/project1/cam1", "geometry": "/project1/geo1",
+                  "lights": "/project1/light1",
+                  "resolutionw": 1280, "resolutionh": 720})
+11. td_set_operator_pars(path="/project1/cam1",
+     properties={"tz": 10})
+```
+
+### Pattern 6: Reaction-Diffusion (GLSL)
+
+Classic Gray-Scott reaction-diffusion system running on the GPU.
+
+```
+Text DAT (GLSL code) -> GLSL TOP (resolution, dat reference) -> Feedback TOP
+                              ^                                       |
+                              |_______________________________________|
+                         Level TOP (out)
+```
+
+**Key GLSL code (write to Text DAT via td_execute_python):**
+
+```glsl
+// Gray-Scott reaction-diffusion
+uniform float feed;    // 0.037
+uniform float kill;    // 0.06
+uniform float dA;      // 1.0
+uniform float dB;      // 0.5
+
+layout(location = 0) out vec4 fragColor;
+
+void main() {
+    vec2 uv = vUV.st;
+    vec2 texel = 1.0 / uTDOutputInfo.res.zw;
+
+    vec4 c = texture(sTD2DInputs[0], uv);
+    float a = c.r;
+    float b = c.g;
+
+    // Laplacian (9-point stencil)
+    float lA = 0.0, lB = 0.0;
+    for(int dx = -1; dx <= 1; dx++) {
+        for(int dy = -1; dy <= 1; dy++) {
+            float w = (dx == 0 && dy == 0) ? -1.0 : (abs(dx) + abs(dy) == 1 ? 0.2 : 0.05);
+            vec4 s = texture(sTD2DInputs[0], uv + vec2(dx, dy) * texel);
+            lA += s.r * w;
+            lB += s.g * w;
+        }
+    }
+
+    float reaction = a * b * b;
+    float newA = a + (dA * lA - reaction + feed * (1.0 - a));
+    float newB = b + (dB * lB + reaction - (kill + feed) * b);
+
+    fragColor = vec4(clamp(newA, 0.0, 1.0), clamp(newB, 0.0, 1.0), 0.0, 1.0);
+}
+```
+
+## Video Processing
+
+### Pattern 7: Video Effects Chain
+
+Apply a chain of effects to a video file.
+
+```
+Movie File In TOP -> HSV Adjust TOP -> Level TOP -> Blur TOP -> Composite TOP -> Null TOP (out)
+                                                                      ^
+                                                          Text TOP ---+
+```
+
+**MCP Build Sequence:**
+
+```
+1. td_create_operator(parent="/project1", type="moviefileinTop", name="video_in")
+2. td_create_operator(parent="/project1", type="hsvadjustTop", name="color")
+3. td_create_operator(parent="/project1", type="levelTop", name="levels")
+4. td_create_operator(parent="/project1", type="blurTop", name="blur")
+5. td_create_operator(parent="/project1", type="compositeTop", name="overlay")
+6. td_create_operator(parent="/project1", type="textTop", name="title")
+7. td_create_operator(parent="/project1", type="nullTop", name="out")
+
+8. td_set_operator_pars(path="/project1/video_in",
+     properties={"file": "/path/to/video.mp4", "play": true})
+9. td_set_operator_pars(path="/project1/color",
+     properties={"hueoffset": 0.1, "saturationmult": 1.3})
+10. td_set_operator_pars(path="/project1/levels",
+     properties={"brightness1": 1.1, "contrast": 1.2, "gamma1": 0.9})
+11. td_set_operator_pars(path="/project1/blur",
+     properties={"sizex": 2, "sizey": 2})
+12. td_set_operator_pars(path="/project1/title",
+     properties={"text": "My Video", "fontsizex": 48, "alignx": 1, "aligny": 1})
+
+13. td_execute_python: """
+chain = ['video_in', 'color', 'levels', 'blur']
+for i in range(len(chain) - 1):
+    op(f'/project1/{chain[i]}').outputConnectors[0].connect(op(f'/project1/{chain[i+1]}'))
+op('/project1/blur').outputConnectors[0].connect(op('/project1/overlay').inputConnectors[0])
+op('/project1/title').outputConnectors[0].connect(op('/project1/overlay').inputConnectors[1])
+op('/project1/overlay').outputConnectors[0].connect(op('/project1/out'))
+"""
+```
+
+### Pattern 8: Video Recording
+
+Record the output to a file. **H.264/H.265 require a Commercial license** — use Motion JPEG (`mjpa`) on Non-Commercial.
+
+```
+[any TOP chain] -> Null TOP -> Movie File Out TOP
+```
+
+```python
+# Build via td_execute_python:
+root = op('/project1')
+
+# Always put a Null TOP before the recorder
+null_out = root.op('out')  # or create one
+rec = root.create(moviefileoutTOP, 'recorder')
+null_out.outputConnectors[0].connect(rec.inputConnectors[0])
+
+rec.par.type = 'movie'
+rec.par.file = '/tmp/output.mov'
+rec.par.videocodec = 'mjpa'  # Motion JPEG — works on Non-Commercial
+
+# Start recording (par.record is a toggle — .record() method may not exist)
+rec.par.record = True
+# ... let TD run for desired duration ...
+rec.par.record = False
+
+# For image sequences:
+# rec.par.type = 'imagesequence'
+# rec.par.imagefiletype = 'png'
+# rec.par.file.expr = "'/tmp/frames/out' + me.fileSuffix"  # fileSuffix REQUIRED
+```
+
+**Pitfalls:**
+- Setting `par.file` + `par.record = True` in the same script may race — use `run("...", delayFrames=2)`
+- `TOP.save()` called rapidly always captures the same frame — use MovieFileOut for animation
+- See `pitfalls.md` #25-27 for full details
+
+### Pattern 8b: TD → External Pipeline (FFmpeg / Python / Post-Processing)
+
+Export TD visuals for use in another tool (ffmpeg, Python, ASCII art, etc.). This is the standard workflow when you need to composite TD output with external processing (ASCII conversion, Python shader chains, ML inference, etc.).
+
+**Step 1: Record to video in TD**
+
+```python
+# Preferred: ProRes on macOS (visually lossless, Non-Commercial OK, ~55MB/s at 1280x720)
+rec.par.videocodec = 'prores'
+# Fallback for non-macOS: mjpa (Motion JPEG)
+# rec.par.videocodec = 'mjpa'
+rec.par.record = True
+# ... wait N seconds ...
+rec.par.record = False
+```
+
+**Step 2: Extract frames with ffmpeg**
+
+```bash
+# Extract all frames at 30fps
+ffmpeg -y -i /tmp/output.mov -vf 'fps=30' /tmp/frames/frame_%06d.png
+
+# Or extract a specific duration
+ffmpeg -y -i /tmp/output.mov -t 25 -vf 'fps=30' /tmp/frames/frame_%06d.png
+
+# Or extract specific frame range
+ffmpeg -y -i /tmp/output.mov -vf 'select=between(n\,0\,749)' -vsync vfr /tmp/frames/frame_%06d.png
+```
+
+**Step 3: Process frames in Python**
+
+```python
+from PIL import Image
+import os
+
+frames_dir = '/tmp/frames'
+output_dir = '/tmp/processed'
+os.makedirs(output_dir, exist_ok=True)
+
+for fname in sorted(os.listdir(frames_dir)):
+    if not fname.endswith('.png'):
+        continue
+    img = Image.open(os.path.join(frames_dir, fname))
+    # ... apply your processing ...
+    img.save(os.path.join(output_dir, fname))
+```
+
+**Step 4: Mux processed frames back with audio**
+
+```bash
+# Create video from processed frames + audio with fade-out
+ffmpeg -y \
+  -framerate 30 -i /tmp/processed/frame_%06d.png \
+  -i /tmp/audio.mp3 \
+  -c:v libx264 -pix_fmt yuv420p -crf 18 \
+  -c:a aac -b:a 192k \
+  -shortest \
+  -af 'afade=t=out:st=23:d=2' \
+  /tmp/final_output.mp4
+```
+
+**Key considerations:**
+- Use ProRes for the TD recording step to avoid generation loss during compositing
+- Extract at the target output framerate (not TD's render framerate)
+- For audio-synced content, analyze the audio file separately in Python (scipy FFT) to get per-frame features (rms, spectral bands, beats) and drive compositing parameters
+- Always verify TD FPS > 0 before recording (see pitfalls #37, #38)
+
+## Data Visualization
+
+### Pattern 9: Table Data -> Bar Chart via Instancing
+
+Visualize tabular data as a 3D bar chart.
+
+```
+Table DAT (data) -> Script DAT (transform to instance format) -> DAT to CHOP
+                                                                      |
+Box SOP -> Geometry COMP (instancing from CHOP) -> Render TOP -> Null TOP (out)
+           + PBR MAT
+           + Camera COMP
+           + Light COMP
+```
+
+```python
+# Script DAT code to transform data to instance positions
+td_execute_python: """
+source = op('/project1/data_table')
+instance = op('/project1/instance_transform')
+instance.clear()
+instance.appendRow(['tx', 'ty', 'tz', 'sx', 'sy', 'sz', 'cr', 'cg', 'cb'])
+
+for i in range(1, source.numRows):
+    value = float(source[i, 'value'])
+    name = source[i, 'name']
+    instance.appendRow([
+        str(i * 1.5),          # x position (spread bars)
+        str(value / 2),        # y position (center bar vertically)
+        '0',                   # z position
+        '1', str(value), '1',  # scale (height = data value)
+        '0.2', '0.6', '1.0'   # color (blue)
+    ])
+"""
+```
+
+### Pattern 9b: Audio-Reactive GLSL Fractal (Proven Recipe)
+
+Audio spectrum drives a GLSL fractal shader directly via a spectrum texture input. Bass thickens inner fractal lines, mids twist rotation, highs light outer edges. **Always run discovery (SKILL.md Step 0) before using any param names from these recipes — they may differ in your TD version.**
+
+```
+Audio File In CHOP → Audio Spectrum CHOP (FFT=512, outlength=256)
+    → Math CHOP (gain=10)
+    → CHOP To TOP (spectrum texture, 256x2, dataformat=r)
+                                          ↓ (input 1)
+Constant TOP (rgba32float, time) → GLSL TOP (audio-reactive shader) → Null TOP
+        (input 0)                    ↑
+                              Text DAT (shader code)
+```
+
+**Build via td_execute_python (complete working script):**
+
+```python
+# td_execute_python script:
+td_execute_python(code="""
+import os
+root = op('/project1')
+
+# Audio input
+audio = root.create(audiofileinCHOP, 'audio_in')
+audio.par.file = '/path/to/music.mp3'
+audio.par.playmode = 0  # Locked to timeline
+
+# FFT analysis (output length manually set to 256 bins)
+spectrum = root.create(audiospectrumCHOP, 'spectrum')
+audio.outputConnectors[0].connect(spectrum.inputConnectors[0])
+spectrum.par.fftsize = '512'
+spectrum.par.outputmenu = 'setmanually'
+spectrum.par.outlength = 256
+
+# THEN boost gain on the raw spectrum (NO Lag CHOP — see pitfall #34)
+math = root.create(mathCHOP, 'math_norm')
+spectrum.outputConnectors[0].connect(math.inputConnectors[0])
+math.par.gain = 10
+
+# Spectrum → texture (256x2 image — stereo, sample at y=0.25 for first channel)
+# NOTE: choptoTOP has NO input connectors — use par.chop reference!
+spec_tex = root.create(choptoTOP, 'spectrum_tex')
+spec_tex.par.chop = math
+spec_tex.par.dataformat = 'r'
+spec_tex.par.layout = 'rowscropped'
+
+# Time driver (rgba32float to avoid 0-1 clamping!)
+time_drv = root.create(constantTOP, 'time_driver')
+time_drv.par.format = 'rgba32float'
+time_drv.par.outputresolution = 'custom'
+time_drv.par.resolutionw = 1
+time_drv.par.resolutionh = 1
+time_drv.par.colorr.expr = "absTime.seconds % 1000.0"
+time_drv.par.colorg.expr = "int(absTime.seconds / 1000.0)"
+
+# GLSL shader
+glsl = root.create(glslTOP, 'audio_shader')
+glsl.par.outputresolution = 'custom'
+glsl.par.resolutionw = 1280; glsl.par.resolutionh = 720
+
+shader_dat = root.create(textDAT, 'shader_code')
+shader_dat.text = open('/tmp/shader.glsl').read()
+glsl.par.pixeldat = shader_dat
+
+# Wire: input 0=time, input 1=spectrum
+time_drv.outputConnectors[0].connect(glsl.inputConnectors[0])
+spec_tex.outputConnectors[0].connect(glsl.inputConnectors[1])
+
+# Output + audio playback
+out = root.create(nullTOP, 'output')
+glsl.outputConnectors[0].connect(out.inputConnectors[0])
+audio_out = root.create(audiodeviceoutCHOP, 'audio_out')
+audio.outputConnectors[0].connect(audio_out.inputConnectors[0])
+
+result = 'network built'
+""")
+```
+
+**GLSL shader (reads spectrum from input 1 texture):**
+
+```glsl
+out vec4 fragColor;
+
+vec3 palette(float t) {
+    vec3 a = vec3(0.5); vec3 b = vec3(0.5);
+    vec3 c = vec3(1.0); vec3 d = vec3(0.263, 0.416, 0.557);
+    return a + b * cos(6.28318 * (c * t + d));
+}
+
+void main() {
+    vec4 td = texture(sTD2DInputs[0], vec2(0.5));
+    float t = td.r + td.g * 1000.0;
+
+    vec2 res = uTDOutputInfo.res.zw;
+    vec2 uv = (gl_FragCoord.xy * 2.0 - res) / min(res.x, res.y);
+    vec2 uv0 = uv;
+    vec3 finalColor = vec3(0.0);
+
+    float bass = texture(sTD2DInputs[1], vec2(0.05, 0.25)).r;
+    float mids = texture(sTD2DInputs[1], vec2(0.25, 0.25)).r;
+    float highs = texture(sTD2DInputs[1], vec2(0.65, 0.25)).r;
+
+    float ca = cos(t * (0.15 + mids * 0.3));
+    float sa = sin(t * (0.15 + mids * 0.3));
+    uv = mat2(ca, -sa, sa, ca) * uv;
+
+    for (float i = 0.0; i < 4.0; i++) {
+        uv = fract(uv * (1.4 + bass * 0.3)) - 0.5;
+        float d = length(uv) * exp(-length(uv0));
+        float freq = texture(sTD2DInputs[1], vec2(clamp(d*0.5, 0.0, 1.0), 0.25)).r;
+        vec3 col = palette(length(uv0) + i * 0.4 + t * 0.35);
+        d = sin(d * (7.0 + bass * 4.0) + t * 1.5) / 8.0;
+        d = abs(d);
+        d = pow(0.012 / d, 1.2 + freq * 0.8 + bass * 0.5);
+        finalColor += col * d;
+    }
+
+    float glow = (0.03 + bass * 0.05) / (length(uv0) + 0.03);
+    finalColor += vec3(0.4, 0.1, 0.7) * glow * (0.6 + 0.4 * sin(t * 2.5));
+
+    float ring = abs(length(uv0) - 0.4 - mids * 0.3);
+    finalColor += vec3(0.1, 0.6, 0.8) * (0.005 / ring) * (0.2 + highs * 0.5);
+
+    finalColor *= smoothstep(0.0, 1.0, 1.0 - dot(uv0*0.55, uv0*0.55));
+    finalColor = finalColor / (finalColor + vec3(1.0));
+
+    fragColor = TDOutputSwizzle(vec4(finalColor, 1.0));
+}
+```
+
+**How spectrum sampling drives the visual:**
+- `texture(sTD2DInputs[1], vec2(x, 0.25)).r` — x position = frequency (0=bass, 1=treble); y=0.25 samples the first channel row of the 256x2 spectrum texture
+- Inner fractal iterations sample lower x → react to bass
+- Outer iterations sample higher x → react to treble
+- `bass * 0.3` on `fract()` scale → fractal zoom pulses with bass
+- `bass * 4.0` on sin frequency → line density pulses with bass
+- `mids * 0.3` on rotation speed → spiral twists faster during vocal/mid sections
+- `highs * 0.5` on ring opacity → high-frequency sparkle on outer ring
+
+**Recording the output:** Use MovieFileOut TOP with `mjpa` codec (H.264 requires Commercial license). See pitfalls #25-27.
+
+## GLSL Shaders
+
+### Pattern 10: Custom Fragment Shader
+
+Write a custom visual effect as a GLSL fragment shader.
+
+```
+Text DAT (shader code) -> GLSL TOP -> Level TOP -> Null TOP (out)
+                           + optional input TOPs for texture sampling
+```
+
+**Common GLSL uniforms available in TouchDesigner:**
+
+```glsl
+// Automatically provided by TD
+uniform TDTexInfo uTDOutputInfo;  // struct: .res = (1/w, 1/h, w, h), so .res.zw = resolution
+
+// NOTE: uTDCurrentTime does NOT exist in TD 099!
+// Feed time via a 1x1 Constant TOP (format=rgba32float):
+//   t.par.colorr.expr = "absTime.seconds % 1000.0"
+//   t.par.colorg.expr = "int(absTime.seconds / 1000.0)"
+// Then read in GLSL:
+//   vec4 td = texture(sTD2DInputs[0], vec2(0.5));
+//   float t = td.r + td.g * 1000.0;
+
+// Input textures (from connected TOP inputs)
+uniform sampler2D sTD2DInputs[1];  // array of input samplers
+
+// From vertex shader
+in vec3 vUV;  // UV coordinates (0-1 range)
+```
+
+**Example: Plasma shader (using time from input texture)**
+
+```glsl
+layout(location = 0) out vec4 fragColor;
+
+void main() {
+    vec2 uv = vUV.st;
+    // Read time from Constant TOP input 0 (rgba32float format)
+    vec4 td = texture(sTD2DInputs[0], vec2(0.5));
+    float t = td.r + td.g * 1000.0;
+
+    float v1 = sin(uv.x * 10.0 + t);
+    float v2 = sin(uv.y * 10.0 + t * 0.7);
+    float v3 = sin((uv.x + uv.y) * 10.0 + t * 1.3);
+    float v4 = sin(length(uv - 0.5) * 20.0 - t * 2.0);
+
+    float v = (v1 + v2 + v3 + v4) * 0.25;
+
+    vec3 color = vec3(
+        sin(v * 3.14159 + 0.0) * 0.5 + 0.5,
+        sin(v * 3.14159 + 2.094) * 0.5 + 0.5,
+        sin(v * 3.14159 + 4.189) * 0.5 + 0.5
+    );
+
+    fragColor = vec4(color, 1.0);
+}
+```
+
+### Pattern 11: Multi-Pass GLSL (Ping-Pong)
+
+For effects needing state across frames (particles, fluid, cellular automata), use GLSL Multi TOP with multiple passes or a Feedback TOP loop.
+
+```
+GLSL Multi TOP (pass 0: simulation, pass 1: rendering)
+   + Text DAT (simulation shader)
+   + Text DAT (render shader)
+   -> Level TOP -> Null TOP (out)
+      ^
+      |__ Feedback TOP (feeds simulation state back)
+```
+
+## Interactive Installations
+
+### Pattern 12: Mouse/Touch -> Visual Response
+
+```
+Mouse In CHOP -> Math CHOP (normalize to 0-1) -> [export to visual params]
+
+# Or for touch/multi-touch:
+Multi Touch In DAT -> Script CHOP (parse touches) -> [export to visual params]
+```
+
+```python
+# Drive the noise offsets from the normalized (0-1) mouse channels of mouse_norm (the Math CHOP above)
+td_execute_python: """
+op('/project1/noise1').par.offsetx.expr = "op('/project1/mouse_norm')['tx']"
+op('/project1/noise1').par.offsety.expr = "op('/project1/mouse_norm')['ty']"
+"""
+```
+
+### Pattern 13: OSC Control (from external software)
+
+```
+OSC In CHOP (port 7000) -> Select CHOP (pick channels) -> [export to visual params]
+```
+
+```
+1. td_create_operator(parent="/project1", type="oscinChop", name="osc_in")
+2. td_set_operator_pars(path="/project1/osc_in", properties={"port": 7000})
+
+# OSC messages like /frequency 440 will appear as channel "frequency" with value 440
+# Export to any parameter:
+3. td_execute_python: "op('/project1/noise1').par.period.expr = \"op('/project1/osc_in')['frequency']\""
+```
+
+### Pattern 14: MIDI Control (DJ/VJ)
+
+```
+MIDI In CHOP (device) -> Select CHOP -> [export channels to visual params]
+```
+
+Common MIDI mappings:
+- CC channels (knobs/faders): continuous 0-127, map to float params
+- Note On/Off: binary triggers, map to Trigger CHOP for envelopes
+- Velocity: intensity/brightness
+
+## Live Performance
+
+### Pattern 15: Multi-Source VJ Setup
+
+```
+Source A (generative) ----+
+Source B (video) ---------+-- Switch/Cross TOP -- Level TOP -- Window COMP (output)
+Source C (camera) --------+
+                           ^
+                    MIDI/OSC control selects active source and crossfade
+```
+
+```python
+# MIDI CC1 controls which source is active (0-127 -> 0-2; dividing by 43 keeps 127 at index 2)
+td_execute_python: """
+op('/project1/switch1').par.index.expr = "int(op('/project1/midi_in')['cc1'] / 43)"
+"""
+
+# MIDI CC2 controls crossfade between current and next
+td_execute_python: """
+op('/project1/cross1').par.cross.expr = "op('/project1/midi_in')['cc2'] / 127.0"
+"""
+```
+
+### Pattern 16: Projection Mapping
+
+```
+Content TOPs ----+
+                 |
+Stoner TOP (UV mapping) -> Composite TOP -> Window COMP (projector output)
+   or
+Kantan Mapper COMP (external .tox)
+```
+
+For projection mapping, the key is:
+1. Create your visual content as standard TOPs
+2. Use Stoner TOP or a third-party mapping tool to UV-map content to physical surfaces
+3. Output via Window COMP to the projector
+
+### Pattern 17: Cue System
+
+```
+Table DAT (cue list: cue_number, scene_name, duration, transition_type)
+    |
+Script CHOP (cue state: current_cue, progress, next_cue_trigger)
+    |
+[export to Switch/Cross TOPs to transition between scenes]
+```
+
+```python
+td_execute_python: """
+# Simple cue system
+cue_table = op('/project1/cue_list')
+cue_state = op('/project1/cue_state')
+
+def advance_cue():
+    current = int(cue_state.par.value0.val)
+    next_cue = min(current + 1, cue_table.numRows - 1)
+    cue_state.par.value0.val = next_cue
+    
+    scene = cue_table[next_cue, 'scene_name']
+    duration = float(cue_table[next_cue, 'duration'])
+    
+    # Set crossfade target and duration
+    op('/project1/cross1').par.cross.val = 0
+    # Animate cross to 1.0 over duration seconds
+    # (use a Timer CHOP or LFO CHOP for smooth animation)
+"""
+```
+
+## Networking
+
+### Pattern 18: OSC Server/Client
+
+```
+# Sending OSC
+OSC Out CHOP -> (network) -> external application
+
+# Receiving OSC  
+(network) -> OSC In CHOP -> Select CHOP -> [use values]
+```
+
+### Pattern 19: NDI Video Streaming
+
+```
+# Send video over network
+[any TOP chain] -> NDI Out TOP (source name)
+
+# Receive video from network
+NDI In TOP (select source) -> [process as normal TOP]
+```
+
+### Pattern 20: WebSocket Communication
+
+```
+WebSocket DAT -> Script DAT (parse JSON messages) -> [update visuals]
+```
+
+```python
+td_execute_python: """
+ws = op('/project1/websocket1')
+ws.par.address = 'ws://localhost:8080'
+ws.par.active = True
+
+# In a DAT Execute DAT callback (DAT Execute DAT watching the WebSocket DAT):
+# def onTableChange(dat):
+#     import json
+#     msg = json.loads(dat.text)
+#     op('/project1/noise1').par.seed.val = msg.get('seed', 0)
+"""
+```
diff --git a/optional-skills/creative/touchdesigner-mcp/references/operators.md b/optional-skills/creative/touchdesigner-mcp/references/operators.md
new file mode 100644
index 00000000000..6aa716cb9a2
--- /dev/null
+++ b/optional-skills/creative/touchdesigner-mcp/references/operators.md
@@ -0,0 +1,239 @@
+# TouchDesigner Operator Reference
+
+## Operator Families Overview
+
+TouchDesigner has 6 operator families. Each family processes a specific data type and is color-coded in the UI. Operators can only connect to others of the SAME family (with cross-family converters as the bridge).
+
+## TOPs — Texture Operators (Purple)
+
+2D image/texture processing on the GPU. The workhorse of visual output.
+
+### Generators (create images from nothing)
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Noise TOP | `noiseTop` | `type` (0-6), `monochrome`, `seed`, `period`, `harmonics`, `exponent`, `amp`, `offset`, `resolutionw/h` | Procedural noise textures — Perlin, Simplex, Sparse, etc. Foundation of generative art. |
+| Constant TOP | `constantTop` | `colorr/g/b/a`, `resolutionw/h` | Solid color. Use as background or blend input. |
+| Text TOP | `textTop` | `text`, `fontsizex`, `fontfile`, `alignx/y`, `colorr/g/b` | Render text to texture. Supports multi-line, word wrap. |
+| Ramp TOP | `rampTop` | `type` (0=horizontal, 1=vertical, 2=radial, 3=circular), `phase`, `period` | Gradient textures for masking, color mapping. |
+| Circle TOP | `circleTop` | `radiusx/y`, `centerx/y`, `width` | Circles, rings, ellipses. |
+| Rectangle TOP | `rectangleTop` | `sizex/y`, `centerx/y`, `softness` | Rectangles with optional softness. |
+| GLSL TOP | `glslTop` | `pixeldat` (points to the pixel shader DAT), `resolutionw/h`, `outputformat`, custom uniforms | Custom fragment shaders. Most powerful TOP for custom visuals. |
+| GLSL Multi TOP | `glslmultiTop` | `dat`, `numinputs`, `numoutputs`, `numcomputepasses` | Multi-pass GLSL with compute shaders. Advanced. |
+| Render TOP | `renderTop` | `camera`, `geometry`, `lights`, `resolutionw/h` | Renders 3D scenes (SOPs + MATs + Camera/Light COMPs). |
+
+### Filters (modify a single input)
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Level TOP | `levelTop` | `opacity`, `brightness1/2`, `gamma1/2`, `contrast`, `invert`, `blacklevel/whitelevel` | Brightness, contrast, gamma, levels. Essential color correction. |
+| Blur TOP | `blurTop` | `sizex/y`, `type` (0=Gaussian, 1=Box, 2=Bartlett) | Gaussian/box blur. |
+| Transform TOP | `transformTop` | `tx/ty`, `sx/sy`, `rz`, `pivotx/y`, `extend` (0=Hold, 1=Zero, 2=Repeat, 3=Mirror) | Translate, scale, rotate textures. |
+| HSV Adjust TOP | `hsvadjustTop` | `hueoffset`, `saturationmult`, `valuemult` | HSV color adjustments. |
+| Lookup TOP | `lookupTop` | (input: texture + lookup table) | Color remapping via lookup table texture. |
+| Edge TOP | `edgeTop` | `type` (0=Sobel, 1=Frei-Chen) | Edge detection. |
+| Displace TOP | `displaceTop` | `scalex/y` | Pixel displacement using a second input as displacement map. |
+| Flip TOP | `flipTop` | `flipx`, `flipy`, `flop` (diagonal) | Mirror/flip textures. |
+| Crop TOP | `cropTop` | `cropleft/right/top/bottom` | Crop region of texture. |
+| Resolution TOP | `resolutionTop` | `resolutionw/h`, `outputresolution` | Resize textures. |
+| Null TOP | `nullTop` | (none significant) | Pass-through. Use for organization, referencing, feedback delay. |
+| Cache TOP | `cacheTop` | `length`, `step` | Store N frames of history. Useful for trails, time effects. |
+
+### Compositors (combine multiple inputs)
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Composite TOP | `compositeTop` | `operand` (0-31: Over, Add, Multiply, Screen, etc.) | Blend two textures with standard compositing modes. |
+| Over TOP | `overTop` | (simple alpha compositing) | Layer with alpha. Simpler than Composite. |
+| Add TOP | `addTop` | (additive blend) | Additive blending. Great for glow, light effects. |
+| Multiply TOP | `multiplyTop` | (multiplicative blend) | Multiply blend. Good for masking, darkening. |
+| Switch TOP | `switchTop` | `index` (0-based) | Switch between multiple inputs by index. |
+| Cross TOP | `crossTop` | `cross` (0.0-1.0) | Crossfade between two inputs. |
+
+### I/O (input/output)
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Movie File In TOP | `moviefileinTop` | `file`, `speed`, `trim`, `index` | Load video files, image sequences. |
+| Movie File Out TOP | `moviefileoutTop` | `file`, `type` (movie / image sequence), `videocodec` (codec), `record` (toggle) | Record/export video files. |
+| NDI In TOP | `ndiinTop` | `sourcename` | Receive NDI video streams. |
+| NDI Out TOP | `ndioutTop` | `sourcename` | Send NDI video streams. |
+| Syphon Spout In/Out TOP | `syphonspoutinTop` / `syphonspoutoutTop` | `servername` | Inter-app texture sharing. |
+| Video Device In TOP | `videodeviceinTop` | `device` | Webcam/capture card input. |
+| Feedback TOP | `feedbackTop` | `top` (path to the TOP to feed back) | One-frame delay feedback. Essential for recursive effects. |
+
+### Converters
+
+| Operator | Type Name | Direction | Use |
+|----------|-----------|-----------|-----|
+| CHOP to TOP | `choptoTop` | CHOP -> TOP | Visualize channel data as texture (waveform, spectrum display). |
+| TOP to CHOP | `toptoChop` | TOP -> CHOP | Sample texture pixels as channel data. |
+
+## CHOPs — Channel Operators (Green)
+
+Time-varying numeric data: audio, animation curves, sensor data, control signals.
+
+### Generators
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Constant CHOP | `constantChop` | `name0/value0`, `name1/value1`... | Static named channels. Control panel for parameters. |
+| LFO CHOP | `lfoChop` | `frequency`, `type` (0=Sin, 1=Tri, 2=Square, 3=Ramp, 4=Pulse), `amp`, `offset`, `phase` | Low frequency oscillator. Animation driver. |
+| Noise CHOP | `noiseChop` | `type`, `roughness`, `period`, `amp`, `seed`, `channels` | Smooth random motion. Organic animation. |
+| Pattern CHOP | `patternChop` | `type` (0=Sine, 1=Triangle, ...), `length`, `cycles` | Generate waveform patterns. |
+| Timer CHOP | `timerChop` | `length`, `play`, `cue`, `cycles` | Countdown/count-up timer with cue points. |
+| Count CHOP | `countChop` | `threshold`, `limittype`, `limitmin/max` | Event counter with wrapping/clamping. |
+
+### Audio
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Audio File In CHOP | `audiofileinChop` | `file`, `volume`, `play`, `speed`, `trim` | Play audio files. |
+| Audio Device In CHOP | `audiodeviceinChop` | `device`, `channels` | Live microphone/line input. |
+| Audio Spectrum CHOP | `audiospectrumChop` | `fftsize` (FFT size), `outputmenu` / `outlength` (output length), `outputformat` (0=Power, 1=Magnitude) | FFT frequency analysis. |
+| Audio Band EQ CHOP | `audiobandeqChop` | `bands`, `gaindb` per band | Frequency band isolation. |
+| Audio Device Out CHOP | `audiodeviceoutChop` | `device` | Audio playback output. |
+
+### Math/Logic
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Math CHOP | `mathChop` | `preoff`, `gain`, `postoff`, `chanop` (0=Off, 1=Add, 2=Subtract, 3=Multiply...) | Math operations on channels. The Swiss army knife. |
+| Logic CHOP | `logicChop` | `preop` (0=Off, 1=AND, 2=OR, 3=XOR, 4=NAND), `convert` | Boolean logic on channels. |
+| Filter CHOP | `filterChop` | `type` (0=Low Pass, 1=Band Pass, 2=High Pass, 3=Notch), `cutofffreq`, `filterwidth` | Smooth, dampen, filter signals. |
+| Lag CHOP | `lagChop` | `lag1/2`, `overshoot1/2` | Smooth transitions with overshoot. |
+| Limit CHOP | `limitChop` | `type` (0=Clamp, 1=Loop, 2=ZigZag), `min/max` | Clamp or wrap channel values. |
+| Speed CHOP | `speedChop` | (none significant) | Integrate values (velocity to position, acceleration to velocity). |
+| Trigger CHOP | `triggerChop` | `attack`, `peak`, `decay`, `sustain`, `release` | ADSR envelope from trigger events. |
+| Select CHOP | `selectChop` | `chop` (path), `channames` | Reference channels from another CHOP. |
+| Merge CHOP | `mergeChop` | `align` (0=Extend, 1=Trim to First, 2=Trim to Shortest) | Combine channels from multiple CHOPs. |
+| Null CHOP | `nullChop` | (none significant) | Pass-through for organization and referencing. |
+
+### Input Devices
+
+| Operator | Type Name | Use |
+|----------|-----------|-----|
+| Mouse In CHOP | `mouseinChop` | Mouse position, buttons, wheel. |
+| Keyboard In CHOP | `keyboardinChop` | Keyboard key states. |
+| MIDI In CHOP | `midiinChop` | MIDI note/CC input. |
+| OSC In CHOP | `oscinChop` | OSC message input (network). |
+
+## SOPs — Surface Operators (Blue)
+
+3D geometry: points, polygons, NURBS, meshes.
+
+### Generators
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Grid SOP | `gridSop` | `rows`, `cols`, `sizex/y`, `type` (0=Polygon, 1=Mesh, 2=NURBS) | Flat grid mesh. Foundation for displacement, instancing. |
+| Sphere SOP | `sphereSop` | `type`, `rows`, `cols`, `radius` | Sphere geometry. |
+| Box SOP | `boxSop` | `sizex/y/z` | Box geometry. |
+| Torus SOP | `torusSop` | `radiusx/y`, `rows`, `cols` | Donut shape. |
+| Circle SOP | `circleSop` | `type`, `radius`, `divs` | Circle/ring geometry. |
+| Line SOP | `lineSop` | `dist`, `points` | Line segments. |
+| Text SOP | `textSop` | `text`, `fontsizex`, `fontfile`, `extrude` | 3D text geometry. |
+
+### Modifiers
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Transform SOP | `transformSop` | `tx/ty/tz`, `rx/ry/rz`, `sx/sy/sz` | Transform geometry (translate, rotate, scale). |
+| Noise SOP | `noiseSop` | `type`, `amp`, `period`, `roughness` | Deform geometry with noise. |
+| Sort SOP | `sortSop` | `ptsort`, `primsort` | Reorder points/primitives. |
+| Facet SOP | `facetSop` | `unique`, `consolidate`, `computenormals` | Normals, consolidation, unique points. |
+| Merge SOP | `mergeSop` | (none significant) | Combine multiple geometry inputs. |
+| Null SOP | `nullSop` | (none significant) | Pass-through. |
+
+## DATs — Data Operators (White)
+
+Text, tables, scripts, network data.
+
+### Core
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Table DAT | `tableDat` | (edit content directly) | Spreadsheet-like data tables. |
+| Text DAT | `textDat` | (edit content directly) | Arbitrary text content. Shader code, configs, scripts. |
+| Script DAT | `scriptDat` | `language` (0=Python, 1=C++) | Custom callbacks and DAT processing. |
+| CHOP Execute DAT | `chopexecuteDat` | `chop` (path to watch), callbacks | Trigger Python on CHOP value changes. |
+| DAT Execute DAT | `datexecuteDat` | `dat` (path to watch) | Trigger Python on DAT content changes. |
+| Panel Execute DAT | `panelexecuteDat` | `panel` | Trigger Python on UI panel events. |
+
+### I/O
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Web DAT | `webDat` | `url`, `fetchmethod` (0=GET, 1=POST) | HTTP requests. API integration. |
+| TCP/IP DAT | `tcpipDat` | `address`, `port`, `mode` | TCP networking. |
+| OSC In DAT | `oscinDat` | `port` | Receive OSC as text messages. |
+| Serial DAT | `serialDat` | `port`, `baudrate` | Serial port communication (Arduino, etc.). |
+| File In DAT | `fileinDat` | `file` | Read text files. |
+| File Out DAT | `fileoutDat` | `file`, `write` | Write text files. |
+
+### Conversions
+
+| Operator | Type Name | Direction | Use |
+|----------|-----------|-----------|-----|
+| DAT to CHOP | `dattochopChop` | DAT -> CHOP | Convert table data to channels. |
+| CHOP to DAT | `choptodatDat` | CHOP -> DAT | Convert channel data to table rows. |
+| SOP to DAT | `soptodatDat` | SOP -> DAT | Extract geometry data as table. |
+
+## MATs — Material Operators (Yellow)
+
+Materials for 3D rendering in Render TOP / Geometry COMP.
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Phong MAT | `phongMat` | `diff_colorr/g/b`, `spec_colorr/g/b`, `shininess`, `colormap`, `normalmap` | Classic Phong shading. Simple, fast. |
+| PBR MAT | `pbrMat` | `basecolorr/g/b`, `metallic`, `roughness`, `normalmap`, `emitcolorr/g/b` | Physically-based rendering. Realistic materials. |
+| GLSL MAT | `glslMat` | `dat` (shader DAT), custom uniforms | Custom vertex + fragment shaders for 3D. |
+| Constant MAT | `constMat` | `colorr/g/b`, `colormap` | Flat unlit color/texture. No shading. |
+| Point Sprite MAT | `pointspriteMat` | `colormap`, `scale` | Render points as camera-facing sprites. Great for particles. |
+| Wireframe MAT | `wireframeMat` | `colorr/g/b`, `width` | Wireframe rendering. |
+| Depth MAT | `depthMat` | `near`, `far` | Render depth buffer as grayscale. |
+
+## COMPs — Component Operators (Gray)
+
+Containers, 3D scene elements, UI components.
+
+### 3D Scene
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Geometry COMP | `geometryComp` | `material` (path), `instancechop` (path), `instancing` (toggle) | Renders geometry with material. Instancing host. |
+| Camera COMP | `cameraComp` | `tx/ty/tz`, `rx/ry/rz`, `fov`, `near/far` | Camera for Render TOP. |
+| Light COMP | `lightComp` | `lighttype` (0=Point, 1=Directional, 2=Spot, 3=Cone), `dimmer`, `colorr/g/b` | Lighting for 3D scenes. |
+| Ambient Light COMP | `ambientlightComp` | `dimmer`, `colorr/g/b` | Ambient lighting. |
+| Environment Light COMP | `envlightComp` | `envmap` | Image-based lighting (IBL). |
+
+### Containers
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Container COMP | `containerComp` | `w`, `h`, `bgcolor1/2/3` | UI container. Holds other COMPs for panel layouts. |
+| Base COMP | `baseComp` | (none significant) | Generic container. Networks-inside-networks. |
+| Replicator COMP | `replicatorComp` | `template`, `operatorsdat` | Clone a template operator N times from a table. |
+
+### Utilities
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Window COMP | `windowComp` | `winw/h`, `winoffsetx/y`, `monitor`, `borders` | Output window for display/projection. |
+| Select COMP | `selectComp` | `rowcol`, `panel` | Select and display content from elsewhere. |
+| Engine COMP | `engineComp` | `tox`, `externaltox` | Load external .tox components. Sub-process isolation. |
+
+## Cross-Family Converter Summary
+
+| From | To | Operator | Type Name |
+|------|-----|----------|-----------|
+| CHOP | TOP | CHOP to TOP | `choptopTop` |
+| TOP | CHOP | TOP to CHOP | `topchopChop` |
+| DAT | CHOP | DAT to CHOP | `dattochopChop` |
+| CHOP | DAT | CHOP to DAT | `choptodatDat` |
+| SOP | CHOP | SOP to CHOP | `soptochopChop` |
+| CHOP | SOP | CHOP to SOP | `choptosopSop` |
+| SOP | DAT | SOP to DAT | `soptodatDat` |
+| DAT | SOP | DAT to SOP | `dattosopSop` |
+| SOP | TOP | (use Render TOP + Geometry COMP) | — |
+| TOP | SOP | TOP to SOP | `toptosopSop` |
diff --git a/optional-skills/creative/touchdesigner-mcp/references/pitfalls.md b/optional-skills/creative/touchdesigner-mcp/references/pitfalls.md
new file mode 100644
index 00000000000..33c9b5f4d87
--- /dev/null
+++ b/optional-skills/creative/touchdesigner-mcp/references/pitfalls.md
@@ -0,0 +1,508 @@
+# TouchDesigner MCP — Pitfalls & Lessons Learned
+
+Hard-won knowledge from real TD sessions. Read this before building anything.
+
+## Parameter Names
+
+### 1. NEVER hardcode parameter names — always discover
+
+Parameter names change between TD versions. What works in one build may not work in another. ALWAYS use td_get_par_info to discover actual names from TD.
+
+The agent's LLM training data contains WRONG parameter names. Do not trust them.
+
+Known historical differences (may vary further — always verify):
+| What docs/training say | Actual in some versions | Notes |
+|---------------|---------------|-------|
+| `dat` | `pixeldat` | GLSL TOP pixel shader DAT |
+| `colora` | `alpha` | Constant TOP alpha |
+| `sizex` / `sizey` | `size` | Blur TOP (single value) |
+| `fontr/g/b` | `fontcolorr/g/b` | Text TOP font color (r/g/b only — alpha is a separate parameter, see next row) |
+| `fontcolora` | `fontalpha` | Text TOP font alpha (NOT `fontcolora`) |
+| `bgcolora` | `bgalpha` | Text TOP bg alpha |
+| `value1name` | `vec0name` | GLSL TOP uniform name |
+
+### 2. twozero td_execute_python response format
+
+When calling `td_execute_python` via twozero MCP, successful responses return `(ok)` followed by FPS/error summary (e.g. `[fps 60.0/60] [0 err/0 warn]`), NOT the raw Python `result` dict. If you're parsing responses programmatically, check for the `(ok)` prefix — don't pattern-match on Python variable names from the script. Use `td_get_operator_info` or separate inspection calls to read back values.
+
+### 3. When using td_set_operator_pars, param names must match exactly
+
+Use td_get_par_info to discover them. The MCP tool validates parameter names and returns clear errors explaining what went wrong, unlike raw Python which crashes the whole script with tdAttributeError and stops execution. Always discover before setting.
+
+### 4. Use `safe_par()` pattern for cross-version compatibility
+
+```python
+def safe_par(node, name, value):
+    p = getattr(node.par, name, None)
+    if p is not None:
+        p.val = value
+        return True
+    return False
+```
+
+### 5. `td.tdAttributeError` crashes the whole script — use defensive access
+
+If you do `node.par.nonexistent = value`, TD raises `tdAttributeError` and stops the entire script. Prevention is better than catching:
+- Use `op()` instead of `opex()` — `op()` returns None on failure, `opex()` raises
+- Use `hasattr(node.par, 'name')` before accessing any parameter
+- Use `getattr(node.par, 'name', None)` with a default
+- Use the `safe_par()` pattern from pitfall #4
+
+```python
+# WRONG — crashes if param doesn't exist:
+node.par.nonexistent = value
+
+# CORRECT — defensive access:
+if hasattr(node.par, 'nonexistent'):
+    node.par.nonexistent = value
+```
+
+### 6. `outputresolution` is a string menu, not an integer
+
+```
+menuNames: ['useinput','eighth','quarter','half','2x','4x','8x','fit','limit','custom','parpanel']
+```
+Always use the string form. Setting `outputresolution = 9` may silently fail.
+```python
+node.par.outputresolution = 'custom'  # correct
+node.par.resolutionw = 1280; node.par.resolutionh = 720
+```
+Discover valid values: `list(node.par.outputresolution.menuNames)`
+
+## GLSL Shaders
+
+### 7. `uTDCurrentTime` does NOT exist in GLSL TOP
+
+There is NO built-in time uniform for GLSL TOPs. GLSL MAT has `uTDGeneral.seconds` but that's NOT available in GLSL TOP context.
+
+**PRIMARY — GLSL TOP Vectors/Values page:**
+```python
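+# NOTE: uniform parameter names differ between TD builds (the table in pitfall #1
+# lists `vec0name` for some versions) — verify with td_get_par_info before setting.
+gl.par.value0name = 'uTime'
+gl.par.value0.expr = "absTime.seconds"
+# In GLSL: uniform float uTime;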
+```
+
+**FALLBACK — Constant TOP texture (for complex time data):**
+
+CRITICAL: set format to `rgba32float` — default 8-bit clamps to 0-1:
+```python
+t = root.create(constantTOP, 'time_driver')
+t.par.format = 'rgba32float'
+t.par.outputresolution = 'custom'
+t.par.resolutionw = 1; t.par.resolutionh = 1
+t.par.colorr.expr = "absTime.seconds % 1000.0"
+t.outputConnectors[0].connect(glsl.inputConnectors[0])
+```
+
+### 8. GLSL compile errors are silent in the API
+
+The GLSL TOP shows a yellow warning triangle in the UI but `node.errors()` may return empty string. Check `node.warnings()` too, and create an Info DAT pointed at the GLSL TOP to read the actual compiler output.
+
+### 9. TD GLSL uses `vUV.st` not `gl_FragCoord` — and REQUIRES `TDOutputSwizzle()` on macOS
+
+Standard GLSL patterns don't work. TD provides:
+- `vUV.st` — UV coordinates (0-1)
+- `uTDOutputInfo.res.zw` — resolution
+- `sTD2DInputs[0]` — input textures
+- `layout(location = 0) out vec4 fragColor` — output
+
+CRITICAL on macOS: Always wrap output with `TDOutputSwizzle()`:
+```glsl
+fragColor = TDOutputSwizzle(color);
+```
+TD uses GLSL 4.60 (Vulkan backend). Support for GLSL 3.30 and earlier has been removed.
+
+### 10. Large GLSL shaders — write to temp file
+
+GLSL code with special characters can corrupt JSON payloads. Write the shader to a temp file and load it in TD:
+```python
+# Agent side: write shader to /tmp/shader.glsl via write_file
+# TD side:
+sd = root.create(textDAT, 'shader_code')
+with open('/tmp/shader.glsl', 'r') as f:
+    sd.text = f.read()
+```
+
+## Node Management
+
+### 11. Destroying nodes while iterating `root.children` causes `tdError`
+
+The iterator is invalidated when a child is destroyed. Always snapshot first:
+```python
+kids = list(root.children)  # snapshot
+for child in kids:
+    if child.valid:  # check — earlier destroys may cascade
+        child.destroy()
+```
+
+### 11b. Split cleanup and creation into SEPARATE td_execute_python calls
+
+Creating nodes with the same names you just destroyed in the SAME script causes "Invalid OP object" errors — even with a `list()` snapshot. TD's internal references can go stale within one execution context.
+
+**WRONG (single call):**
+```python
+# td_execute_python:
+for c in list(root.children):
+    if c.valid and c.name.startswith('promo_'):
+        c.destroy()
+# ... then create promo_audio, promo_shader etc. in same script → CRASHES
+```
+
+**CORRECT (two separate calls):**
+```python
+# Call 1: td_execute_python — clean only
+for c in list(root.children):
+    if c.valid and c.name.startswith('promo_'):
+        c.destroy()
+
+# Call 2: td_execute_python — build (separate MCP call)
+audio = root.create(audiofileinCHOP, 'promo_audio')
+# ... rest of build
+```
+
+### 12. Feedback TOP: use `top` parameter, NOT direct input wire
+
+The feedbackTOP's `top` parameter references which TOP to delay. Do NOT also wire that TOP directly into the feedback's input — this creates a real cook dependency loop.
+
+Correct setup:
+```python
+fb = root.create(feedbackTOP, 'fb_delay')
+fb.par.top = comp.path          # reference only — no wire to fb input
+fb.outputConnectors[0].connect(xf)  # fb output -> transform -> fade -> comp
+```
+
+The "Cook dependency loop detected" warning on the transform/fade chain is expected.
+
+### 13. GLSL TOP auto-creates companion nodes
+
+Creating a `glslTOP` also creates `name_pixel` (Text DAT), `name_info` (Info DAT), and `name_compute` (Text DAT). These are visible in the network. Don't be alarmed by "extra" nodes.
+
+### 14. The default project root is `/project1`
+
+New TD files start with `/project1` as the main container. System nodes live at `/`, `/ui`, `/sys`, `/local`, `/perform`. Don't create user nodes outside `/project1`.
+
+### 15. Non-Commercial license caps resolution at 1280x1280
+
+Setting `resolutionw=1920` silently clamps to 1280. Always check effective resolution after creation:
+```python
+n.cook(force=True)
+actual = str(n.width) + 'x' + str(n.height)
+```
+
+## Recording & Codecs
+
+### 16. MovieFileOut TOP: H.264/H.265/AV1 requires Commercial license
+
+In Non-Commercial TD, these codecs produce an error. Recommended alternatives:
+- `prores` — Apple ProRes, **best on macOS**, HW accelerated, NOT license-restricted. ~55MB/s at 1280x720 but visually lossless quality. **Use this as default on macOS.**
+- `cineform` — GoPro Cineform, supports alpha
+- `hap` — GPU-accelerated playback, large files
+- `notchlc` — GPU-accelerated, good quality
+- `mjpa` — Motion JPEG, legacy fallback (lossy, use only if ProRes unavailable)
+
+For image sequences: `rec.par.type = 'imagesequence'`, `rec.par.imagefiletype = 'png'`
+
+### 17. MovieFileOut `.record()` method may not exist
+
+Use the toggle parameter instead:
+```python
+rec.par.record = True   # start recording
+rec.par.record = False  # stop recording
+```
+
+When setting file path and starting recording in the same script, use delayFrames:
+```python
+rec.par.file = '/tmp/new_output.mov'
+run("op('/project1/recorder').par.record = True", delayFrames=2)
+```
+
+### 18. TOP.save() captures same frame when called rapidly
+
+Use MovieFileOut for real-time recording. Set `project.realTime = False` for frame-accurate output.
+
+### 19. AudioFileIn CHOP: cue and recording sequence matters
+
+The recording sequence must be done in exact order, or the recording will be empty, audio will start mid-file, or the file won't be written.
+
+**Proven recording sequence:**
+
+```python
+# Step 1: Stop any existing recording
+rec.par.record = False
+
+# Step 2: Reset audio to beginning
+audio.par.play = False
+audio.par.cue = True
+audio.par.cuepoint = 0      # may need cuepointunit=0 too
+# Verify: audio.par.cue.eval() should be True
+
+# Step 3: Set output file path
+rec.par.file = '/tmp/output.mov'
+
+# Step 4: Release cue + start playing + start recording (with frame delay)
+audio.par.cue = False
+audio.par.play = True
+audio.par.playmode = 2      # Sequential — plays once through
+run("op('/project1/recorder').par.record = True", delayFrames=3)
+```
+
+**Why each step matters:**
+- `rec.par.record = False` first — if a previous recording is active, setting `par.file` may fail silently
+- `audio.par.cue = True` + `cuepoint = 0` — guarantees audio starts from the beginning, otherwise the spectrum may be silent for the first few seconds
+- `delayFrames=3` on the record start — setting `par.file` and `par.record = True` in the same script can race; the file path needs a frame to register before recording starts
+- `playmode = 2` (Sequential) — plays the file once. Use `playmode = 0` (Locked to Timeline) if you want TD's timeline to control position
+
+## TD Python API Patterns
+
+### 20. COMP extension setup: ext0object format is CRITICAL
+
+`ext0object` expects a CONSTANT string (NOT expression mode):
+```python
+comp.par.ext0object = "op('./myExtensionDat').module.MyClassName(me)"
+```
+NEVER set as just the DAT name. NEVER use ParMode.EXPRESSION. ALWAYS ensure the DAT has `par.language='python'`.
+
+### 21. td.Panel is NOT subscriptable — use attribute access
+
+```python
+comp.panel.select      # correct (attribute access, returns float)
+comp.panel['select']   # WRONG — 'td.Panel' object is not subscriptable
+```
+
+### 22. ALWAYS use relative paths in script callbacks
+
+In scriptTOP/CHOP/SOP/DAT callbacks, use paths relative to `scriptOp` or `me`:
+```python
+root = scriptOp.parent().parent()
+dat = root.op('pixel_data')
+```
+NEVER hardcode absolute paths like `op('/project1/myComp/child')` — they break when containers are renamed or copied.
+
+### 23. keyboardinCHOP channel names have 'k' prefix
+
+Channel names are `kup`, `kdown`, `kleft`, `kright`, `ka`, `kb`, etc. — NOT `up`, `down`, `a`, `b`. Always verify with:
+```python
+channels = [c.name for c in op('/project1/keyboard1').chans()]
+```
+
+### 24. expressionCHOP cook-only properties — false positive errors
+
+`me.inputVal`, `me.chanIndex`, `me.sampleIndex` work ONLY in cook-context. Calling `par.expr0expr.eval()` from outside always raises an error — this is NOT a real operator error. Ignore these in error scans.
+
+### 25. td.Vertex attributes — use index access not named attributes
+
+In TD 2025.32, `td.Vertex` objects do NOT have `.x`, `.y`, `.z` attributes:
+```python
+# WRONG — crashes:
+vertex.x, vertex.y, vertex.z
+
+# CORRECT — index-based:
+vertex.point.P[0], vertex.point.P[1], vertex.point.P[2]
+# Or for SOP point positions:
+pt = sop.points()[i]
+pos = pt.P    # use P[0], P[1], P[2]
+```
+
+## Audio
+
+### 26. Audio Spectrum CHOP output is weak — boost it
+
+Raw output is very small (0.001-0.05). Use built-in boost: `spectrum.par.highfrequencyboost = 3.0`
+
+If still weak, add Math CHOP in Range mode: `fromrangehi=0.05, torangehi=1.0`
+
+### 27. AudioSpectrum CHOP: timeslice and sample count are the #1 gotcha
+
+AudioSpectrum at 44100Hz with `timeslice=False` outputs the ENTIRE audio file as samples (~24000+). CHOP-to-TOP then exceeds texture resolution max and warns/fails.
+
+**Fix:** Keep `timeslice = True` (default) for real-time per-frame FFT. Set `fftsize` to control bin count (it's a STRING enum: `'256'` not `256`).
+
+If the CHOP-to-TOP still gets too many samples, set `layout = 'rowscropped'` on the choptoTOP.
+
+```python
+spectrum.par.fftsize = '256'      # STRING, not int — enum values
+spectrum.par.timeslice = True     # MUST be True for real-time audio reactivity
+spectex.par.layout = 'rowscropped'  # handles oversized CHOP inputs
+```
+
+**resampleCHOP has NO `numsamples` param.** It uses `rate`, `start`, `end`, `method`. Don't guess — always `td_get_par_info('resampleCHOP')` first.
+
+### 28. CHOP To TOP has NO input connectors — use par.chop reference
+
+```python
+spec_tex = root.create(choptoTOP, 'spectrum_tex')
+spec_tex.par.chop = resample  # correct: parameter reference
+# NOT: resample.outputConnectors[0].connect(spec_tex.inputConnectors[0])  # WRONG
+```
+
+## Workflow
+
+### 29. Always verify after building — errors are silent
+
+Node errors and broken connections produce no output. Always check:
+```python
+for c in list(root.children):
+    e = c.errors()
+    w = c.warnings()
+    if e: print(c.name, 'ERR:', e)
+    if w: print(c.name, 'WARN:', w)
+```
+
+### 30. Window COMP param for display target is `winop`
+
+```python
+win = root.create(windowCOMP, 'display')
+win.par.winop = '/project1/logo_out'
+win.par.winw = 1280; win.par.winh = 720
+win.par.winopen.pulse()
+```
+
+### 31. `sample()` returns frozen pixels in rapid calls
+
+`out.sample(x, y)` returns pixels from a single cook snapshot. Compare samples with 2+ second delays, or use screencapture on the display window.
+
+### 32. Audio-reactive GLSL: dual-layer sync pipeline
+
+For audio-synced visuals, use BOTH layers for maximum effect:
+
+**Layer 1 (TD-side, real-time):** AudioFileIn → AudioSpectrum(timeslice=True, fftsize='256') → Math(gain=5) → choptoTOP(par.chop=math, layout='rowscropped') → GLSL input. The shader samples `sTD2DInputs[1]` at different x positions for bass/mid/hi. Record the TD output with MovieFileOut.
+
+**Layer 2 (Python-side, post-hoc):** scipy FFT on the SAME audio file → per-frame features (rms, bass, mid, hi, beat detection) → drive ASCII brightness, chromatic aberration, beat flashes during the render pass.
+
+Both layers locked to the same audio file = visuals genuinely sync to the beat at two independent stages.
+
+**Key gotcha:** AudioFileIn must be cued (`par.cue=True` → `par.cuepulse.pulse()`) then uncued (`par.cue=False`, `par.play=True`) before recording starts. Otherwise the spectrum is silent for the first few seconds.
+
+### 33. twozero MCP: benchmark and prefer native tools
+
+Benchmarked April 2026: twozero MCP with 36 native tools. The old curl/REST method (port 9981) had zero native tools.
+
+**Always prefer native MCP tools over td_execute_python:**
+- `td_create_operator` over `root.create()` scripts (handles viewport positioning)
+- `td_set_operator_pars` over `node.par.X = Y` scripts (validates param names)
+- `td_get_par_info` over temp-node discovery dance (instant, no cleanup)
+- `td_get_errors` over manual `c.errors()` loops
+- `td_get_focus` for context awareness (no equivalent in old method)
+
+Only fall back to `td_execute_python` for multi-step logic (wiring chains, conditional builds, loops).
+
+### 34. twozero td_execute_python response wrapping
+
+twozero wraps `td_execute_python` responses with status info: `(ok)\n\n[fps 60.0/60] [0 err/0 warn]`. Your Python `result` variable value may not appear verbatim in the response text. If you need to check results programmatically, don't rely on string-matching the `result` dict or on `print()` output (see pitfall #42) — read values back with dedicated inspection tools such as `td_get_operator_info`.
+
+### 35. Audio-reactive chain: DO NOT use Lag CHOP or Filter CHOP for spectrum smoothing
+
+The Derivative docs and tutorials suggest using Lag CHOP (lag1=0.2, lag2=0.5) to smooth raw FFT output before passing to a shader. **This does NOT work with AudioSpectrum → CHOP to TOP → GLSL.**
+
+What happens: Lag CHOP operates in timeslice mode. A 256-sample spectrum input gets expanded to 1600-2400 samples. The Lag averaging drives all values to near-zero (~1e-06). The CHOP to TOP produces a 2400x2 texture instead of 256x2. The shader receives effectively zero audio data.
+
+**The correct chain is: Spectrum(outlength=256) → Math(gain=10) → CHOPtoTOP → GLSL.** No CHOP smoothing at all. If you need smoothing, do it in the GLSL shader via temporal lerp with a feedback texture.
+
+Verified values with audio playing:
+- Without Lag CHOP: bass bins = 5.0-5.4, mid bins = 1.0-1.7 (strong, usable)
+- With Lag CHOP: ALL bins = 0.000001-0.00004 (dead, zero audio reactivity)
+
+### 36. AudioSpectrum Output Length: set manually to avoid CHOP to TOP overflow
+
+AudioSpectrum in Visualization mode with FFT 8192 outputs 22,050 samples by default (1 per Hz, 0–22050). CHOP to TOP cannot handle this — you get "Number of samples exceeded texture resolution max".
+
+Fix: `spectrum.par.outputmenu = 'setmanually'` and `spectrum.par.outlength = 256`. This gives 256 frequency bins — plenty for visual FFT.
+
+DO NOT set `timeslice = False` as a workaround — that processes the entire audio file at once and produces even more samples.
+
+### 37. GLSL spectrum texture from CHOP to TOP is 256x2 not 256x1
+
+AudioSpectrum outputs 2 channels (stereo: chan1, chan2). CHOP to TOP with `dataformat='r'` creates a 256x2 texture — one row per channel. Sample the first channel at `y=0.25` (center of first row), NOT `y=0.5` (boundary between rows):
+
+```glsl
+float bass = texture(sTD2DInputs[1], vec2(0.05, 0.25)).r;  // correct
+float bass = texture(sTD2DInputs[1], vec2(0.05, 0.5)).r;   // WRONG — samples between rows
+```
+
+### 38. FPS=0 doesn't mean ops aren't cooking — check play state
+
+TD can show `fps:0` in `td_get_perf` while ops still cook and `TOP.save()` still produces valid screenshots. The two most common causes:
+
+**a) Project is paused (playbar stopped).** TD's playbar can be toggled with spacebar. The `root` at `/` has no `.playbar` attribute (it's on the perform COMP). The easiest fix is sending a spacebar keypress via `td_input_execute`, though this tool can sometimes error. As a workaround, `TOP.save()` always works regardless of play state — use it to verify rendering is actually happening before spending time debugging FPS.
+
+**b) Audio device CHOP blocking the main thread.** An `audiooutCHOP` with an active audio device can consume 300-400ms/s (2000%+ of frame budget), stalling the cook loop at FPS=0. Fix: keep the CHOP active but set `volume=0` to prevent the audio driver from blocking. Disabling it entirely (`active=False`) may also work but can prevent downstream audio processing CHOPs from cooking.
+
+Diagnostic sequence when FPS=0:
+1. `td_get_perf` — check if any op has extreme CPU/s
+2. `TOP.save()` on the output — if it produces a valid image, the pipeline works, just not at real-time rate
+3. Check for blocking CHOPs (audioout, audiodevin, etc.)
+4. Toggle play state (spacebar, or check if absTime.seconds is advancing)
+
+### 39. Recording while FPS=0 produces empty or near-empty files
+
+This is the #1 cause of "I recorded for 30 seconds but got a 2-frame video." If TD's cook loop is stalled (FPS=0 or very low), MovieFileOut has nothing to record. Unlike `TOP.save()` which captures the last cooked frame regardless, MovieFileOut only writes frames that actually cook.
+
+**Always verify FPS before starting a recording:**
+```python
+# Check via td_get_perf first
+# If FPS < 30, do NOT start recording — fix the performance issue first
+# If FPS=0, the playbar is likely paused — see pitfall #38
+```
+
+Common causes of recording empty video:
+- Playbar paused (FPS=0) — see pitfall #38a
+- Audio device CHOP blocking the main thread — see pitfall #38b
+- Recording started before audio was cued — audio is silent, GLSL outputs black, MovieFileOut records black frames that look empty
+- `par.file` set in the same script as `par.record = True` — see pitfall #17
+
+### 40. GLSL shader produces black output — test before committing to a long render
+
+New GLSL shaders can fail silently (see pitfall #8). Before recording a long take, always:
+
+1. **Write a minimal test shader first** that just outputs a solid color or pass-through:
+```glsl
+void main() {
+    vec2 uv = vUV.st;
+    fragColor = TDOutputSwizzle(vec4(uv, 0.0, 1.0));
+}
+```
+
+2. **Verify the test renders correctly** via `td_get_screenshot` on the GLSL TOP's output.
+
+3. **Swap in the real shader** and screenshot again immediately. If black, the shader has a compile error or logic issue.
+
+4. **Only then start recording.** A 90-second ProRes recording is ~5GB. Recording black frames wastes disk and time.
+
+Common causes of black GLSL output:
+- Missing `TDOutputSwizzle()` on macOS (pitfall #9)
+- Time uniform not connected — shader uses default 0.0, fractal stays at origin
+- Spectrum texture not connected — audio values all 0.0, driving everything to black
+- Integer division where float division was expected (`1/2 = 0` not `0.5`)
+- `absTime.seconds % 1000.0` rolled over past 1000 and the modulo produces unexpected values
+
+### 41. td_write_dat uses `text` parameter, NOT `content`
+
+The MCP tool `td_write_dat` expects a `text` parameter for full replacement. Passing `content` returns an error: `"Provide either 'text' for full replace, or 'old_text'+'new_text' for patching"`.
+
+If `td_write_dat` fails, fall back to `td_execute_python`:
+```python
+op("/project1/shader_code").text = shader_string
+```
+
+### 42. td_execute_python does NOT return stdout or print() output
+
+Despite what earlier versions of pitfall #34 stated, `print()` and `debug()` output from `td_execute_python` scripts does NOT appear in the MCP response. The response is always just `(ok)` + FPS/error summary. To read values back, use dedicated inspection tools (`td_get_operator_info`, `td_read_dat`, `td_read_chop`) instead of trying to print from within a script.
+
+### 43. td_get_operator_info JSON has `[fps X.X/X]` appended — breaks json.loads()
+
+The response text from `td_get_operator_info` has `[fps 60.0/60]` appended after the JSON object. This causes `json.loads()` to fail with "Extra data" errors. Strip it before parsing:
+```python
+clean = response_text.rsplit('[fps', 1)[0]
+data = json.loads(clean)
+```
+
+### 44. td_get_screenshot is asynchronous — returns `{"status": "pending"}`
+
+Screenshots don't complete instantly. The tool returns `{"status": "pending", "requestId": "..."}` and the actual file appears later. Wait a few seconds before checking for the file. There is no callback or completion notification — poll the filesystem.
+
+### 45. Recording duration is manual — no auto-stop at audio end
+
+MovieFileOut records until `par.record = False` is set. If audio ends before you stop recording, the file keeps growing with repeated frames. Always stop recording promptly after the audio duration. For precision: set a timer on the agent side matching the audio length, then send `par.record = False`. Trim excess with ffmpeg as a safety net:
+```bash
+ffmpeg -i raw.mov -t 25 -c copy trimmed.mov
+```
\ No newline at end of file
diff --git a/optional-skills/creative/touchdesigner-mcp/references/python-api.md b/optional-skills/creative/touchdesigner-mcp/references/python-api.md
new file mode 100644
index 00000000000..f2955110b0e
--- /dev/null
+++ b/optional-skills/creative/touchdesigner-mcp/references/python-api.md
@@ -0,0 +1,463 @@
+# TouchDesigner Python API Reference
+
+## The td Module
+
+TouchDesigner's Python environment auto-imports the `td` module. All TD-specific classes, functions, and constants live here. Scripts inside TD (Script DATs, CHOP/DAT Execute callbacks, Extensions) have full access.
+
+When using the MCP `td_execute_python` tool, these globals are pre-loaded:
+- `op` — shortcut for `td.op()`, finds operators by path
+- `ops` — shortcut for `td.ops()`, finds multiple operators by pattern
+- `me` — the operator running the script (via MCP this is the twozero internal executor)
+- `parent` — shortcut for `me.parent()`
+- `project` — the root project component
+- `td` — the full td module
+
+## Finding Operators: op() and ops()
+
+### op(path) — Find a single operator
+
+```python
+# Absolute path (always works from MCP)
+node = op('/project1/noise1')
+
+# Relative path (relative to current operator — only in Script DATs)
+node = op('noise1')      # sibling
+node = op('../noise1')   # parent's sibling
+
+# Returns None if not found (does NOT raise)
+node = op('/project1/nonexistent')  # None
+```
+
+### ops(pattern) — Find multiple operators
+
+```python
+# Glob patterns
+nodes = ops('/project1/noise*')       # all nodes starting with "noise"
+nodes = ops('/project1/*')            # all direct children
+nodes = ops('/project1/container1/*') # all children of container1
+
+# Returns a tuple of operators (may be empty)
+for n in ops('/project1/*'):
+    print(n.name, n.OPType)
+```
+
+### Navigation from a node
+
+```python
+node = op('/project1/noise1')
+
+node.name        # 'noise1'
+node.path        # '/project1/noise1'
+node.OPType      # 'noiseTop'
+node.type         # <class 'noiseTop'>
+node.family       # 'TOP'
+
+# Parent / children
+node.parent()              # the parent COMP
+node.parent().children     # all siblings + self
+node.parent().findChildren(name='noise*')  # filtered
+
+# Type checking
+node.isTOP   # True
+node.isCHOP  # False
+node.isSOP   # False
+node.isDAT   # False
+node.isMAT   # False
+node.isCOMP  # False
+```
+
+## Parameters
+
+Every operator has parameters accessed via the `.par` attribute.
+
+### Reading parameters
+
+```python
+node = op('/project1/noise1')
+
+# Direct access
+node.par.seed.val        # constant-mode value only (ignores expressions/exports)
+node.par.seed.eval()     # current evaluated value, whatever the parameter mode
+node.par.seed.default    # default value
+node.par.monochrome.val  # boolean parameters: True/False
+
+# List all parameters
+for p in node.pars():
+    print(f"{p.name}: {p.val} (default: {p.default})")
+
+# Filter by page (parameter group)
+for p in node.pars('Noise'):  # page name
+    print(f"{p.name}: {p.val}")
+```
+
+### Setting parameters
+
+```python
+# Direct value setting
+node.par.seed.val = 42
+node.par.monochrome.val = True
+node.par.resolutionw.val = 1920
+node.par.resolutionh.val = 1080
+
+# String parameters
+op('/project1/text1').par.text.val = 'Hello World'
+
+# File paths
+op('/project1/moviefilein1').par.file.val = '/path/to/video.mp4'
+
+# Reference another operator (for "dat", "chop", "top" type parameters)
+op('/project1/glsl1').par.dat.val = '/project1/shader_code'
+```
+
+### Parameter expressions
+
+```python
+# Python expressions that evaluate dynamically
+node.par.seed.expr = "me.time.frame"
+node.par.tx.expr = "math.sin(me.time.seconds * 2)"
+
+# Reference another parameter
+node.par.brightness1.expr = "op('/project1/constant1').par.value0.val"
+
+# Export (one-way binding from CHOP to parameter)
+# This makes the parameter follow a CHOP channel value
+op('/project1/noise1').par.seed.val  # can also be driven by exports
+```
+
+### Parameter types
+
+| Type | Python Type | Example |
+|------|------------|---------|
+| Float | `float` | `node.par.brightness1.val = 0.5` |
+| Int | `int` | `node.par.seed.val = 42` |
+| Toggle | `bool` | `node.par.monochrome.val = True` |
+| String | `str` | `node.par.text.val = 'hello'` |
+| Menu | `int` (index) or `str` (label) | `node.par.type.val = 'sine'` |
+| File | `str` (path) | `node.par.file.val = '/path/to/file'` |
+| OP reference | `str` (path) | `node.par.dat.val = '/project1/text1'` |
+| Color | separate r/g/b/a floats | `node.par.colorr.val = 1.0` |
+| XY/XYZ | separate x/y/z floats | `node.par.tx.val = 0.5` |
+
+## Creating and Deleting Operators
+
+```python
+# Create via parent component
+parent = op('/project1')
+new_node = parent.create(noiseTOP)         # using class reference
+new_node = parent.create(noiseTOP, 'my_noise')  # with custom name
+
+# The MCP create_td_node tool handles this automatically:
+# create_td_node(parentPath="/project1", nodeType="noiseTop", nodeName="my_noise")
+
+# Delete
+node = op('/project1/my_noise')
+node.destroy()
+
+# Copy
+original = op('/project1/noise1')
+copy = parent.copy(original, name='noise1_copy')
+```
+
+## Connections (Wiring Operators)
+
+### Output to Input connections
+
+```python
+# Connect noise1's output to level1's input
+op('/project1/noise1').outputConnectors[0].connect(op('/project1/level1'))
+
+# Connect to specific input index (for multi-input operators like Composite)
+op('/project1/noise1').outputConnectors[0].connect(op('/project1/composite1').inputConnectors[0])
+op('/project1/text1').outputConnectors[0].connect(op('/project1/composite1').inputConnectors[1])
+
+# Disconnect all outputs
+op('/project1/noise1').outputConnectors[0].disconnect()
+
+# Query connections
+node = op('/project1/level1')
+inputs = node.inputs          # list of connected input operators
+outputs = node.outputs        # list of connected output operators
+```
+
+### Connection patterns for common setups
+
+```python
+# Linear chain: A -> B -> C -> D
+ops_list = [op(f'/project1/{name}') for name in ['noise1', 'level1', 'blur1', 'null1']]
+for i in range(len(ops_list) - 1):
+    ops_list[i].outputConnectors[0].connect(ops_list[i+1])
+
+# Fan-out: A -> B, A -> C, A -> D
+source = op('/project1/noise1')
+for target_name in ['level1', 'composite1', 'transform1']:
+    source.outputConnectors[0].connect(op(f'/project1/{target_name}'))
+
+# Merge: A + B + C -> Composite
+comp = op('/project1/composite1')
+for i, source_name in enumerate(['noise1', 'text1', 'ramp1']):
+    op(f'/project1/{source_name}').outputConnectors[0].connect(comp.inputConnectors[i])
+```
+
+## DAT Content Manipulation
+
+### Text DATs
+
+```python
+dat = op('/project1/text1')
+
+# Read
+content = dat.text          # full text as string
+
+# Write
+dat.text = "new content"
+dat.text = '''multi
+line
+content'''
+
+# Append
+dat.text += "\nnew line"
+```
+
+### Table DATs
+
+```python
+dat = op('/project1/table1')
+
+# Read cell
+val = dat[0, 0]         # row 0, col 0
+val = dat[0, 'name']    # row 0, column named 'name'
+val = dat['key', 1]     # row named 'key', col 1
+
+# Write cell
+dat[0, 0] = 'value'
+
+# Read row/col
+row = dat.row(0)         # list of Cell objects
+col = dat.col('name')    # list of Cell objects
+
+# Dimensions
+rows = dat.numRows
+cols = dat.numCols
+
+# Append row
+dat.appendRow(['col1_val', 'col2_val', 'col3_val'])
+
+# Clear
+dat.clear()
+
+# Set entire table
+dat.clear()
+dat.appendRow(['name', 'value', 'type'])
+dat.appendRow(['frequency', '440', 'float'])
+dat.appendRow(['amplitude', '0.8', 'float'])
+```
+
+## Time and Animation
+
+```python
+# Global time
+td.absTime.frame       # absolute frame number (never resets)
+td.absTime.seconds     # absolute seconds
+
+# Timeline time (affected by play/pause/loop)
+me.time.frame          # current frame on timeline
+me.time.seconds        # current seconds on timeline
+me.time.rate           # FPS setting
+
+# Timeline control (via td_execute_python)
+project.play = True
+project.play = False
+project.frameRange = (1, 300)   # set timeline range
+
+# Cook frame (when operator was last computed)
+node.cookFrame
+node.cookTime
+```
+
+## Extensions (Custom Python Classes on Components)
+
+Extensions add custom Python methods and attributes to COMPs.
+
+```python
+# Create extension on a Base COMP
+base = op('/project1/myBase')
+
+# The extension class is defined in a Text DAT inside the COMP
+# Typically named 'ExtClass' with the extension code:
+
+extension_code = '''
+class MyExtension:
+    def __init__(self, ownerComp):
+        self.ownerComp = ownerComp
+        self.counter = 0
+
+    def Reset(self):
+        self.counter = 0
+
+    def Increment(self):
+        self.counter += 1
+        return self.counter
+
+    @property
+    def Count(self):
+        return self.counter
+'''
+
+# Write extension code to DAT inside the COMP
+op('/project1/myBase/extClass').text = extension_code
+
+# Configure the extension on the COMP
+base.par.extension1 = 'extClass'  # name of the DAT
+base.par.promoteextension1 = True  # promote methods to parent
+
+# Call extension methods
+base.Increment()       # calls MyExtension.Increment()
+count = base.Count     # accesses MyExtension.Count property
+base.Reset()
+```
+
+## Useful Built-in Modules
+
+### tdu — TouchDesigner Utilities
+
+```python
+import tdu
+
+# Dependency tracking (reactive values)
+dep = tdu.Dependency(initial_value)
+dep.val = new_value   # triggers dependents to recook
+
+# File path utilities
+tdu.expandPath('$HOME/Desktop/output.mov')
+
+# Math
+tdu.clamp(value, min, max)
+tdu.remap(value, from_min, from_max, to_min, to_max)
+```
+
+### TDFunctions
+
+```python
+from TDFunctions import *
+
+# Commonly used utilities
+clamp(value, low, high)
+remap(value, inLow, inHigh, outLow, outHigh)
+interp(value1, value2, t)  # linear interpolation
+```
+
+### TDStoreTools — Persistent Storage
+
+```python
+from TDStoreTools import StorageManager
+
+# Store data that survives project reload
+me.store('myKey', 'myValue')
+val = me.fetch('myKey', default='fallback')
+
+# Storage dict
+me.storage['key'] = value
+```
+
+## Common Patterns via td_execute_python
+
+### Build a complete chain
+
+```python
+# Create a complete audio-reactive noise chain
+parent = op('/project1')
+
+# Create operators
+audio_in = parent.create(audiofileinCHOP, 'audio_in')
+spectrum = parent.create(audiospectrumCHOP, 'spectrum')
+chop_to_top = parent.create(choptoTOP, 'chop_to_top')
+noise = parent.create(noiseTOP, 'noise1')
+level = parent.create(levelTOP, 'level1')
+null_out = parent.create(nullTOP, 'out')
+
+# Wire the chain
+audio_in.outputConnectors[0].connect(spectrum)
+chop_to_top.par.chop = spectrum  # choptoTOP has no input connector; reference the CHOP via par.chop
+noise.outputConnectors[0].connect(level)
+level.outputConnectors[0].connect(null_out)
+
+# Set parameters
+audio_in.par.file = '/path/to/music.wav'
+audio_in.par.play = True
+spectrum.par.size = 512
+noise.par.type = 1  # Sparse
+noise.par.monochrome = False
+noise.par.resolutionw = 1920
+noise.par.resolutionh = 1080
+level.par.opacity = 0.8
+level.par.gamma1 = 0.7
+```
+
+### Query network state
+
+```python
+# Get all TOPs in the project
+tops = [c for c in op('/project1').findChildren(type=TOP)]
+for t in tops:
+    print(f"{t.path}: {t.OPType} {'ERROR' if t.errors() else 'OK'}")
+
+# Find all operators with errors
+def find_errors(parent_path='/project1'):
+    parent = op(parent_path)
+    errors = []
+    for child in parent.findChildren(depth=-1):
+        if child.errors():
+            errors.append((child.path, child.errors()))
+    return errors
+
+result = find_errors()
+```
+
+### Batch parameter changes
+
+```python
+# Set parameters on multiple nodes at once
+settings = {
+    '/project1/noise1': {'seed': 42, 'monochrome': False, 'resolutionw': 1920},
+    '/project1/level1': {'brightness1': 1.2, 'gamma1': 0.8},
+    '/project1/blur1': {'sizex': 5, 'sizey': 5},
+}
+
+for path, params in settings.items():
+    node = op(path)
+    if node:
+        for key, val in params.items():
+            setattr(node.par, key, val)
+```
+
+## Python Version and Packages
+
+TouchDesigner bundles Python 3.11+ with these pre-installed:
+- **numpy** — array operations, fast math
+- **scipy** — signal processing, FFT
+- **OpenCV** (cv2) — computer vision
+- **PIL/Pillow** — image processing
+- **requests** — HTTP client
+- **json**, **re**, **os**, **sys** — standard library
+
+**IMPORTANT:** Parameter names in the examples throughout this document are illustrative. Always run discovery (SKILL.md Step 0) to get actual names for your TD version. Do NOT copy param names from these examples verbatim.
+
+Custom packages can be installed to TD's Python site-packages directory. See TD documentation for the exact path per platform.
+
+## SOP Vertex/Point Access (TD 2025.32)
+
+In TD 2025.32, `td.Vertex` does NOT have `.x`, `.y`, `.z` attributes. Use index access:
+
+```python
+# WRONG — crashes in TD 2025.32:
+vertex.x, vertex.y, vertex.z
+
+# CORRECT — index/attribute access:
+pt = sop.points()[i]
+pos = pt.P          # Position object
+x, y, z = pos[0], pos[1], pos[2]
+
+# Always introspect first:
+dir(sop.points()[0])   # see what attributes actually exist
+dir(sop.points()[0].P) # see Position object interface
+```
diff --git a/optional-skills/creative/touchdesigner-mcp/references/troubleshooting.md b/optional-skills/creative/touchdesigner-mcp/references/troubleshooting.md
new file mode 100644
index 00000000000..b8e201f5c32
--- /dev/null
+++ b/optional-skills/creative/touchdesigner-mcp/references/troubleshooting.md
@@ -0,0 +1,244 @@
+# TouchDesigner Troubleshooting (twozero MCP)
+
+> See `references/pitfalls.md` for the comprehensive lessons-learned list.
+
+## 1. Connection Issues
+
+### Port 40404 not responding
+
+Check these in order:
+
+1. Is TouchDesigner running?
+   ```bash
+   pgrep TouchDesigner
+   ```
+
+1b. Quick hub health check (no JSON-RPC needed):
+   A plain GET to the MCP URL returns instance info:
+   ```
+   curl -s http://localhost:40404/mcp
+   ```
+   Returns: `{"hub": true, "pid": ..., "instances": {"127.0.0.1_PID": {"project": "...", "tdVersion": "...", ...}}}`
+   If this returns JSON but `instances` is empty, TD is running but twozero hasn't registered yet.
+
+2. Is twozero installed in TD?
+   Open TD Palette Browser > twozero should be listed. If not, install it.
+
+3. Is MCP enabled in twozero settings?
+   In TD, open twozero preferences and confirm MCP server is toggled ON.
+
+4. Test the port directly:
+   ```bash
+   nc -z 127.0.0.1 40404
+   ```
+
+5. Test the MCP endpoint:
+   ```bash
+   curl -s http://localhost:40404/mcp
+   ```
+   Should return JSON with hub info. If it does, the server is running.
+
+### Hub responds but no TD instances
+
+The twozero MCP hub is running but TD hasn't registered. Causes:
+- TD project not loaded yet (still on splash screen)
+- twozero COMP not initialized in the current project
+- twozero version mismatch
+
+Fix: Open/reload a TD project that contains the twozero COMP. Use td_list_instances
+to check which TD instances are registered.
+
+### Multi-instance setup
+
+twozero auto-assigns ports for multiple TD instances:
+- First instance: 40404
+- Second instance: 40405
+- Third instance: 40406
+- etc.
+
+Use `td_list_instances` to discover all running instances and their ports.
+
+## 2. MCP Tool Errors
+
+### td_execute_python returns error
+
+The error message from td_execute_python often contains the Python traceback.
+If it's unclear, use `td_read_textport` to see the full TD console output —
+Python exceptions are always printed there.
+
+Common causes:
+- Syntax error in the script
+- Referencing a node that doesn't exist (op() returns None, then you call .par on None)
+- Using wrong parameter names (see pitfalls.md)
+
+### td_set_operator_pars fails
+
+Parameter name mismatch is the #1 cause. The tool validates param names and
+returns clear errors, but you must use exact names.
+
+Fix: ALWAYS call `td_get_par_info` first to discover the real parameter names:
+```
+td_get_par_info(op_type='glslTOP')
+td_get_par_info(op_type='noiseTOP')
+```
+
+### td_create_operator type name errors
+
+Operator type names use camelCase with family suffix:
+- CORRECT: noiseTOP, glslTOP, levelTOP, compositeTOP, audiospectrumCHOP
+- WRONG:   NoiseTOP, noise_top, NOISE TOP, Noise
+
+### td_get_operator_info for deep inspection
+
+If unsure about any aspect of an operator (params, inputs, outputs, state):
+```
+td_get_operator_info(path='/project1/noise1', detail='full')
+```
+
+## 3. Parameter Discovery
+
+CRITICAL: ALWAYS use td_get_par_info to discover parameter names.
+
+The agent's LLM training data contains WRONG parameter names for TouchDesigner.
+Do not trust them. Known wrong names include dat vs pixeldat, colora vs alpha,
+sizex vs size, and many more. See pitfalls.md for the full list.
+
+Workflow:
+1. td_get_par_info(op_type='glslTOP') — get all params for a type
+2. td_get_operator_info(path='/project1/mynode', detail='full') — get params for a specific instance
+3. Use ONLY the names returned by these tools
+
+## 4. Performance
+
+### Diagnosing slow performance
+
+Use `td_get_perf` to see which operators are slow. Look at cook times —
+anything over 1ms per frame is worth investigating.
+
+Common causes:
+- Resolution too high (especially on Non-Commercial)
+- Complex GLSL shaders
+- Too many TOP-to-CHOP or CHOP-to-TOP transfers (GPU-CPU memory copies)
+- Feedback loops without decay (values accumulate, memory grows)
+
+### Non-Commercial license restrictions
+
+- Resolution cap: 1280x1280. Setting resolutionw=1920 silently clamps to 1280.
+- H.264/H.265/AV1 encoding requires Commercial license. Use ProRes or Hap instead.
+- No commercial use of output.
+
+Always check effective resolution after creation:
+```python
+n.cook(force=True)
+actual = str(n.width) + 'x' + str(n.height)
+```
+
+## 5. Hermes Configuration
+
+### Config location
+
+`$HERMES_HOME/config.yaml` (defaults to `~/.hermes/config.yaml` when `HERMES_HOME` is unset)
+
+### MCP entry format
+
+The twozero TD entry should look like:
+```yaml
+mcp_servers:
+  twozero_td:
+    url: http://localhost:40404/mcp
+```
+
+### After config changes
+
+Restart the Hermes session for changes to take effect. The MCP connection is
+established at session startup.
+
+### Verifying MCP tools are available
+
+After restarting, the session log should show twozero MCP tools registered.
+If tools show as registered but aren't callable, check:
+- The twozero MCP hub is still running (curl test above)
+- TD is still running with a project loaded
+- No firewall blocking localhost:40404
+
+## 6. Node Creation Issues
+
+### "Node type not found" error
+
+Wrong type string. Use camelCase with family suffix:
+- Wrong: NoiseTop, noise_top, NOISE TOP
+- Right: noiseTOP
+
+### Node created but not visible
+
+Check parentPath — use absolute paths like /project1. The default project
+root is /project1. System nodes live at /, /ui, /sys, /local, /perform.
+Don't create user nodes outside /project1.
+
+### Cannot create node inside a non-COMP
+
+Only COMP operators (Container, Base, Geometry, etc.) can contain children.
+You cannot create nodes inside a TOP, CHOP, SOP, DAT, or MAT.
+
+## 7. Wiring Issues
+
+### Cross-family wiring
+
+TOPs connect to TOPs, CHOPs to CHOPs, SOPs to SOPs, DATs to DATs.
+Use converter operators to bridge: choptoTOP, toptoCHOP, soptoDAT, etc.
+
+Note: choptoTOP has NO input connectors. Use par.chop reference instead:
+```python
+spec_tex.par.chop = resample_node  # correct
+# NOT: resample.outputConnectors[0].connect(spec_tex.inputConnectors[0])
+```
+
+### Feedback loops
+
+Never create A -> B -> A directly. Use a Feedback TOP:
+```python
+fb = root.create(feedbackTOP, 'fb')
+fb.par.top = comp.path          # reference only, no wire to fb input
+fb.outputConnectors[0].connect(next_node)
+```
+"Cook dependency loop detected" warning on the chain is expected and correct.
+
+## 8. GLSL Issues
+
+### Shader compilation errors are silent
+
+GLSL TOP shows a yellow warning in the UI but node.errors() may return empty.
+Check node.warnings() too. Create an Info DAT pointed at the GLSL TOP for
+full compiler output.
+
+### TD GLSL specifics
+
+- Uses GLSL 4.60 (Vulkan backend). Support for GLSL 3.30 and earlier has been removed.
+- UV coordinates: vUV.st (not gl_FragCoord)
+- Input textures: sTD2DInputs[0]
+- Output: layout(location = 0) out vec4 fragColor
+- macOS CRITICAL: Always wrap output with TDOutputSwizzle(color)
+- No built-in time uniform. Pass time via GLSL TOP Values page or Constant TOP.
+
+## 9. Recording Issues
+
+### H.264/H.265/AV1 requires Commercial license
+
+Use Apple ProRes on macOS (hardware accelerated, not license-restricted):
+```python
+rec.par.videocodec = 'prores'  # Preferred on macOS — high quality (visually lossless, not mathematically lossless), Non-Commercial OK
+# rec.par.videocodec = 'mjpa'  # Fallback — lossy, works everywhere
+```
+
+### MovieFileOut has no .record() method
+
+Use the toggle parameter:
+```python
+rec.par.record = True   # start
+rec.par.record = False  # stop
+```
+
+### All exported frames identical
+
+TOP.save() captures same frame when called rapidly. Use MovieFileOut for
+real-time recording. Set project.realTime = False for frame-accurate output.
diff --git a/optional-skills/creative/touchdesigner-mcp/scripts/setup.sh b/optional-skills/creative/touchdesigner-mcp/scripts/setup.sh
new file mode 100644
index 00000000000..15dc662c1cd
--- /dev/null
+++ b/optional-skills/creative/touchdesigner-mcp/scripts/setup.sh
@@ -0,0 +1,115 @@
+#!/usr/bin/env bash
+# setup.sh — Automated setup for twozero MCP plugin for TouchDesigner
+# Idempotent: safe to run multiple times.
+set -euo pipefail
+
+GREEN='\033[0;32m'; RED='\033[0;31m'; YELLOW='\033[1;33m'; CYAN='\033[0;36m'; NC='\033[0m'
+OK="${GREEN}✔${NC}"; FAIL="${RED}✘${NC}"; WARN="${YELLOW}⚠${NC}"
+
+TWOZERO_URL="https://www.404zero.com/pisang/twozero.tox"
+TOX_PATH="$HOME/Downloads/twozero.tox"
+HERMES_HOME_DIR="${HERMES_HOME:-$HOME/.hermes}"
+HERMES_CFG="${HERMES_HOME_DIR}/config.yaml"
+MCP_PORT=40404
+MCP_ENDPOINT="http://localhost:${MCP_PORT}/mcp"
+
+manual_steps=()
+
+echo -e "\n${CYAN}═══ twozero MCP for TouchDesigner — Setup ═══${NC}\n"
+
+# ── 1. Check if TouchDesigner is running ──
+# Match on process *name* (not full cmdline) to avoid self-matching shells
+# that happen to have "TouchDesigner" in their args. macOS and Linux pgrep
+# both support -x for exact name match.
+if pgrep -x TouchDesigner >/dev/null 2>&1 || pgrep -x TouchDesignerFTE >/dev/null 2>&1; then
+    echo -e " ${OK} TouchDesigner is running"
+    td_running=true
+else
+    echo -e " ${WARN} TouchDesigner is not running"
+    td_running=false
+fi
+
+# ── 2. Ensure twozero.tox exists ──
+if [[ -f "$TOX_PATH" ]]; then
+    echo -e " ${OK} twozero.tox already exists at ${TOX_PATH}"
+else
+    echo -e " ${WARN} twozero.tox not found — downloading..."
+    if curl -fSL -o "$TOX_PATH" "$TWOZERO_URL" 2>/dev/null; then
+        echo -e " ${OK} Downloaded twozero.tox to ${TOX_PATH}"
+    else
+        echo -e " ${FAIL} Failed to download twozero.tox from ${TWOZERO_URL}"
+        echo "       Please download manually and place at ${TOX_PATH}"
+        manual_steps+=("Download twozero.tox from ${TWOZERO_URL} to ${TOX_PATH}")
+    fi
+fi
+
+# ── 3. Ensure Hermes config has twozero_td MCP entry ──
+if [[ ! -f "$HERMES_CFG" ]]; then
+    echo -e " ${FAIL} Hermes config not found at ${HERMES_CFG}"
+    manual_steps+=("Create ${HERMES_CFG} with twozero_td MCP server entry")
+elif grep -q 'twozero_td' "$HERMES_CFG" 2>/dev/null; then
+    echo -e " ${OK} twozero_td MCP entry exists in Hermes config"
+else
+    echo -e " ${WARN} Adding twozero_td MCP entry to Hermes config..."
+    python3 -c "
+import yaml, sys, copy
+
+cfg_path = '$HERMES_CFG'
+with open(cfg_path, 'r') as f:
+    cfg = yaml.safe_load(f) or {}
+
+if 'mcp_servers' not in cfg:
+    cfg['mcp_servers'] = {}
+
+if 'twozero_td' not in cfg['mcp_servers']:
+    cfg['mcp_servers']['twozero_td'] = {
+        'url': '${MCP_ENDPOINT}',
+        'timeout': 120,
+        'connect_timeout': 60
+    }
+    with open(cfg_path, 'w') as f:
+        yaml.dump(cfg, f, default_flow_style=False, sort_keys=False)
+" 2>/dev/null && echo -e " ${OK} twozero_td MCP entry added to config" \
+              || { echo -e " ${FAIL} Could not update config (is PyYAML installed?)"; \
+                   manual_steps+=("Add twozero_td MCP entry to ${HERMES_CFG} manually"); }
+    manual_steps+=("Restart Hermes session to pick up config change")
+fi
+
+# ── 4. Test if MCP port is responding ──
+if nc -z 127.0.0.1 "$MCP_PORT" 2>/dev/null; then
+    echo -e " ${OK} Port ${MCP_PORT} is open"
+
+    # ── 5. Verify MCP endpoint responds ──
+    resp=$(curl -s --max-time 3 "$MCP_ENDPOINT" 2>/dev/null || true)
+    if [[ -n "$resp" ]]; then
+        echo -e " ${OK} MCP endpoint responded at ${MCP_ENDPOINT}"
+    else
+        echo -e " ${WARN} Port open but MCP endpoint returned empty response"
+        manual_steps+=("Verify MCP is enabled in twozero settings")
+    fi
+else
+    echo -e " ${WARN} Port ${MCP_PORT} is not open"
+    if [[ "$td_running" == true ]]; then
+        manual_steps+=("In TD: drag twozero.tox into network editor → click Install")
+        manual_steps+=("Enable MCP: twozero icon → Settings → mcp → 'auto start MCP' → Yes")
+    else
+        manual_steps+=("Launch TouchDesigner")
+        manual_steps+=("Drag twozero.tox into the TD network editor and click Install")
+        manual_steps+=("Enable MCP: twozero icon → Settings → mcp → 'auto start MCP' → Yes")
+    fi
+fi
+
+# ── Status Report ──
+echo -e "\n${CYAN}═══ Status Report ═══${NC}\n"
+
+if [[ ${#manual_steps[@]} -eq 0 ]]; then
+    echo -e " ${OK} ${GREEN}Fully configured! twozero MCP is ready to use.${NC}\n"
+    exit 0
+else
+    echo -e " ${WARN} ${YELLOW}Manual steps remaining:${NC}\n"
+    for i in "${!manual_steps[@]}"; do
+        echo -e "   $((i+1)). ${manual_steps[$i]}"
+    done
+    echo ""
+    exit 1
+fi
diff --git a/skills/mcp/mcporter/SKILL.md b/optional-skills/mcp/mcporter/SKILL.md
similarity index 100%
rename from skills/mcp/mcporter/SKILL.md
rename to optional-skills/mcp/mcporter/SKILL.md
diff --git a/skills/mlops/models/clip/SKILL.md b/optional-skills/mlops/clip/SKILL.md
similarity index 100%
rename from skills/mlops/models/clip/SKILL.md
rename to optional-skills/mlops/clip/SKILL.md
diff --git a/skills/mlops/models/clip/references/applications.md b/optional-skills/mlops/clip/references/applications.md
similarity index 100%
rename from skills/mlops/models/clip/references/applications.md
rename to optional-skills/mlops/clip/references/applications.md
diff --git a/skills/mlops/inference/guidance/SKILL.md b/optional-skills/mlops/guidance/SKILL.md
similarity index 100%
rename from skills/mlops/inference/guidance/SKILL.md
rename to optional-skills/mlops/guidance/SKILL.md
diff --git a/skills/mlops/inference/guidance/references/backends.md b/optional-skills/mlops/guidance/references/backends.md
similarity index 100%
rename from skills/mlops/inference/guidance/references/backends.md
rename to optional-skills/mlops/guidance/references/backends.md
diff --git a/skills/mlops/inference/guidance/references/constraints.md b/optional-skills/mlops/guidance/references/constraints.md
similarity index 100%
rename from skills/mlops/inference/guidance/references/constraints.md
rename to optional-skills/mlops/guidance/references/constraints.md
diff --git a/skills/mlops/inference/guidance/references/examples.md b/optional-skills/mlops/guidance/references/examples.md
similarity index 100%
rename from skills/mlops/inference/guidance/references/examples.md
rename to optional-skills/mlops/guidance/references/examples.md
diff --git a/optional-skills/mlops/hermes-atropos-environments/SKILL.md b/optional-skills/mlops/hermes-atropos-environments/SKILL.md
index 9dff4668767..5101886b41a 100644
--- a/optional-skills/mlops/hermes-atropos-environments/SKILL.md
+++ b/optional-skills/mlops/hermes-atropos-environments/SKILL.md
@@ -7,7 +7,7 @@ license: MIT
 metadata:
   hermes:
     tags: [atropos, rl, environments, training, reinforcement-learning, reward-functions]
-    related_skills: [axolotl, grpo-rl-training, trl-fine-tuning, lm-evaluation-harness]
+    related_skills: [axolotl, fine-tuning-with-trl, lm-evaluation-harness]
 ---
 
 # Hermes Agent Atropos Environments
diff --git a/skills/mlops/cloud/modal/SKILL.md b/optional-skills/mlops/modal/SKILL.md
similarity index 100%
rename from skills/mlops/cloud/modal/SKILL.md
rename to optional-skills/mlops/modal/SKILL.md
diff --git a/skills/mlops/cloud/modal/references/advanced-usage.md b/optional-skills/mlops/modal/references/advanced-usage.md
similarity index 100%
rename from skills/mlops/cloud/modal/references/advanced-usage.md
rename to optional-skills/mlops/modal/references/advanced-usage.md
diff --git a/skills/mlops/cloud/modal/references/troubleshooting.md b/optional-skills/mlops/modal/references/troubleshooting.md
similarity index 100%
rename from skills/mlops/cloud/modal/references/troubleshooting.md
rename to optional-skills/mlops/modal/references/troubleshooting.md
diff --git a/skills/mlops/training/peft/SKILL.md b/optional-skills/mlops/peft/SKILL.md
similarity index 100%
rename from skills/mlops/training/peft/SKILL.md
rename to optional-skills/mlops/peft/SKILL.md
diff --git a/skills/mlops/training/peft/references/advanced-usage.md b/optional-skills/mlops/peft/references/advanced-usage.md
similarity index 100%
rename from skills/mlops/training/peft/references/advanced-usage.md
rename to optional-skills/mlops/peft/references/advanced-usage.md
diff --git a/skills/mlops/training/peft/references/troubleshooting.md b/optional-skills/mlops/peft/references/troubleshooting.md
similarity index 100%
rename from skills/mlops/training/peft/references/troubleshooting.md
rename to optional-skills/mlops/peft/references/troubleshooting.md
diff --git a/skills/mlops/training/pytorch-fsdp/SKILL.md b/optional-skills/mlops/pytorch-fsdp/SKILL.md
similarity index 100%
rename from skills/mlops/training/pytorch-fsdp/SKILL.md
rename to optional-skills/mlops/pytorch-fsdp/SKILL.md
diff --git a/skills/mlops/training/pytorch-fsdp/references/index.md b/optional-skills/mlops/pytorch-fsdp/references/index.md
similarity index 100%
rename from skills/mlops/training/pytorch-fsdp/references/index.md
rename to optional-skills/mlops/pytorch-fsdp/references/index.md
diff --git a/skills/mlops/training/pytorch-fsdp/references/other.md b/optional-skills/mlops/pytorch-fsdp/references/other.md
similarity index 100%
rename from skills/mlops/training/pytorch-fsdp/references/other.md
rename to optional-skills/mlops/pytorch-fsdp/references/other.md
diff --git a/skills/mlops/models/stable-diffusion/SKILL.md b/optional-skills/mlops/stable-diffusion/SKILL.md
similarity index 100%
rename from skills/mlops/models/stable-diffusion/SKILL.md
rename to optional-skills/mlops/stable-diffusion/SKILL.md
diff --git a/skills/mlops/models/stable-diffusion/references/advanced-usage.md b/optional-skills/mlops/stable-diffusion/references/advanced-usage.md
similarity index 100%
rename from skills/mlops/models/stable-diffusion/references/advanced-usage.md
rename to optional-skills/mlops/stable-diffusion/references/advanced-usage.md
diff --git a/skills/mlops/models/stable-diffusion/references/troubleshooting.md b/optional-skills/mlops/stable-diffusion/references/troubleshooting.md
similarity index 100%
rename from skills/mlops/models/stable-diffusion/references/troubleshooting.md
rename to optional-skills/mlops/stable-diffusion/references/troubleshooting.md
diff --git a/skills/mlops/models/whisper/SKILL.md b/optional-skills/mlops/whisper/SKILL.md
similarity index 100%
rename from skills/mlops/models/whisper/SKILL.md
rename to optional-skills/mlops/whisper/SKILL.md
diff --git a/skills/mlops/models/whisper/references/languages.md b/optional-skills/mlops/whisper/references/languages.md
similarity index 100%
rename from skills/mlops/models/whisper/references/languages.md
rename to optional-skills/mlops/whisper/references/languages.md
diff --git a/optional-skills/productivity/telephony/SKILL.md b/optional-skills/productivity/telephony/SKILL.md
index c74a3692091..6c457592a9a 100644
--- a/optional-skills/productivity/telephony/SKILL.md
+++ b/optional-skills/productivity/telephony/SKILL.md
@@ -7,7 +7,7 @@ license: MIT
 metadata:
   hermes:
     tags: [telephony, phone, sms, mms, voice, twilio, bland.ai, vapi, calling, texting]
-    related_skills: [find-nearby, google-workspace, agentmail]
+    related_skills: [maps, google-workspace, agentmail]
     category: productivity
 ---
 
diff --git a/plugins/memory/honcho/__init__.py b/plugins/memory/honcho/__init__.py
index ca44ce60193..6ca32c1dcbb 100644
--- a/plugins/memory/honcho/__init__.py
+++ b/plugins/memory/honcho/__init__.py
@@ -19,6 +19,7 @@ import json
 import logging
 import re
 import threading
+import time
 from typing import Any, Dict, List, Optional
 
 from agent.memory_provider import MemoryProvider
@@ -206,13 +207,19 @@ class HonchoMemoryProvider(MemoryProvider):
         self._turn_count = 0
         self._injection_frequency = "every-turn"  # or "first-turn"
         self._context_cadence = 1   # minimum turns between context API calls
-        self._dialectic_cadence = 3  # minimum turns between dialectic API calls
+        self._dialectic_cadence = 1  # backwards-compat fallback; wizard writes 2 on new configs
         self._dialectic_depth = 1   # how many .chat() calls per dialectic cycle (1-3)
         self._dialectic_depth_levels: list[str] | None = None  # per-pass reasoning levels
-        self._reasoning_level_cap: Optional[str] = None  # "minimal", "low", "medium", "high"
+        self._reasoning_heuristic: bool = True  # scale base level by query length
+        self._reasoning_level_cap: str = "high"  # ceiling for auto-selected level
         self._last_context_turn = -999
         self._last_dialectic_turn = -999
 
+        # Liveness + observability state
+        self._prefetch_thread_started_at: float = 0.0   # monotonic ts of current thread
+        self._prefetch_result_fired_at: int = -999      # turn the pending result was fired at
+        self._dialectic_empty_streak: int = 0           # consecutive empty returns
+
         # Port #1957: lazy session init for tools-only mode
         self._session_initialized = False
         self._lazy_init_kwargs: Optional[dict] = None
@@ -286,14 +293,6 @@ class HonchoMemoryProvider(MemoryProvider):
                 logger.debug("Honcho not configured — plugin inactive")
                 return
 
-            # Override peer_name with gateway user_id for per-user memory scoping.
-            # Only when no explicit peerName was configured — an explicit peerName
-            # means the user chose their identity; a raw user_id (e.g. Telegram
-            # chat ID) should not silently replace it.
-            _gw_user_id = kwargs.get("user_id")
-            if _gw_user_id and not cfg.peer_name:
-                cfg.peer_name = _gw_user_id
-
             self._config = cfg
 
             # ----- B1: recall_mode from config -----
@@ -305,12 +304,16 @@ class HonchoMemoryProvider(MemoryProvider):
                 raw = cfg.raw or {}
                 self._injection_frequency = raw.get("injectionFrequency", "every-turn")
                 self._context_cadence = int(raw.get("contextCadence", 1))
-                self._dialectic_cadence = int(raw.get("dialecticCadence", 3))
+                # Backwards-compat: unset dialecticCadence falls back to 1
+                # (every turn) so existing honcho.json configs without the key
+                # behave as they did before. New setups via `hermes honcho setup`
+                # get dialecticCadence=2 written explicitly by the wizard.
+                self._dialectic_cadence = int(raw.get("dialecticCadence", 1))
                 self._dialectic_depth = max(1, min(cfg.dialectic_depth, 3))
                 self._dialectic_depth_levels = cfg.dialectic_depth_levels
-                cap = raw.get("reasoningLevelCap")
-                if cap and cap in ("minimal", "low", "medium", "high"):
-                    self._reasoning_level_cap = cap
+                self._reasoning_heuristic = cfg.reasoning_heuristic
+                if cfg.reasoning_level_cap in self._LEVEL_ORDER:
+                    self._reasoning_level_cap = cfg.reasoning_level_cap
             except Exception as e:
                 logger.debug("Honcho cost-awareness config parse error: %s", e)
 
@@ -352,6 +355,7 @@ class HonchoMemoryProvider(MemoryProvider):
             honcho=client,
             config=cfg,
             context_tokens=cfg.context_tokens,
+            runtime_user_peer_name=kwargs.get("user_id") or None,
         )
 
         # ----- B3: resolve_session_name -----
@@ -391,14 +395,45 @@ class HonchoMemoryProvider(MemoryProvider):
         except Exception as e:
             logger.debug("Honcho memory file migration skipped: %s", e)
 
-        # ----- B7: Pre-warming context at init -----
+        # ----- B7: Pre-warming at init -----
+        # Context prewarm warms peer.context() (base layer), consumed via
+        # pop_context_result() in prefetch(). Dialectic prewarm runs the
+        # full configured depth and writes into _prefetch_result so turn 1
+        # consumes the result directly.
         if self._recall_mode in ("context", "hybrid"):
             try:
                 self._manager.prefetch_context(self._session_key)
-                self._manager.prefetch_dialectic(self._session_key, "What should I know about this user?")
-                logger.debug("Honcho pre-warm threads started for session: %s", self._session_key)
             except Exception as e:
-                logger.debug("Honcho pre-warm failed: %s", e)
+                logger.debug("Honcho context prewarm failed: %s", e)
+
+            _prewarm_query = (
+                "Summarize what you know about this user. "
+                "Focus on preferences, current projects, and working style."
+            )
+
+            def _prewarm_dialectic() -> None:
+                try:
+                    r = self._run_dialectic_depth(_prewarm_query)
+                except Exception as exc:
+                    logger.debug("Honcho dialectic prewarm failed: %s", exc)
+                    self._dialectic_empty_streak += 1
+                    return
+                if r and r.strip():
+                    with self._prefetch_lock:
+                        self._prefetch_result = r
+                        self._prefetch_result_fired_at = 0
+                    # Treat prewarm as turn 0 so cadence gating starts clean.
+                    self._last_dialectic_turn = 0
+                    self._dialectic_empty_streak = 0
+                else:
+                    self._dialectic_empty_streak += 1
+
+            self._prefetch_thread_started_at = time.monotonic()
+            self._prefetch_thread = threading.Thread(
+                target=_prewarm_dialectic, daemon=True, name="honcho-prewarm-dialectic"
+            )
+            self._prefetch_thread.start()
+            logger.debug("Honcho pre-warm started for session: %s", self._session_key)
 
     def _ensure_session(self) -> bool:
         """Lazily initialize the Honcho session (for tools-only mode).
@@ -487,7 +522,8 @@ class HonchoMemoryProvider(MemoryProvider):
                 "# Honcho Memory\n"
                 "Active (tools-only mode). Use honcho_profile for a quick factual snapshot, "
                 "honcho_search for raw excerpts, honcho_context for raw peer context, "
-                "honcho_reasoning for synthesized answers, "
+                "honcho_reasoning for synthesized answers (pass reasoning_level "
+                "minimal/low/medium/high/max — you pick the depth per call), "
                 "honcho_conclude to save facts about the user. "
                 "No automatic context injection — you must use tools to access memory."
             )
@@ -497,7 +533,8 @@ class HonchoMemoryProvider(MemoryProvider):
                 "Active (hybrid mode). Relevant context is auto-injected AND memory tools are available. "
                 "Use honcho_profile for a quick factual snapshot, "
                 "honcho_search for raw excerpts, honcho_context for raw peer context, "
-                "honcho_reasoning for synthesized answers, "
+                "honcho_reasoning for synthesized answers (pass reasoning_level "
+                "minimal/low/medium/high/max — you pick the depth per call), "
                 "honcho_conclude to save facts about the user."
             )
 
@@ -526,6 +563,10 @@ class HonchoMemoryProvider(MemoryProvider):
         if self._injection_frequency == "first-turn" and self._turn_count > 1:
             return ""
 
+        # Trivial prompts ("ok", "yes", slash commands) carry no semantic signal.
+        if self._is_trivial_prompt(query):
+            return ""
+
         parts = []
 
         # ----- Layer 1: Base context (representation + card) -----
@@ -560,43 +601,72 @@ class HonchoMemoryProvider(MemoryProvider):
         # On the very first turn, no queue_prefetch() has run yet so the
         # dialectic result is empty.  Run with a bounded timeout so a slow
         # Honcho connection doesn't block the first response indefinitely.
-        # On timeout the result is skipped and queue_prefetch() will pick it
-        # up at the next cadence-allowed turn.
+        # On timeout we let the thread keep running and write its result into
+        # _prefetch_result under the lock, so the next turn picks it up.
+        #
+        # Skip if the session-start prewarm already filled _prefetch_result —
+        # firing another .chat() would be duplicate work.
+        with self._prefetch_lock:
+            _prewarm_landed = bool(self._prefetch_result)
+        if _prewarm_landed and self._last_dialectic_turn == -999:
+            self._last_dialectic_turn = self._turn_count
+
         if self._last_dialectic_turn == -999 and query:
             _first_turn_timeout = (
                 self._config.timeout if self._config and self._config.timeout else 8.0
             )
-            _result_holder: list[str] = []
+            _fired_at = self._turn_count
 
             def _run_first_turn() -> None:
                 try:
-                    _result_holder.append(self._run_dialectic_depth(query))
+                    r = self._run_dialectic_depth(query)
                 except Exception as exc:
                     logger.debug("Honcho first-turn dialectic failed: %s", exc)
-
-            _t = threading.Thread(target=_run_first_turn, daemon=True)
-            _t.start()
-            _t.join(timeout=_first_turn_timeout)
-            if not _t.is_alive():
-                first_turn_dialectic = _result_holder[0] if _result_holder else ""
-                if first_turn_dialectic and first_turn_dialectic.strip():
+                    self._dialectic_empty_streak += 1
+                    return
+                if r and r.strip():
                     with self._prefetch_lock:
-                        self._prefetch_result = first_turn_dialectic
-                self._last_dialectic_turn = self._turn_count
-            else:
+                        self._prefetch_result = r
+                        self._prefetch_result_fired_at = _fired_at
+                    # Advance cadence only on a non-empty result so the next
+                    # turn retries when the call returned nothing.
+                    self._last_dialectic_turn = _fired_at
+                    self._dialectic_empty_streak = 0
+                else:
+                    self._dialectic_empty_streak += 1
+
+            self._prefetch_thread_started_at = time.monotonic()
+            self._prefetch_thread = threading.Thread(
+                target=_run_first_turn, daemon=True, name="honcho-prefetch-first"
+            )
+            self._prefetch_thread.start()
+            self._prefetch_thread.join(timeout=_first_turn_timeout)
+            if self._prefetch_thread.is_alive():
                 logger.debug(
-                    "Honcho first-turn dialectic timed out (%.1fs) — "
-                    "will inject at next cadence-allowed turn",
+                    "Honcho first-turn dialectic still running after %.1fs — "
+                    "will surface on next turn",
                     _first_turn_timeout,
                 )
-                # Don't update _last_dialectic_turn: queue_prefetch() will
-                # retry at the next cadence-allowed turn via the async path.
 
         if self._prefetch_thread and self._prefetch_thread.is_alive():
             self._prefetch_thread.join(timeout=3.0)
         with self._prefetch_lock:
             dialectic_result = self._prefetch_result
+            fired_at = self._prefetch_result_fired_at
             self._prefetch_result = ""
+            self._prefetch_result_fired_at = -999
+
+        # Discard stale pending results: if the fire happened more than
+        # cadence × multiplier turns ago (e.g. a run of trivial-prompt turns
+        # passed without consumption), the content likely no longer tracks
+        # the current conversational pivot.
+        stale_limit = self._dialectic_cadence * self._STALE_RESULT_MULTIPLIER
+        if dialectic_result and fired_at >= 0 and (self._turn_count - fired_at) > stale_limit:
+            logger.debug(
+                "Honcho pending dialectic discarded as stale: fired_at=%d, "
+                "turn=%d, limit=%d", fired_at, self._turn_count, stale_limit,
+            )
+            dialectic_result = ""
 
         if dialectic_result and dialectic_result.strip():
             parts.append(dialectic_result)
@@ -641,6 +711,10 @@ class HonchoMemoryProvider(MemoryProvider):
         if self._recall_mode == "tools":
             return
 
+        # Trivial prompts don't warrant either a context refresh or a dialectic call.
+        if self._is_trivial_prompt(query):
+            return
+
         # ----- Context refresh (base layer) — independent cadence -----
         if self._context_cadence <= 1 or (self._turn_count - self._last_context_turn) >= self._context_cadence:
             self._last_context_turn = self._turn_count
@@ -650,24 +724,46 @@ class HonchoMemoryProvider(MemoryProvider):
                 logger.debug("Honcho context prefetch failed: %s", e)
 
         # ----- Dialectic prefetch (supplement layer) -----
-        # B5: cadence check — skip if too soon since last dialectic call
-        if self._dialectic_cadence > 1:
-            if (self._turn_count - self._last_dialectic_turn) < self._dialectic_cadence:
-                logger.debug("Honcho dialectic prefetch skipped: cadence %d, turns since last: %d",
-                             self._dialectic_cadence, self._turn_count - self._last_dialectic_turn)
-                return
+        # Thread-alive guard with stale-thread recovery: a hung Honcho call
+        # older than timeout × multiplier is treated as dead so it can't
+        # block subsequent fires.
+        if self._thread_is_live():
+            logger.debug("Honcho dialectic prefetch skipped: prior thread still running")
+            return
 
-        self._last_dialectic_turn = self._turn_count
+        # Cadence gate, widened by the empty-streak backoff so a persistently
+        # silent backend doesn't retry every turn forever.
+        effective = self._effective_cadence()
+        if (self._turn_count - self._last_dialectic_turn) < effective:
+            logger.debug(
+                "Honcho dialectic prefetch skipped: effective cadence %d "
+                "(base %d, empty streak %d), turns since last: %d",
+                effective, self._dialectic_cadence, self._dialectic_empty_streak,
+                self._turn_count - self._last_dialectic_turn,
+            )
+            return
+
+        # Cadence advances only on a non-empty result so empty returns
+        # (transient API error, sparse representation) retry next turn.
+        _fired_at = self._turn_count
 
         def _run():
             try:
                 result = self._run_dialectic_depth(query)
-                if result and result.strip():
-                    with self._prefetch_lock:
-                        self._prefetch_result = result
             except Exception as e:
                 logger.debug("Honcho prefetch failed: %s", e)
+                self._dialectic_empty_streak += 1
+                return
+            if result and result.strip():
+                with self._prefetch_lock:
+                    self._prefetch_result = result
+                    self._prefetch_result_fired_at = _fired_at
+                self._last_dialectic_turn = _fired_at
+                self._dialectic_empty_streak = 0
+            else:
+                self._dialectic_empty_streak += 1
 
+        self._prefetch_thread_started_at = time.monotonic()
         self._prefetch_thread = threading.Thread(
             target=_run, daemon=True, name="honcho-prefetch"
         )
@@ -692,11 +788,91 @@ class HonchoMemoryProvider(MemoryProvider):
 
     _LEVEL_ORDER = ("minimal", "low", "medium", "high", "max")
 
-    def _resolve_pass_level(self, pass_idx: int) -> str:
+    # Char-count thresholds for the query-length reasoning heuristic.
+    _HEURISTIC_LENGTH_MEDIUM = 120
+    _HEURISTIC_LENGTH_HIGH = 400
+
+    # Liveness constants. A thread older than timeout × multiplier is treated
+    # as dead so a hung Honcho call can't block future retries indefinitely.
+    _STALE_THREAD_MULTIPLIER = 2.0
+    # Pending result whose fire-turn is older than cadence × multiplier is
+    # discarded on read so we don't inject context for a stale conversational
+    # pivot after a gap of trivial-prompt turns.
+    _STALE_RESULT_MULTIPLIER = 2
+    # Cap on the empty-streak backoff so a persistently silent backend
+    # eventually settles on a ceiling instead of unbounded widening.
+    _BACKOFF_MAX = 8
+
+    def _thread_is_live(self) -> bool:
+        """Report whether the prefetch thread is running and not yet stale;
+        one alive longer than timeout × _STALE_THREAD_MULTIPLIER counts as dead."""
+        worker = self._prefetch_thread
+        if not (worker and worker.is_alive()):
+            return False
+        cfg = self._config
+        limit = (cfg.timeout if cfg and cfg.timeout else 8.0) * self._STALE_THREAD_MULTIPLIER
+        elapsed = time.monotonic() - self._prefetch_thread_started_at
+        if elapsed <= limit:
+            return True
+        logger.debug("Honcho prefetch thread age %.1fs exceeds stale threshold "
+                     "%.1fs — treating as dead", elapsed, limit)
+        return False
+
+    def _effective_cadence(self) -> int:
+        """Return the dialectic cadence widened by one turn per consecutive
+        empty result, never exceeding base cadence × _BACKOFF_MAX."""
+        base, misses = self._dialectic_cadence, self._dialectic_empty_streak
+        if misses > 0:
+            return min(base + misses, base * self._BACKOFF_MAX)
+        return base
+
+    def liveness_snapshot(self) -> dict:
+        """Collect the dialectic liveness counters into a dict for diagnostics.
+
+        Keys: turn_count, last_dialectic_turn, pending_result_fired_at, empty_streak,
+        effective_cadence, thread_alive, thread_age_seconds (None unless alive).
+        """
+        worker = self._prefetch_thread
+        running = bool(worker and worker.is_alive())
+        age = (time.monotonic() - self._prefetch_thread_started_at) if running else None
+        return dict(
+            turn_count=self._turn_count,
+            last_dialectic_turn=self._last_dialectic_turn,
+            pending_result_fired_at=self._prefetch_result_fired_at,
+            empty_streak=self._dialectic_empty_streak,
+            effective_cadence=self._effective_cadence(),
+            thread_alive=running,
+            thread_age_seconds=age,
+        )
+
+    def _apply_reasoning_heuristic(self, base: str, query: str) -> str:
+        """Scale `base` up by query length (+1 at >=120 chars, +2 at >=400).
+
+        The bump is clamped at reasoning_level_cap; `base` itself is never lowered."""
+        if not self._reasoning_heuristic or not query:
+            return base
+        if base not in self._LEVEL_ORDER:
+            return base
+        n = len(query)
+        if n < self._HEURISTIC_LENGTH_MEDIUM:
+            bump = 0
+        elif n < self._HEURISTIC_LENGTH_HIGH:
+            bump = 1
+        else:
+            bump = 2
+        base_idx = self._LEVEL_ORDER.index(base)
+        cap_idx = self._LEVEL_ORDER.index(self._reasoning_level_cap)
+        # max() keeps an explicitly configured base above the cap from being downgraded.
+        return self._LEVEL_ORDER[max(base_idx, min(base_idx + bump, cap_idx))]
+
+    def _resolve_pass_level(self, pass_idx: int, query: str = "") -> str:
         """Resolve reasoning level for a given pass index.
 
-        Uses dialecticDepthLevels if configured, otherwise proportional
-        defaults relative to dialecticReasoningLevel.
+        Precedence:
+          1. dialecticDepthLevels (explicit per-pass) — wins absolutely
+          2. _PROPORTIONAL_LEVELS table (depth>1 lighter-early passes)
+          3. Base level = dialecticReasoningLevel, optionally scaled by the
+             reasoning heuristic when the mapping falls through to 'base'
         """
         if self._dialectic_depth_levels and pass_idx < len(self._dialectic_depth_levels):
             return self._dialectic_depth_levels[pass_idx]
@@ -704,7 +880,7 @@ class HonchoMemoryProvider(MemoryProvider):
         base = (self._config.dialectic_reasoning_level if self._config else "low")
         mapping = self._PROPORTIONAL_LEVELS.get((self._dialectic_depth, pass_idx))
         if mapping is None or mapping == "base":
-            return base
+            return self._apply_reasoning_heuristic(base, query)
         return mapping
 
     def _build_dialectic_prompt(self, pass_idx: int, prior_results: list[str], is_cold: bool) -> str:
@@ -791,7 +967,7 @@ class HonchoMemoryProvider(MemoryProvider):
                     break
                 prompt = self._build_dialectic_prompt(i, results, is_cold)
 
-            level = self._resolve_pass_level(i)
+            level = self._resolve_pass_level(i, query=query)
             logger.debug("Honcho dialectic depth %d: pass %d, level=%s, cold=%s",
                          self._dialectic_depth, i, level, is_cold)
 
@@ -808,6 +984,29 @@ class HonchoMemoryProvider(MemoryProvider):
                 return r
         return ""
 
+    # Prompts that carry no semantic signal — trivial acknowledgements, slash
+    # commands, empty input. Skipping injection here saves tokens and prevents
+    # stale user-model context from derailing one-word replies.
+    _TRIVIAL_PROMPT_RE = re.compile(
+        r'^(yes|no|ok|okay|sure|thanks|thank you|y|n|yep|nope|yeah|nah|'
+        r'continue|go ahead|do it|proceed|got it|cool|nice|great|done|next|lgtm|k)$',
+        re.IGNORECASE,
+    )
+
+    @classmethod
+    def _is_trivial_prompt(cls, text: str) -> bool:
+        """Tell whether a prompt carries too little signal for context injection.
+
+        Trivial means: empty or whitespace-only input, a slash command (first
+        non-space character is "/"), or a stock acknowledgement matched by
+        _TRIVIAL_PROMPT_RE.
+        """
+        if not text:
+            return True
+        body = text.strip()
+        is_command = body.startswith("/")
+        return not body or is_command or cls._TRIVIAL_PROMPT_RE.match(body) is not None
+
     def on_turn_start(self, turn_number: int, message: str, **kwargs) -> None:
         """Track turn count for cadence and injection_frequency logic."""
         self._turn_count = turn_number
diff --git a/plugins/memory/honcho/cli.py b/plugins/memory/honcho/cli.py
index 536d34002de..5c829a4c989 100644
--- a/plugins/memory/honcho/cli.py
+++ b/plugins/memory/honcho/cli.py
@@ -460,17 +460,37 @@ def cmd_setup(args) -> None:
             pass  # keep current
 
     # --- 7b. Dialectic cadence ---
-    current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "3")
+    current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "2")
     print("\n  Dialectic cadence:")
     print("    How often Honcho rebuilds its user model (LLM call on Honcho backend).")
-    print("    1 = every turn (aggressive), 3 = every 3 turns (recommended), 5+ = sparse.")
+    print("    1 = every turn, 2 = every other turn, 3+ = sparser.")
+    print("    Recommended: 1-5.")
     new_dialectic = _prompt("Dialectic cadence", default=current_dialectic)
     try:
         val = int(new_dialectic)
         if val >= 1:
             hermes_host["dialecticCadence"] = val
     except (ValueError, TypeError):
-        hermes_host["dialecticCadence"] = 3
+        hermes_host["dialecticCadence"] = 2
+
+    # --- 7c. Dialectic reasoning level ---
+    current_reasoning = (
+        hermes_host.get("dialecticReasoningLevel")
+        or cfg.get("dialecticReasoningLevel")
+        or "low"
+    )
+    print("\n  Dialectic reasoning level:")
+    print("    Depth Honcho uses when synthesizing user context on auto-injected calls.")
+    print("    minimal  -- quick factual lookups")
+    print("    low      -- straightforward questions (default)")
+    print("    medium   -- multi-aspect synthesis")
+    print("    high     -- complex behavioral patterns")
+    print("    max      -- thorough audit-level analysis")
+    new_reasoning = _prompt("Reasoning level", default=current_reasoning)
+    if new_reasoning in ("minimal", "low", "medium", "high", "max"):
+        hermes_host["dialecticReasoningLevel"] = new_reasoning
+    else:
+        hermes_host["dialecticReasoningLevel"] = "low"
 
     # --- 8. Session strategy ---
     current_strat = hermes_host.get("sessionStrategy") or cfg.get("sessionStrategy", "per-session")
@@ -636,8 +656,11 @@ def cmd_status(args) -> None:
     print(f"  Recall mode:    {hcfg.recall_mode}")
     print(f"  Context budget: {hcfg.context_tokens or '(uncapped)'} tokens")
     raw = getattr(hcfg, "raw", None) or {}
-    dialectic_cadence = raw.get("dialecticCadence") or 3
+    dialectic_cadence = raw.get("dialecticCadence") or 1
     print(f"  Dialectic cad:  every {dialectic_cadence} turn{'s' if dialectic_cadence != 1 else ''}")
+    reasoning_cap = raw.get("reasoningLevelCap") or hcfg.reasoning_level_cap
+    heuristic_on = "on" if hcfg.reasoning_heuristic else "off"
+    print(f"  Reasoning:      base={hcfg.dialectic_reasoning_level}, cap={reasoning_cap}, heuristic={heuristic_on}")
     print(f"  Observation:    user(me={hcfg.user_observe_me},others={hcfg.user_observe_others}) ai(me={hcfg.ai_observe_me},others={hcfg.ai_observe_others})")
     print(f"  Write freq:     {hcfg.write_frequency}")
 
diff --git a/plugins/memory/honcho/client.py b/plugins/memory/honcho/client.py
index 2474d3a2b65..fef2e2d58f1 100644
--- a/plugins/memory/honcho/client.py
+++ b/plugins/memory/honcho/client.py
@@ -251,6 +251,11 @@ class HonchoClientConfig:
     # matching dialectic_depth length. When None, uses proportional defaults
     # derived from dialectic_reasoning_level.
     dialectic_depth_levels: list[str] | None = None
+    # When true, the auto-injected dialectic scales reasoning level up on
+    # longer queries. See HonchoMemoryProvider for thresholds.
+    reasoning_heuristic: bool = True
+    # Ceiling for the heuristic-selected reasoning level.
+    reasoning_level_cap: str = "high"
     # Honcho API limits — configurable for self-hosted instances
     # Max chars per message sent via add_messages() (Honcho cloud: 25000)
     message_max_chars: int = 25000
@@ -446,6 +451,16 @@ class HonchoClientConfig:
                 raw.get("dialecticDepthLevels"),
                 depth=_parse_dialectic_depth(host_block.get("dialecticDepth"), raw.get("dialecticDepth")),
             ),
+            reasoning_heuristic=_resolve_bool(
+                host_block.get("reasoningHeuristic"),
+                raw.get("reasoningHeuristic"),
+                default=True,
+            ),
+            reasoning_level_cap=(
+                host_block.get("reasoningLevelCap")
+                or raw.get("reasoningLevelCap")
+                or "high"
+            ),
             message_max_chars=int(
                 host_block.get("messageMaxChars")
                 or raw.get("messageMaxChars")
diff --git a/plugins/memory/honcho/session.py b/plugins/memory/honcho/session.py
index fd91ee3b3b9..79625b5cd58 100644
--- a/plugins/memory/honcho/session.py
+++ b/plugins/memory/honcho/session.py
@@ -78,6 +78,7 @@ class HonchoSessionManager:
         honcho: Honcho | None = None,
         context_tokens: int | None = None,
         config: Any | None = None,
+        runtime_user_peer_name: str | None = None,
     ):
         """
         Initialize the session manager.
@@ -87,10 +88,12 @@ class HonchoSessionManager:
             context_tokens: Max tokens for context() calls (None = Honcho default).
             config: HonchoClientConfig from global config (provides peer_name, ai_peer,
                     write_frequency, observation, etc.).
+            runtime_user_peer_name: Gateway user identity for per-user memory scoping.
         """
         self._honcho = honcho
         self._context_tokens = context_tokens
         self._config = config
+        self._runtime_user_peer_name = runtime_user_peer_name
         self._cache: dict[str, HonchoSession] = {}
         self._peers_cache: dict[str, Any] = {}
         self._sessions_cache: dict[str, Any] = {}
@@ -100,9 +103,11 @@ class HonchoSessionManager:
         self._write_frequency = write_frequency
         self._turn_counter: int = 0
 
-        # Prefetch caches: session_key → last result (consumed once per turn)
+        # Prefetch cache: session_key → last context result (consumed once per turn).
+        # Dialectic results are cached on the plugin side (HonchoMemoryProvider
+        # ._prefetch_result) so session-start prewarm and turn-driven fires share
+        # one source of truth; see __init__.py _do_session_init for the prewarm.
         self._context_cache: dict[str, dict] = {}
-        self._dialectic_cache: dict[str, str] = {}
         self._prefetch_cache_lock = threading.Lock()
         self._dialectic_reasoning_level: str = (
             config.dialectic_reasoning_level if config else "low"
@@ -272,8 +277,10 @@ class HonchoSessionManager:
             logger.debug("Local session cache hit: %s", key)
             return self._cache[key]
 
-        # Use peer names from global config when available
-        if self._config and self._config.peer_name:
+        # Gateway sessions should use the runtime user identity when available.
+        if self._runtime_user_peer_name:
+            user_peer_id = self._sanitize_id(self._runtime_user_peer_name)
+        elif self._config and self._config.peer_name:
             user_peer_id = self._sanitize_id(self._config.peer_name)
         else:
             # Fallback: derive from session key
@@ -499,8 +506,8 @@ class HonchoSessionManager:
         Query Honcho's dialectic endpoint about a peer.
 
         Runs an LLM on Honcho's backend against the target peer's full
-        representation. Higher latency than context() — call async via
-        prefetch_dialectic() to avoid blocking the response.
+        representation. Higher latency than context() — callers run this in
+        a background thread (see HonchoMemoryProvider) to avoid blocking.
 
         Args:
             session_key: The session key to query against.
@@ -555,42 +562,6 @@ class HonchoSessionManager:
             logger.warning("Honcho dialectic query failed: %s", e)
             return ""
 
-    def prefetch_dialectic(self, session_key: str, query: str) -> None:
-        """
-        Fire a dialectic_query in a background thread, caching the result.
-
-        Non-blocking. The result is available via pop_dialectic_result()
-        on the next call (typically the following turn). Reasoning level
-        is selected dynamically based on query complexity.
-
-        Args:
-            session_key: The session key to query against.
-            query: The user's current message, used as the query.
-        """
-        def _run():
-            result = self.dialectic_query(session_key, query)
-            if result:
-                self.set_dialectic_result(session_key, result)
-
-        t = threading.Thread(target=_run, name="honcho-dialectic-prefetch", daemon=True)
-        t.start()
-
-    def set_dialectic_result(self, session_key: str, result: str) -> None:
-        """Store a prefetched dialectic result in a thread-safe way."""
-        if not result:
-            return
-        with self._prefetch_cache_lock:
-            self._dialectic_cache[session_key] = result
-
-    def pop_dialectic_result(self, session_key: str) -> str:
-        """
-        Return and clear the cached dialectic result for this session.
-
-        Returns empty string if no result is ready yet.
-        """
-        with self._prefetch_cache_lock:
-            return self._dialectic_cache.pop(session_key, "")
-
     def prefetch_context(self, session_key: str, user_message: str | None = None) -> None:
         """
         Fire get_prefetch_context in a background thread, caching the result.
diff --git a/run_agent.py b/run_agent.py
index ef90ae39e20..8e1fbfed194 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -831,6 +831,26 @@ class AIAgent:
         self._execution_thread_id: int | None = None  # Set at run_conversation() start
         self._interrupt_thread_signal_pending = False
         self._client_lock = threading.RLock()
+
+        # /steer mechanism — inject a user note into the next tool result
+        # without interrupting the agent. Unlike interrupt(), steer() does
+        # NOT set _interrupt_requested; it waits for the current tool batch
+        # to finish naturally, then the drain hook appends the text to the
+        # last tool result's content so the model sees it on its next
+        # iteration. Message-role alternation is preserved (we modify an
+        # existing tool message rather than inserting a new user turn).
+        self._pending_steer: Optional[str] = None
+        self._pending_steer_lock = threading.Lock()
+
+        # Concurrent-tool worker thread tracking.  `_execute_tool_calls_concurrent`
+        # runs each tool on its own ThreadPoolExecutor worker — those worker
+        # threads have tids distinct from `_execution_thread_id`, so
+        # `_set_interrupt(True, _execution_thread_id)` alone does NOT cause
+        # `is_interrupted()` inside the worker to return True.  Track the
+        # workers here so `interrupt()` / `clear_interrupt()` can fan out to
+        # their tids explicitly.
+        self._tool_worker_threads: set[int] = set()
+        self._tool_worker_threads_lock = threading.Lock()
         
         # Subagent delegation state
         self._delegate_depth = 0        # 0 = top-level agent, incremented for children
@@ -1286,31 +1306,6 @@ class AIAgent:
             try:
                 _mem_provider_name = mem_config.get("provider", "") if mem_config else ""
 
-                # Auto-migrate: if Honcho was actively configured (enabled +
-                # credentials) but memory.provider is not set, activate the
-                # honcho plugin automatically.  Just having the config file
-                # is not enough — the user may have disabled Honcho or the
-                # file may be from a different tool.
-                if not _mem_provider_name:
-                    try:
-                        from plugins.memory.honcho.client import HonchoClientConfig as _HCC
-                        _hcfg = _HCC.from_global_config()
-                        if _hcfg.enabled and (_hcfg.api_key or _hcfg.base_url):
-                            _mem_provider_name = "honcho"
-                            # Persist so this only auto-migrates once
-                            try:
-                                from hermes_cli.config import load_config as _lc, save_config as _sc
-                                _cfg = _lc()
-                                _cfg.setdefault("memory", {})["provider"] = "honcho"
-                                _sc(_cfg)
-                            except Exception:
-                                pass
-                            if not self.quiet_mode:
-                                print("  ✓ Auto-migrated Honcho to memory provider plugin.")
-                                print("    Your config and data are preserved.\n")
-                    except Exception:
-                        pass
-
                 if _mem_provider_name:
                     from agent.memory_manager import MemoryManager as _MemoryManager
                     from plugins.memory import load_memory_provider as _load_mem
@@ -1921,13 +1916,16 @@ class AIAgent:
     def _should_emit_quiet_tool_messages(self) -> bool:
         """Return True when quiet-mode tool summaries should print directly.
 
-        When the caller provides ``tool_progress_callback`` (for example the CLI
-        TUI or a gateway progress renderer), that callback owns progress display.
-        Emitting quiet-mode summary lines here duplicates progress and leaks tool
-        previews into flows that are expected to stay silent, such as
-        ``hermes chat -q``.
+        Quiet mode is used by both the interactive CLI and embedded/library
+        callers. The CLI may still want compact progress hints when no callback
+        owns rendering. Embedded/library callers, on the other hand, expect
+        quiet mode to be truly silent.
         """
-        return self.quiet_mode and not self.tool_progress_callback
+        return (
+            self.quiet_mode
+            and not self.tool_progress_callback
+            and getattr(self, "platform", "") == "cli"
+        )
 
     def _emit_status(self, message: str) -> None:
         """Emit a lifecycle status message to both CLI and gateway channels.
@@ -2152,17 +2150,49 @@ class AIAgent:
         return bool(cleaned.strip())
     
     def _strip_think_blocks(self, content: str) -> str:
-        """Remove reasoning/thinking blocks from content, returning only visible text."""
+        """Remove reasoning/thinking blocks from content, returning only visible text.
+
+        Handles four cases:
+          1. Closed tag pairs (``<think>…</think>``) — the common path when
+             the provider emits complete reasoning blocks.
+          2. Unterminated open tag at a block boundary (start of text or
+             after a newline) — e.g. MiniMax M2.7 / NIM endpoints where the
+             closing tag is dropped.  Everything from the open tag to end
+             of string is stripped.  The block-boundary check mirrors
+             ``gateway/stream_consumer.py``'s filter so models that mention
+             ``<think>`` in prose aren't over-stripped.
+          3. Stray orphan open/close tags that slip through.
+          4. Tag variants: ``<think>``, ``<thinking>``, ``<reasoning>``,
+             ``<REASONING_SCRATCHPAD>``, ``<thought>`` (Gemma 4), all
+             case-insensitive.
+        """
         if not content:
             return ""
-        # Strip all reasoning tag variants: <think>, <thinking>, <THINKING>,
-        # <reasoning>, <REASONING_SCRATCHPAD>, <thought> (Gemma 4)
-        content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
+        # 1. Closed tag pairs — case-insensitive for all variants so
+        #    mixed-case tags (<THINK>, <Thinking>) don't slip through to
+        #    the unterminated-tag pass and take trailing content with them.
+        content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL | re.IGNORECASE)
         content = re.sub(r'<thinking>.*?</thinking>', '', content, flags=re.DOTALL | re.IGNORECASE)
-        content = re.sub(r'<reasoning>.*?</reasoning>', '', content, flags=re.DOTALL)
-        content = re.sub(r'<REASONING_SCRATCHPAD>.*?</REASONING_SCRATCHPAD>', '', content, flags=re.DOTALL)
+        content = re.sub(r'<reasoning>.*?</reasoning>', '', content, flags=re.DOTALL | re.IGNORECASE)
+        content = re.sub(r'<REASONING_SCRATCHPAD>.*?</REASONING_SCRATCHPAD>', '', content, flags=re.DOTALL | re.IGNORECASE)
         content = re.sub(r'<thought>.*?</thought>', '', content, flags=re.DOTALL | re.IGNORECASE)
-        content = re.sub(r'</?(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>\s*', '', content, flags=re.IGNORECASE)
+        # 2. Unterminated reasoning block — open tag at a block boundary
+        #    (start of text, or after a newline) with no matching close.
+        #    Strip from the tag to end of string.  Fixes #8878 / #9568
+        #    (MiniMax M2.7 leaking raw reasoning into assistant content).
+        content = re.sub(
+            r'(?:^|\n)[ \t]*<(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)\b[^>]*>.*$',
+            '',
+            content,
+            flags=re.DOTALL | re.IGNORECASE,
+        )
+        # 3. Stray orphan open/close tags that slipped through.
+        content = re.sub(
+            r'</?(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>\s*',
+            '',
+            content,
+            flags=re.IGNORECASE,
+        )
         return content
 
     @staticmethod
@@ -3191,6 +3221,25 @@ class AIAgent:
             # interrupt signal until startup completes instead of targeting
             # the caller thread by mistake.
             self._interrupt_thread_signal_pending = True
+        # Fan out to concurrent-tool worker threads.  Those workers run tools
+        # on their own tids (ThreadPoolExecutor workers), so `is_interrupted()`
+        # inside a tool only sees an interrupt when their specific tid is in
+        # the `_interrupted_threads` set.  Without this propagation, an
+        # already-running concurrent tool (e.g. a terminal command hung on
+        # network I/O) never notices the interrupt and has to run to its own
+        # timeout.  See `_run_tool` for the matching entry/exit bookkeeping.
+        # `getattr` fallback covers test stubs that build AIAgent via
+        # object.__new__ and skip __init__.
+        _tracker = getattr(self, "_tool_worker_threads", None)
+        _tracker_lock = getattr(self, "_tool_worker_threads_lock", None)
+        if _tracker is not None and _tracker_lock is not None:
+            with _tracker_lock:
+                _worker_tids = list(_tracker)
+            for _wtid in _worker_tids:
+                try:
+                    _set_interrupt(True, _wtid)
+                except Exception:
+                    pass
         # Propagate interrupt to any running child agents (subagent delegation)
         with self._active_children_lock:
             children_copy = list(self._active_children)
@@ -3209,6 +3258,146 @@ class AIAgent:
         self._interrupt_thread_signal_pending = False
         if self._execution_thread_id is not None:
             _set_interrupt(False, self._execution_thread_id)
+        # Also clear any concurrent-tool worker thread bits.  Tracked
+        # workers normally clear their own bit on exit, but an explicit
+        # clear here guarantees no stale interrupt can survive a turn
+        # boundary and fire on a subsequent, unrelated tool call that
+        # happens to get scheduled onto the same recycled worker tid.
+        # `getattr` fallback covers test stubs that build AIAgent via
+        # object.__new__ and skip __init__.
+        _tracker = getattr(self, "_tool_worker_threads", None)
+        _tracker_lock = getattr(self, "_tool_worker_threads_lock", None)
+        if _tracker is not None and _tracker_lock is not None:
+            with _tracker_lock:
+                _worker_tids = list(_tracker)
+            for _wtid in _worker_tids:
+                try:
+                    _set_interrupt(False, _wtid)
+                except Exception:
+                    pass
+        # A hard interrupt supersedes any pending /steer — the steer was
+        # meant for the agent's next tool-call iteration, which will no
+        # longer happen. Drop it instead of surprising the user with a
+        # late injection on the post-interrupt turn.
+        _steer_lock = getattr(self, "_pending_steer_lock", None)
+        if _steer_lock is not None:
+            with _steer_lock:
+                self._pending_steer = None
+
+    def steer(self, text: str) -> bool:
+        """
+        Inject a user message into the next tool result without interrupting.
+
+        Unlike interrupt(), this does NOT stop the current tool call. The
+        text is stashed and the agent loop appends it to the LAST tool
+        result's content once the current tool batch finishes. The model
+        sees the steer as part of the tool output on its next iteration.
+
+        Thread-safe: callable from gateway/CLI/TUI threads. Multiple calls
+        before the drain point concatenate with newlines.
+
+        Args:
+            text: The user text to inject. Empty strings are ignored.
+
+        Returns:
+            True if the steer was accepted, False if the text was empty.
+        """
+        if not text or not text.strip():
+            return False
+        cleaned = text.strip()
+        _lock = getattr(self, "_pending_steer_lock", None)
+        if _lock is None:
+            # Test stubs that built AIAgent via object.__new__ skip __init__.
+            # Fall back to direct attribute set; no concurrent callers expected
+            # in those stubs.
+            existing = getattr(self, "_pending_steer", None)
+            self._pending_steer = (existing + "\n" + cleaned) if existing else cleaned
+            return True
+        with _lock:
+            if self._pending_steer:
+                self._pending_steer = self._pending_steer + "\n" + cleaned
+            else:
+                self._pending_steer = cleaned
+        return True
+
+    def _drain_pending_steer(self) -> Optional[str]:
+        """Return the pending steer text (if any) and clear the slot.
+
+        Safe to call from the agent execution thread after appending tool
+        results. Returns None when no steer is pending.
+        """
+        _lock = getattr(self, "_pending_steer_lock", None)
+        if _lock is None:
+            text = getattr(self, "_pending_steer", None)
+            self._pending_steer = None
+            return text
+        with _lock:
+            text = self._pending_steer
+            self._pending_steer = None
+        return text
+
+    def _apply_pending_steer_to_tool_results(self, messages: list, num_tool_msgs: int) -> None:
+        """Append any pending /steer text to the last tool result in this turn.
+
+        Called at the end of a tool-call batch, before the next API call.
+        The steer is appended to the last ``role:"tool"`` message's content
+        with a clear marker so the model understands it came from the user
+        and NOT from the tool itself. Role alternation is preserved —
+        nothing new is inserted, we only modify existing content.
+
+        Args:
+            messages: The running messages list.
+            num_tool_msgs: Number of tool results appended in this batch;
+                used to locate the tail slice safely.
+        """
+        if num_tool_msgs <= 0 or not messages:
+            return
+        steer_text = self._drain_pending_steer()
+        if not steer_text:
+            return
+        # Find the last tool-role message in the recent tail. Skipping
+        # non-tool messages defends against future code appending
+        # something else at the boundary.
+        target_idx = None
+        for j in range(len(messages) - 1, max(len(messages) - num_tool_msgs - 1, -1), -1):
+            msg = messages[j]
+            if isinstance(msg, dict) and msg.get("role") == "tool":
+                target_idx = j
+                break
+        if target_idx is None:
+            # No tool result in this batch (e.g. all skipped by interrupt);
+            # put the steer back so the caller's fallback path can deliver
+            # it as a normal next-turn user message.
+            _lock = getattr(self, "_pending_steer_lock", None)
+            if _lock is not None:
+                with _lock:
+                    if self._pending_steer:
+                        self._pending_steer = self._pending_steer + "\n" + steer_text
+                    else:
+                        self._pending_steer = steer_text
+            else:
+                existing = getattr(self, "_pending_steer", None)
+                self._pending_steer = (existing + "\n" + steer_text) if existing else steer_text
+            return
+        marker = f"\n\n[USER STEER (injected mid-run, not tool output): {steer_text}]"
+        existing_content = messages[target_idx].get("content", "")
+        if not isinstance(existing_content, str):
+            # Anthropic multimodal content blocks — preserve them and append
+            # a text block at the end.
+            try:
+                blocks = list(existing_content) if existing_content else []
+                blocks.append({"type": "text", "text": marker.lstrip()})
+                messages[target_idx]["content"] = blocks
+            except Exception:
+                # Fall back to string replacement if content shape is unexpected.
+                messages[target_idx]["content"] = f"{existing_content}{marker}"
+        else:
+            messages[target_idx]["content"] = existing_content + marker
+        logger.info(
+            "Delivered /steer to agent after tool batch (%d chars): %s",
+            len(steer_text),
+            steer_text[:120] + ("..." if len(steer_text) > 120 else ""),
+        )
 
     def _touch_activity(self, desc: str) -> None:
         """Update the last-activity timestamp and description (thread-safe)."""
@@ -5512,7 +5701,7 @@ class AIAgent:
                 raise result["error"]
             return result["response"]
 
-        result = {"response": None, "error": None}
+        result = {"response": None, "error": None, "partial_tool_names": []}
         request_client_holder = {"client": None}
         first_delta_fired = {"done": False}
         deltas_were_sent = {"yes": False}  # Track if any deltas were fired (for fallback)
@@ -5668,7 +5857,15 @@ class AIAgent:
                             entry["id"] = tc_delta.id
                         if tc_delta.function:
                             if tc_delta.function.name:
-                                entry["function"]["name"] += tc_delta.function.name
+                                # Use assignment, not +=.  Function names are
+                                # atomic identifiers delivered complete in the
+                                # first chunk (OpenAI spec).  Some providers
+                                # (MiniMax M2.7 via NVIDIA NIM) resend the full
+                                # name in every chunk; concatenation would
+                                # produce "read_fileread_file".  Assignment
+                                # (matching the OpenAI Node SDK / LiteLLM /
+                                # Vercel AI patterns) is immune to this.
+                                entry["function"]["name"] = tc_delta.function.name
                             if tc_delta.function.arguments:
                                 entry["function"]["arguments"] += tc_delta.function.arguments
                         extra = getattr(tc_delta, "extra_content", None)
@@ -5684,6 +5881,14 @@ class AIAgent:
                             tool_gen_notified.add(idx)
                             _fire_first_delta()
                             self._fire_tool_gen_started(name)
+                            # Record the partial tool-call name so the outer
+                            # stub-builder can surface a user-visible warning
+                            # if streaming dies before this tool's arguments
+                            # are fully delivered.  Without this, a stall
+                            # during tool-call JSON generation lets the stub
+                            # response return `tool_calls=None`, silently
+                            # discarding the attempted action.
+                            result["partial_tool_names"].append(name)
 
                 if chunk.choices[0].finish_reason:
                     finish_reason = chunk.choices[0].finish_reason
@@ -6050,13 +6255,44 @@ class AIAgent:
                 _partial_text = (
                     getattr(self, "_current_streamed_assistant_text", "") or ""
                 ).strip() or None
-                logger.warning(
-                    "Partial stream delivered before error; returning stub "
-                    "response with %s chars of recovered content to prevent "
-                    "duplicate messages: %s",
-                    len(_partial_text or ""),
-                    result["error"],
-                )
+
+                # If the stream died while the model was emitting a tool call,
+                # the stub below will silently set `tool_calls=None` and the
+                # agent loop will treat the turn as complete — the attempted
+                # action is lost with no user-facing signal.  Append a
+                # human-visible warning to the stub content so (a) the user
+                # knows something failed, and (b) the next turn's model sees
+                # in conversation history what was attempted and can retry.
+                _partial_names = list(result.get("partial_tool_names") or [])
+                if _partial_names:
+                    _name_str = ", ".join(_partial_names[:3])
+                    if len(_partial_names) > 3:
+                        _name_str += f", +{len(_partial_names) - 3} more"
+                    _warn = (
+                        f"\n\n⚠ Stream stalled mid tool-call "
+                        f"({_name_str}); the action was not executed. "
+                        f"Ask me to retry if you want to continue."
+                    )
+                    _partial_text = (_partial_text or "") + _warn
+                    # Also fire as a streaming delta so the user sees it now
+                    # instead of only in the persisted transcript.
+                    try:
+                        self._fire_stream_delta(_warn)
+                    except Exception:
+                        pass
+                    logger.warning(
+                        "Partial stream dropped tool call(s) %s after %s chars "
+                        "of text; surfaced warning to user: %s",
+                        _partial_names, len(_partial_text or ""), result["error"],
+                    )
+                else:
+                    logger.warning(
+                        "Partial stream delivered before error; returning stub "
+                        "response with %s chars of recovered content to prevent "
+                        "duplicate messages: %s",
+                        len(_partial_text or ""),
+                        result["error"],
+                    )
                 _stub_msg = SimpleNamespace(
                     role="assistant", content=_partial_text, tool_calls=None,
                     reasoning_content=None,
@@ -6814,8 +7050,20 @@ class AIAgent:
         if self.tools:
             api_kwargs["tools"] = self.tools
 
-        if self.max_tokens is not None:
+        # ── max_tokens for chat_completions ──────────────────────────────
+        # Priority: ephemeral override (error recovery / length-continuation
+        # boost) > user-configured max_tokens > provider-specific defaults.
+        _ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None)
+        if _ephemeral_out is not None:
+            self._ephemeral_max_output_tokens = None  # consume immediately
+            api_kwargs.update(self._max_tokens_param(_ephemeral_out))
+        elif self.max_tokens is not None:
             api_kwargs.update(self._max_tokens_param(self.max_tokens))
+        elif "integrate.api.nvidia.com" in self._base_url_lower:
+            # NVIDIA NIM defaults to a very low max_tokens when omitted,
+            # causing models like GLM-4.7 to truncate immediately (thinking
+            # tokens alone exhaust the budget).  16384 provides adequate room.
+            api_kwargs.update(self._max_tokens_param(16384))
         elif self._is_qwen_portal():
             # Qwen Portal defaults to a very low max_tokens when omitted.
             # Reasoning models (qwen3-coder-plus) exhaust that budget on
@@ -7024,6 +7272,20 @@ class AIAgent:
         if reasoning_text:
             reasoning_text = _sanitize_surrogates(reasoning_text)
 
+        # Strip inline reasoning tags (<think>…</think> etc.) from the stored
+        # assistant content.  Reasoning was already captured into
+        # ``reasoning_text`` above (either from structured fields or the
+        # inline-block fallback), so the raw tags in content are redundant.
+        # Leaving them in place caused reasoning to leak to messaging
+        # platforms (#8878, #9568), inflate context on subsequent turns
+        # (#9306 observed 16% content-size reduction on a real MiniMax
+        # session), and pollute generated session titles.  One strip at the
+        # storage boundary cleans content for every downstream consumer:
+        # API replay, session transcript, gateway delivery, CLI display,
+        # compression, title generation.
+        if isinstance(_san_content, str) and _san_content:
+            _san_content = self._strip_think_blocks(_san_content).strip()
+
         msg = {
             "role": "assistant",
             "content": _san_content,
@@ -7653,6 +7915,22 @@ class AIAgent:
 
         def _run_tool(index, tool_call, function_name, function_args):
             """Worker function executed in a thread."""
+            # Register this worker tid so the agent can fan out an interrupt
+            # to it — see AIAgent.interrupt().  Must happen first thing, and
+            # must be paired with the discard + clear at the end of this function.
+            _worker_tid = threading.current_thread().ident
+            with self._tool_worker_threads_lock:
+                self._tool_worker_threads.add(_worker_tid)
+            # Race: if the agent was interrupted between fan-out (which
+            # snapshotted an empty/earlier set) and our registration, apply
+            # the interrupt to our own tid now so is_interrupted() inside
+            # the tool returns True on the next poll.
+            if self._interrupt_requested:
+                try:
+                    from tools.interrupt import set_interrupt as _sif
+                    _sif(True, _worker_tid)
+                except Exception:
+                    pass
             # Set the activity callback on THIS worker thread so
             # _wait_for_process (terminal commands) can fire heartbeats.
             # The callback is thread-local; the main thread's callback
@@ -7675,6 +7953,16 @@ class AIAgent:
             else:
                 logger.info("tool %s completed (%.2fs, %d chars)", function_name, duration, len(result))
             results[index] = (function_name, function_args, result, duration, is_error)
+            # Tear down worker-tid tracking.  Clear any interrupt bit we may
+            # have set so the next task scheduled onto this recycled tid
+            # starts with a clean slate.
+            with self._tool_worker_threads_lock:
+                self._tool_worker_threads.discard(_worker_tid)
+            try:
+                from tools.interrupt import set_interrupt as _sif
+                _sif(False, _worker_tid)
+            except Exception:
+                pass
 
         # Start spinner for CLI mode (skip when TUI handles tool progress)
         spinner = None
@@ -7819,6 +8107,13 @@ class AIAgent:
             turn_tool_msgs = messages[-num_tools:]
             enforce_turn_budget(turn_tool_msgs, env=get_active_env(effective_task_id))
 
+        # ── /steer injection ──────────────────────────────────────────────
+        # Append any pending user steer text to the last tool result so the
+        # agent sees it on its next iteration. Runs AFTER budget enforcement
+        # so the steer marker is never truncated. See steer() for details.
+        if num_tools > 0:
+            self._apply_pending_steer_to_tool_results(messages, num_tools)
+
     def _execute_tool_calls_sequential(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
         """Execute tool calls sequentially (original behavior). Used for single calls or interactive tools."""
         for i, tool_call in enumerate(assistant_message.tool_calls, 1):
@@ -8030,7 +8325,7 @@ class AIAgent:
             elif self._context_engine_tool_names and function_name in self._context_engine_tool_names:
                 # Context engine tools (lcm_grep, lcm_describe, lcm_expand, etc.)
                 spinner = None
-                if self.quiet_mode and not self.tool_progress_callback:
+                if self._should_emit_quiet_tool_messages():
                     face = random.choice(KawaiiSpinner.get_waiting_faces())
                     emoji = _get_tool_emoji(function_name)
                     preview = _build_tool_preview(function_name, function_args) or function_name
@@ -8048,7 +8343,7 @@ class AIAgent:
                     cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_ce_result)
                     if spinner:
                         spinner.stop(cute_msg)
-                    elif self.quiet_mode:
+                    elif self._should_emit_quiet_tool_messages():
                         self._vprint(f"  {cute_msg}")
             elif self._memory_manager and self._memory_manager.has_tool(function_name):
                 # Memory provider tools (hindsight_retain, honcho_search, etc.)
@@ -8198,6 +8493,12 @@ class AIAgent:
         if num_tools_seq > 0:
             enforce_turn_budget(messages[-num_tools_seq:], env=get_active_env(effective_task_id))
 
+        # ── /steer injection ──────────────────────────────────────────────
+        # See _execute_tool_calls_concurrent for the rationale. Same hook,
+        # applied to sequential execution as well.
+        if num_tools_seq > 0:
+            self._apply_pending_steer_to_tool_results(messages, num_tools_seq)
+
 
 
     def _handle_max_iterations(self, messages: list, api_call_count: int) -> str:
@@ -9874,7 +10175,7 @@ class AIAgent:
                         _dhh = _dhh_fn()
                         print(f"{self.log_prefix}     • Check ANTHROPIC_TOKEN in {_dhh}/.env for Hermes-managed OAuth/setup tokens")
                         print(f"{self.log_prefix}     • Check ANTHROPIC_API_KEY in {_dhh}/.env for API keys or legacy token values")
-                        print(f"{self.log_prefix}     • For API keys: verify at https://console.anthropic.com/settings/keys")
+                        print(f"{self.log_prefix}     • For API keys: verify at https://platform.claude.com/settings/keys")
                         print(f"{self.log_prefix}     • For Claude Code: run 'claude /login' to refresh, then retry")
                         print(f"{self.log_prefix}     • Legacy cleanup: hermes config set ANTHROPIC_TOKEN \"\"")
                         print(f"{self.log_prefix}     • Clear stale keys: hermes config set ANTHROPIC_API_KEY \"\"")
@@ -10518,6 +10819,12 @@ class AIAgent:
                 continue
 
             if restart_with_length_continuation:
+                # Progressively boost the output token budget on each retry.
+                # Retry 1 → 2× base, retry 2 → 3× base, capped at 32 768.
+                # Applies to all providers via _ephemeral_max_output_tokens.
+                _boost_base = self.max_tokens if self.max_tokens else 4096
+                _boost = _boost_base * (length_continue_retries + 1)
+                self._ephemeral_max_output_tokens = min(_boost, 32768)
                 continue
 
             # Guard: if all retries exhausted without a successful response
@@ -10880,17 +11187,10 @@ class AIAgent:
                         self._last_content_tools_all_housekeeping = _all_housekeeping
                         if _all_housekeeping and self._has_stream_consumers():
                             self._mute_post_response = True
-                        elif self.quiet_mode:
+                        elif self._should_emit_quiet_tool_messages():
                             clean = self._strip_think_blocks(turn_content).strip()
                             if clean:
-                                relayed = False
-                                if (
-                                    self.tool_progress_callback
-                                    and getattr(self, "platform", "") == "tui"
-                                ):
-                                    relayed = True
-                                if not relayed:
-                                    self._vprint(f"  ┊ 💬 {clean}")
+                                self._vprint(f"  ┊ 💬 {clean}")
                     
                     # Pop thinking-only prefill message(s) before appending
                     # (tool-call path — same rationale as the final-response path).
@@ -11478,6 +11778,12 @@ class AIAgent:
             "cost_status": self.session_cost_status,
             "cost_source": self.session_cost_source,
         }
+        # If a /steer landed after the final assistant turn (no more tool
+        # batches to drain into), hand it back to the caller so it can be
+        # delivered as the next user turn instead of being silently lost.
+        _leftover_steer = self._drain_pending_steer()
+        if _leftover_steer:
+            result["pending_steer"] = _leftover_steer
         self._response_was_previewed = False
         
         # Include interrupt message if one triggered the interrupt
diff --git a/scripts/release.py b/scripts/release.py
index e8039047ceb..a20c3c134fa 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -48,6 +48,7 @@ AUTHOR_MAP = {
     "35742124+0xbyt4@users.noreply.github.com": "0xbyt4",
     "82637225+kshitijk4poor@users.noreply.github.com": "kshitijk4poor",
     "kshitijk4poor@users.noreply.github.com": "kshitijk4poor",
+    "kshitijk4poor@gmail.com": "kshitijk4poor",
     "16443023+stablegenius49@users.noreply.github.com": "stablegenius49",
     "185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
     "101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit",
@@ -74,6 +75,10 @@ AUTHOR_MAP = {
     "109555139+davetist@users.noreply.github.com": "davetist",
     "39405770+yyq4193@users.noreply.github.com": "yyq4193",
     "Asunfly@users.noreply.github.com": "Asunfly",
+    "2500400+honghua@users.noreply.github.com": "honghua",
+    "nish3451@users.noreply.github.com": "nish3451",
+    "Mibayy@users.noreply.github.com": "Mibayy",
+    "135070653+sgaofen@users.noreply.github.com": "sgaofen",
     # contributors (manual mapping from git names)
     "ahmedsherif95@gmail.com": "asheriif",
     "liujinkun@bytedance.com": "liujinkun2025",
@@ -207,10 +212,12 @@ AUTHOR_MAP = {
     "cola-runner@users.noreply.github.com": "cola-runner",
     "ygd58@users.noreply.github.com": "ygd58",
     "vominh1919@users.noreply.github.com": "vominh1919",
+    "iamagenius00@users.noreply.github.com": "iamagenius00",
     "trevmanthony@gmail.com": "trevthefoolish",
     "ziliangpeng@users.noreply.github.com": "ziliangpeng",
     "centripetal-star@users.noreply.github.com": "centripetal-star",
     "LeonSGP43@users.noreply.github.com": "LeonSGP43",
+    "154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43",
     "Lubrsy706@users.noreply.github.com": "Lubrsy706",
     "niyant@spicefi.xyz": "spniyant",
     "olafthiele@gmail.com": "olafthiele",
@@ -262,6 +269,10 @@ AUTHOR_MAP = {
     "xiayh17@gmail.com": "xiayh0107",
     "asurla@nvidia.com": "anniesurla",
     "limkuan24@gmail.com": "WideLee",
+    "aviralarora002@gmail.com": "AviArora02-commits",
+    "junminliu@gmail.com": "JimLiu",
+    "jarvischer@gmail.com": "maxchernin",
+    "levantam.98.2324@gmail.com": "LVT382009",
 }
 
 
diff --git a/scripts/whatsapp-bridge/bridge.js b/scripts/whatsapp-bridge/bridge.js
index 70cf8e95d9f..401651c8a83 100644
--- a/scripts/whatsapp-bridge/bridge.js
+++ b/scripts/whatsapp-bridge/bridge.js
@@ -229,6 +229,14 @@ async function startSocket() {
 
       // Check allowlist for messages from others (resolve LID ↔ phone aliases)
       if (!msg.key.fromMe && !matchesAllowedUser(senderId, ALLOWED_USERS, SESSION_DIR)) {
+        try {
+          console.log(JSON.stringify({
+            event: 'ignored',
+            reason: 'allowlist_mismatch',
+            chatId,
+            senderId,
+          }));
+        } catch {}
         continue;
       }
 
diff --git a/skills/creative/baoyu-infographic/PORT_NOTES.md b/skills/creative/baoyu-infographic/PORT_NOTES.md
new file mode 100644
index 00000000000..0a2d86d89ca
--- /dev/null
+++ b/skills/creative/baoyu-infographic/PORT_NOTES.md
@@ -0,0 +1,43 @@
+# Port Notes — baoyu-infographic
+
+Ported from [JimLiu/baoyu-skills](https://github.com/JimLiu/baoyu-skills) v1.56.1.
+
+## Changes from upstream
+
+Only `SKILL.md` was modified. All 45 reference files are verbatim copies.
+
+### SKILL.md adaptations
+
+| Change | Upstream | Hermes |
+|--------|----------|--------|
+| Metadata namespace | `openclaw` | `hermes` |
+| Trigger | `/baoyu-infographic` slash command | Natural language skill matching |
+| User config | EXTEND.md file (project/user/XDG paths) | Removed — not part of Hermes infra |
+| User prompts | `AskUserQuestion` (batched) | `clarify` tool (one at a time) |
+| Image generation | baoyu-imagine (Bun/TypeScript) | `image_generate` tool |
+| Platform support | Linux/macOS/Windows/WSL/PowerShell | Linux/macOS only |
+| File operations | Bash commands | Hermes file tools (write_file, read_file) |
+
+### What was preserved
+
+- All layout definitions (21 files)
+- All style definitions (21 files)
+- Core reference files (analysis-framework, base-prompt, structured-content-template)
+- Recommended combinations table
+- Keyword shortcuts table
+- Core principles and workflow structure
+- Author, version, homepage attribution
+
+## Syncing with upstream
+
+To pull upstream updates:
+```bash
+# Compare versions
+curl -sL https://raw.githubusercontent.com/JimLiu/baoyu-skills/main/skills/baoyu-infographic/SKILL.md | head -5
+# Look for the `version:` line and compare it with the version in the local SKILL.md
+
+# Diff reference files
+diff <(curl -sL https://raw.githubusercontent.com/JimLiu/baoyu-skills/main/skills/baoyu-infographic/references/layouts/bento-grid.md) references/layouts/bento-grid.md
+```
+
+Reference files can be overwritten directly (they're unchanged from upstream). SKILL.md must be manually merged since it contains Hermes-specific adaptations.
diff --git a/skills/creative/baoyu-infographic/SKILL.md b/skills/creative/baoyu-infographic/SKILL.md
new file mode 100644
index 00000000000..fea3499cbf4
--- /dev/null
+++ b/skills/creative/baoyu-infographic/SKILL.md
@@ -0,0 +1,236 @@
+---
+name: baoyu-infographic
+description: Generate professional infographics with 21 layout types and 21 visual styles. Analyzes content, recommends layout×style combinations, and generates publication-ready infographics. Use when user asks to create "infographic", "visual summary", "信息图", "可视化", or "高密度信息大图".
+version: 1.56.1
+author: 宝玉 (JimLiu)
+license: MIT
+metadata:
+  hermes:
+    tags: [infographic, visual-summary, creative, image-generation]
+    homepage: https://github.com/JimLiu/baoyu-skills#baoyu-infographic
+---
+
+# Infographic Generator
+
+Adapted from [baoyu-infographic](https://github.com/JimLiu/baoyu-skills) for Hermes Agent's tool ecosystem.
+
+Two dimensions: **layout** (information structure) × **style** (visual aesthetics). Freely combine any layout with any style.
+
+## When to Use
+
+Trigger this skill when the user asks to create an infographic, visual summary, information graphic, or uses terms like "信息图", "可视化", or "高密度信息大图". The user provides content (text, file path, URL, or topic) and optionally specifies layout, style, aspect ratio, or language.
+
+## Options
+
+| Option | Values |
+|--------|--------|
+| Layout | 21 options (see Layout Gallery), default: bento-grid |
+| Style | 21 options (see Style Gallery), default: craft-handmade |
+| Aspect | Named: landscape (16:9), portrait (9:16), square (1:1). Custom: any W:H ratio (e.g., 3:4, 4:3, 2.35:1) |
+| Language | en, zh, ja, etc. |
+
+## Layout Gallery
+
+| Layout | Best For |
+|--------|----------|
+| `linear-progression` | Timelines, processes, tutorials |
+| `binary-comparison` | A vs B, before-after, pros-cons |
+| `comparison-matrix` | Multi-factor comparisons |
+| `hierarchical-layers` | Pyramids, priority levels |
+| `tree-branching` | Categories, taxonomies |
+| `hub-spoke` | Central concept with related items |
+| `structural-breakdown` | Exploded views, cross-sections |
+| `bento-grid` | Multiple topics, overview (default) |
+| `iceberg` | Surface vs hidden aspects |
+| `bridge` | Problem-solution |
+| `funnel` | Conversion, filtering |
+| `isometric-map` | Spatial relationships |
+| `dashboard` | Metrics, KPIs |
+| `periodic-table` | Categorized collections |
+| `comic-strip` | Narratives, sequences |
+| `story-mountain` | Plot structure, tension arcs |
+| `jigsaw` | Interconnected parts |
+| `venn-diagram` | Overlapping concepts |
+| `winding-roadmap` | Journey, milestones |
+| `circular-flow` | Cycles, recurring processes |
+| `dense-modules` | High-density modules, data-rich guides |
+
+Full definitions: `references/layouts/<layout>.md`
+
+## Style Gallery
+
+| Style | Description |
+|-------|-------------|
+| `craft-handmade` | Hand-drawn, paper craft (default) |
+| `claymation` | 3D clay figures, stop-motion |
+| `kawaii` | Japanese cute, pastels |
+| `storybook-watercolor` | Soft painted, whimsical |
+| `chalkboard` | Chalk on black board |
+| `cyberpunk-neon` | Neon glow, futuristic |
+| `bold-graphic` | Comic style, halftone |
+| `aged-academia` | Vintage science, sepia |
+| `corporate-memphis` | Flat vector, vibrant |
+| `technical-schematic` | Blueprint, engineering |
+| `origami` | Folded paper, geometric |
+| `pixel-art` | Retro 8-bit |
+| `ui-wireframe` | Grayscale interface mockup |
+| `subway-map` | Transit diagram |
+| `ikea-manual` | Minimal line art |
+| `knolling` | Organized flat-lay |
+| `lego-brick` | Toy brick construction |
+| `pop-laboratory` | Blueprint grid, coordinate markers, lab precision |
+| `morandi-journal` | Hand-drawn doodle, warm Morandi tones |
+| `retro-pop-grid` | 1970s retro pop art, Swiss grid, thick outlines |
+| `hand-drawn-edu` | Macaron pastels, hand-drawn wobble, stick figures |
+
+Full definitions: `references/styles/<style>.md`
+
+## Recommended Combinations
+
+| Content Type | Layout + Style |
+|--------------|----------------|
+| Timeline/History | `linear-progression` + `craft-handmade` |
+| Step-by-step | `linear-progression` + `ikea-manual` |
+| A vs B | `binary-comparison` + `corporate-memphis` |
+| Hierarchy | `hierarchical-layers` + `craft-handmade` |
+| Overlap | `venn-diagram` + `craft-handmade` |
+| Conversion | `funnel` + `corporate-memphis` |
+| Cycles | `circular-flow` + `craft-handmade` |
+| Technical | `structural-breakdown` + `technical-schematic` |
+| Metrics | `dashboard` + `corporate-memphis` |
+| Educational | `bento-grid` + `chalkboard` |
+| Journey | `winding-roadmap` + `storybook-watercolor` |
+| Categories | `periodic-table` + `bold-graphic` |
+| Product Guide | `dense-modules` + `morandi-journal` |
+| Technical Guide | `dense-modules` + `pop-laboratory` |
+| Trendy Guide | `dense-modules` + `retro-pop-grid` |
+| Educational Diagram | `hub-spoke` + `hand-drawn-edu` |
+| Process Tutorial | `linear-progression` + `hand-drawn-edu` |
+
+Default: `bento-grid` + `craft-handmade`
+
+## Keyword Shortcuts
+
+When user input contains these keywords, **auto-select** the associated layout and offer associated styles as top recommendations in Step 3. Skip content-based layout inference for matched keywords.
+
+If a shortcut has **Prompt Notes**, append them to the generated prompt (Step 5) as additional style instructions.
+
+| User Keyword | Layout | Recommended Styles | Default Aspect | Prompt Notes |
+|--------------|--------|--------------------|----------------|--------------|
+| 高密度信息大图 / high-density-info | `dense-modules` | `morandi-journal`, `pop-laboratory`, `retro-pop-grid` | portrait | — |
+| 信息图 / infographic | `bento-grid` | `craft-handmade` | landscape | Minimalist: clean canvas, ample whitespace, no complex background textures. Simple cartoon elements and icons only. |
+
+## Output Structure
+
+```
+infographic/{topic-slug}/
+├── source-{slug}.{ext}
+├── analysis.md
+├── structured-content.md
+├── prompts/infographic.md
+└── infographic.png
+```
+
+Slug: 2-4 kebab-case words derived from the topic. If the output directory already exists, append `-YYYYMMDD-HHMMSS` to the slug.
+
+## Core Principles
+
+- Preserve source data faithfully — no summarization or rephrasing (but **strip any credentials, API keys, tokens, or secrets** before including in outputs)
+- Define learning objectives before structuring content
+- Structure for visual communication (headlines, labels, visual elements)
+
+## Workflow
+
+### Step 1: Analyze Content
+
+**Load references**: Read `references/analysis-framework.md` from this skill.
+
+1. Save source content (file path or paste → `source.md` using `write_file`)
+   - **Backup rule**: If `source.md` exists, rename to `source-backup-YYYYMMDD-HHMMSS.md`
+2. Analyze: topic, data type, complexity, tone, audience
+3. Detect source language and user language
+4. Extract design instructions from user input
+5. Save analysis to `analysis.md`
+   - **Backup rule**: If `analysis.md` exists, rename to `analysis-backup-YYYYMMDD-HHMMSS.md`
+
+See `references/analysis-framework.md` for detailed format.
+
+### Step 2: Generate Structured Content → `structured-content.md`
+
+Transform content into infographic structure:
+1. Title and learning objectives
+2. Sections with: key concept, content (verbatim), visual element, text labels
+3. Data points (all statistics/quotes copied exactly)
+4. Design instructions from user
+
+**Rules**: Markdown only. No new information. Preserve data faithfully. Strip any credentials or secrets from output.
+
+See `references/structured-content-template.md` for detailed format.
+
+### Step 3: Recommend Combinations
+
+**3.1 Check Keyword Shortcuts first**: If user input matches a keyword from the **Keyword Shortcuts** table, auto-select the associated layout and prioritize associated styles as top recommendations. Skip content-based layout inference.
+
+**3.2 Otherwise**, recommend 3-5 layout×style combinations based on:
+- Data structure → matching layout
+- Content tone → matching style
+- Audience expectations
+- User design instructions
+
+### Step 4: Confirm Options
+
+Use the `clarify` tool to confirm options with the user. Since `clarify` handles one question at a time, ask the questions below sequentially, starting with the most important:
+
+**Q1 — Combination**: Present 3+ layout×style combos with rationale. Ask user to pick one.
+
+**Q2 — Aspect**: Ask for aspect ratio preference (landscape/portrait/square or custom W:H).
+
+**Q3 — Language** (only if source ≠ user language): Ask which language the text content should use.
+
+### Step 5: Generate Prompt → `prompts/infographic.md`
+
+**Backup rule**: If `prompts/infographic.md` exists, rename to `prompts/infographic-backup-YYYYMMDD-HHMMSS.md`
+
+**Load references**: Read the selected layout from `references/layouts/<layout>.md` and style from `references/styles/<style>.md`.
+
+Combine:
+1. Layout definition from `references/layouts/<layout>.md`
+2. Style definition from `references/styles/<style>.md`
+3. Base template from `references/base-prompt.md`
+4. Structured content from Step 2
+5. All text in confirmed language
+
+**Aspect ratio resolution** for `{{ASPECT_RATIO}}`:
+- Named presets → ratio string: landscape→`16:9`, portrait→`9:16`, square→`1:1`
+- Custom W:H ratios → use as-is (e.g., `3:4`, `4:3`, `2.35:1`)
+
+Save the assembled prompt to `prompts/infographic.md` using `write_file`.
+
+### Step 6: Generate Image
+
+Use the `image_generate` tool with the assembled prompt from Step 5.
+
+- Map aspect ratio to image_generate's format: `16:9` → `landscape`, `9:16` → `portrait`, `1:1` → `square`
+- For custom ratios, pick the closest named aspect
+- On failure, auto-retry once
+- Save the resulting image to the output directory as `infographic.png` (download or copy it from the URL/path the tool returns)
+
+### Step 7: Output Summary
+
+Report: topic, layout, style, aspect, language, output path, files created.
+
+## References
+
+- `references/analysis-framework.md` — Analysis methodology
+- `references/structured-content-template.md` — Content format
+- `references/base-prompt.md` — Prompt template
+- `references/layouts/<layout>.md` — 21 layout definitions
+- `references/styles/<style>.md` — 21 style definitions
+
+## Pitfalls
+
+1. **Data integrity is paramount** — never summarize, paraphrase, or alter source statistics. "73% increase" must stay "73% increase", not "significant increase".
+2. **Strip secrets** — always scan source content for API keys, tokens, or credentials before including in any output file.
+3. **One message per section** — each infographic section should convey one clear concept. Overloading sections reduces readability.
+4. **Style consistency** — the style definition from the references file must be applied consistently across the entire infographic. Don't mix styles.
+5. **image_generate aspect ratios** — the tool only supports `landscape`, `portrait`, and `square`. Custom ratios like `3:4` should map to the nearest option (portrait in that case).
diff --git a/skills/creative/baoyu-infographic/references/analysis-framework.md b/skills/creative/baoyu-infographic/references/analysis-framework.md
new file mode 100644
index 00000000000..702bf7becf8
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/analysis-framework.md
@@ -0,0 +1,182 @@
+# Infographic Content Analysis Framework
+
+Deep analysis framework applying instructional design principles to infographic creation.
+
+## Purpose
+
+Before creating an infographic, thoroughly analyze the source material to:
+- Understand the content at a deep level
+- Identify clear learning objectives for the viewer
+- Structure information for maximum clarity and retention
+- Match content to optimal layout×style combinations
+- Preserve all source data verbatim
+
+## Instructional Design Mindset
+
+Approach content analysis as a **world-class instructional designer**:
+
+| Principle | Application |
+|-----------|-------------|
+| **Deep Understanding** | Read the entire document before analyzing any part |
+| **Learner-Centered** | Focus on what the viewer needs to understand |
+| **Visual Storytelling** | Use visuals to communicate, not just decorate |
+| **Cognitive Load** | Simplify complex ideas without losing accuracy |
+| **Data Integrity** | Never alter, summarize, or paraphrase source facts |
+
+## Analysis Dimensions
+
+### 1. Content Type Classification
+
+| Type | Characteristics | Best Layout | Best Style |
+|------|-----------------|-------------|------------|
+| **Timeline/History** | Sequential events, dates, progression | linear-progression | craft-handmade, aged-academia |
+| **Process/Tutorial** | Step-by-step instructions, how-to | linear-progression, winding-roadmap | ikea-manual, technical-schematic |
+| **Comparison** | A vs B, pros/cons, before-after | binary-comparison, comparison-matrix | corporate-memphis, bold-graphic |
+| **Hierarchy** | Levels, priorities, pyramids | hierarchical-layers, tree-branching | craft-handmade, corporate-memphis |
+| **Relationships** | Connections, overlaps, influences | venn-diagram, hub-spoke, jigsaw | craft-handmade, subway-map |
+| **Data/Metrics** | Statistics, KPIs, measurements | dashboard, periodic-table | corporate-memphis, technical-schematic |
+| **Cycle/Loop** | Recurring processes, feedback loops | circular-flow | craft-handmade, technical-schematic |
+| **System/Structure** | Components, architecture, anatomy | structural-breakdown, bento-grid | technical-schematic, ikea-manual |
+| **Journey/Narrative** | Stories, user flows, milestones | winding-roadmap, story-mountain | storybook-watercolor, comic-strip |
+| **Overview/Summary** | Multiple topics, feature highlights | bento-grid, periodic-table, dense-modules | chalkboard, bold-graphic |
+| **Product/Buying Guide** | Multi-dimension comparisons, specs, pitfalls | dense-modules | morandi-journal, pop-laboratory, retro-pop-grid |
+
+### 2. Learning Objective Identification
+
+Every infographic should have 1-3 clear learning objectives.
+
+**Good Learning Objectives**:
+- Specific and measurable
+- Focus on what the viewer will understand, not just see
+- Written from the viewer's perspective
+
+**Format**: "After viewing this infographic, the viewer will understand..."
+
+| Content Aspect | Objective Type |
+|----------------|----------------|
+| Core concept | "...what [topic] is and why it matters" |
+| Process | "...how to [accomplish something]" |
+| Comparison | "...the key differences between [A] and [B]" |
+| Relationships | "...how [elements] connect to each other" |
+| Data | "...the significance of [key statistics]" |
+
+### 3. Audience Analysis
+
+| Factor | Questions | Impact |
+|--------|-----------|--------|
+| **Knowledge Level** | What do they already know? | Determines complexity depth |
+| **Context** | Why are they viewing this? | Determines emphasis points |
+| **Expectations** | What do they hope to learn? | Determines success criteria |
+| **Visual Preferences** | Professional, playful, technical? | Influences style choice |
+
+### 4. Complexity Assessment
+
+| Level | Indicators | Layout Recommendation |
+|-------|------------|----------------------|
+| **Simple** (3-5 points) | Few main concepts, clear relationships | sparse layouts, single focus |
+| **Moderate** (6-8 points) | Multiple concepts, some relationships | balanced layouts, clear sections |
+| **Complex** (9+ points) | Many concepts, intricate relationships | dense layouts, multiple sections |
+
+### 5. Visual Opportunity Mapping
+
+Identify what can be shown rather than told:
+
+| Content Element | Visual Treatment |
+|-----------------|------------------|
+| Numbers/Statistics | Large, highlighted numerals |
+| Comparisons | Side-by-side, split screen |
+| Processes | Arrows, numbered steps, flow |
+| Hierarchies | Pyramids, layers, size differences |
+| Relationships | Lines, connections, overlapping shapes |
+| Categories | Color coding, grouping, sections |
+| Timelines | Horizontal/vertical progression |
+| Quotes | Callout boxes, quotation marks |
+
+### 6. Data Verbatim Extraction
+
+**Critical**: All factual information must be preserved exactly as written in the source.
+
+| Data Type | Handling Rule |
+|-----------|---------------|
+| **Statistics** | Copy exactly: "73%" not "about 70%" |
+| **Quotes** | Copy word-for-word with attribution |
+| **Names** | Preserve exact spelling |
+| **Dates** | Keep original format |
+| **Technical Terms** | Do not simplify or substitute |
+| **Lists** | Preserve order and wording |
+
+**Never**:
+- Round numbers
+- Paraphrase quotes
+- Substitute simpler words
+- Add implied information
+- Remove context that affects meaning
+
+## Output Format
+
+Save analysis results to `analysis.md`:
+
+```yaml
+---
+title: "[Main topic title]"
+topic: "[educational/technical/business/creative/etc.]"
+data_type: "[timeline/hierarchy/comparison/process/etc.]"
+complexity: "[simple/moderate/complex]"
+point_count: [number of main points]
+source_language: "[detected language]"
+user_language: "[user's language]"
+---
+
+## Main Topic
+[1-2 sentence summary of what this content is about]
+
+## Learning Objectives
+After viewing this infographic, the viewer should understand:
+1. [Primary objective]
+2. [Secondary objective]
+3. [Tertiary objective if applicable]
+
+## Target Audience
+- **Knowledge Level**: [Beginner/Intermediate/Expert]
+- **Context**: [Why they're viewing this]
+- **Expectations**: [What they hope to learn]
+
+## Content Type Analysis
+- **Data Structure**: [How information relates to itself]
+- **Key Relationships**: [What connects to what]
+- **Visual Opportunities**: [What can be shown rather than told]
+
+## Key Data Points (Verbatim)
+[All statistics, quotes, and critical facts exactly as they appear in source]
+- "[Exact data point 1]"
+- "[Exact data point 2]"
+- "[Exact quote with attribution]"
+
+## Layout × Style Signals
+- Content type: [type] → suggests [layout]
+- Tone: [tone] → suggests [style]
+- Audience: [audience] → suggests [style]
+- Complexity: [level] → suggests [layout density]
+
+## Design Instructions (from user input)
+[Any style, color, layout, or visual preferences extracted from user's steering prompt]
+
+## Recommended Combinations
+1. **[Layout] + [Style]** (Recommended): [Brief rationale]
+2. **[Layout] + [Style]**: [Brief rationale]
+3. **[Layout] + [Style]**: [Brief rationale]
+```
+
+## Analysis Checklist
+
+Before proceeding to structured content generation:
+
+- [ ] Have I read the entire source document?
+- [ ] Can I summarize the main topic in 1-2 sentences?
+- [ ] Have I identified 1-3 clear learning objectives?
+- [ ] Do I understand the target audience?
+- [ ] Have I classified the content type correctly?
+- [ ] Have I extracted all data points verbatim?
+- [ ] Have I identified visual opportunities?
+- [ ] Have I extracted design instructions from user input?
+- [ ] Have I recommended 3 layout×style combinations?
diff --git a/skills/creative/baoyu-infographic/references/base-prompt.md b/skills/creative/baoyu-infographic/references/base-prompt.md
new file mode 100644
index 00000000000..b65f972289d
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/base-prompt.md
@@ -0,0 +1,43 @@
+Create a professional infographic following these specifications:
+
+## Image Specifications
+
+- **Type**: Infographic
+- **Layout**: {{LAYOUT}}
+- **Style**: {{STYLE}}
+- **Aspect Ratio**: {{ASPECT_RATIO}}
+- **Language**: {{LANGUAGE}}
+
+## Core Principles
+
+- Follow the layout structure precisely for information architecture
+- Apply style aesthetics consistently throughout
+- If content involves sensitive or copyrighted figures, create stylistically similar alternatives
+- Keep information concise, highlight keywords and core concepts
+- Use ample whitespace for visual clarity
+- Maintain clear visual hierarchy
+
+## Text Requirements
+
+- All text must match the specified style treatment
+- Main titles should be prominent and readable
+- Key concepts should be visually emphasized
+- Labels should be clear and appropriately sized
+- Use the specified language for all text content
+
+## Layout Guidelines
+
+{{LAYOUT_GUIDELINES}}
+
+## Style Guidelines
+
+{{STYLE_GUIDELINES}}
+
+---
+
+Generate the infographic based on the content below:
+
+{{CONTENT}}
+
+Text labels (in {{LANGUAGE}}):
+{{TEXT_LABELS}}
diff --git a/skills/creative/baoyu-infographic/references/layouts/bento-grid.md b/skills/creative/baoyu-infographic/references/layouts/bento-grid.md
new file mode 100644
index 00000000000..8ce8bb890a4
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/bento-grid.md
@@ -0,0 +1,41 @@
+# bento-grid
+
+Modular grid layout with varied cell sizes, like a bento box.
+
+## Structure
+
+- Grid of rectangular cells
+- Mixed cell sizes (1x1, 2x1, 1x2, 2x2)
+- No strict symmetry required
+- Hero cell for main point
+- Supporting cells around it
+
+## Best For
+
+- Multiple topic overview
+- Feature highlights
+- Dashboard summaries
+- Portfolio displays
+- Mixed content types
+
+## Visual Elements
+
+- Clear cell boundaries
+- Varied cell backgrounds
+- Icons or illustrations per cell
+- Consistent padding/margins
+- Visual hierarchy through size
+
+## Text Placement
+
+- Main title at top
+- Cell titles within each cell
+- Brief content per cell
+- Minimal text, maximum visual
+- CTA or summary in prominent cell
+
+## Recommended Pairings
+
+- `craft-handmade`: Friendly overviews (default)
+- `corporate-memphis`: Business summaries
+- `pixel-art`: Retro feature grids
diff --git a/skills/creative/baoyu-infographic/references/layouts/binary-comparison.md b/skills/creative/baoyu-infographic/references/layouts/binary-comparison.md
new file mode 100644
index 00000000000..cf06980b065
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/binary-comparison.md
@@ -0,0 +1,48 @@
+# binary-comparison
+
+Side-by-side comparison of two items, states, or concepts.
+
+## Structure
+
+- Vertical divider splitting image in half
+- Left side: Item A / Before / Pro
+- Right side: Item B / After / Con
+- Mirrored layout for easy comparison
+- Clear visual distinction between sides
+
+## Variants
+
+| Variant | Focus | Visual Emphasis |
+|---------|-------|-----------------|
+| **Before-After** | Transformation over time | Temporal change, improvement |
+| **A vs B** | Feature comparison | Direct contrast, differences |
+| **Pro-Con** | Advantages/disadvantages | Balanced evaluation |
+
+## Best For
+
+- Before/after transformations
+- Product or option comparisons
+- Pros and cons analysis
+- Old vs new comparisons
+- Two perspectives on a topic
+
+## Visual Elements
+
+- Strong vertical dividing line or gradient
+- Contrasting colors per side
+- Matching element positions for comparison
+- VS symbol or divider decoration
+- Transformation arrow for before-after
+
+## Text Placement
+
+- Main title centered at top
+- Side labels (A/B, Before/After)
+- Corresponding points aligned horizontally
+- Summary at bottom if needed
+
+## Recommended Pairings
+
+- `corporate-memphis`: Business comparisons
+- `bold-graphic`: High-contrast dramatic comparisons
+- `craft-handmade`: Friendly explainers
diff --git a/skills/creative/baoyu-infographic/references/layouts/bridge.md b/skills/creative/baoyu-infographic/references/layouts/bridge.md
new file mode 100644
index 00000000000..116bf97f213
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/bridge.md
@@ -0,0 +1,41 @@
+# bridge
+
+Gap-crossing structure connecting problem to solution or current to future state.
+
+## Structure
+
+- Left side: current state/problem
+- Right side: desired state/solution
+- Bridge element spanning the gap
+- Gap representing challenge/obstacle
+- Bridge elements as steps/methods
+
+## Best For
+
+- Problem to solution journeys
+- Current vs future state
+- Gap analysis
+- Transformation bridges
+- Strategic initiatives
+
+## Visual Elements
+
+- Two distinct platforms/sides
+- Visible gap or chasm
+- Bridge structure with supports
+- Icons representing each side
+- Stepping stones or bridge planks
+
+## Text Placement
+
+- Title at top
+- Left label (From/Problem/Current)
+- Right label (To/Solution/Future)
+- Bridge elements labeled
+- Gap description below
+
+## Recommended Pairings
+
+- `cartoon-hand-drawn`: Friendly journeys
+- `corporate-memphis`: Business transformations
+- `isometric-3d`: Technical transitions
diff --git a/skills/creative/baoyu-infographic/references/layouts/circular-flow.md b/skills/creative/baoyu-infographic/references/layouts/circular-flow.md
new file mode 100644
index 00000000000..068c5fd79d5
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/circular-flow.md
@@ -0,0 +1,41 @@
+# circular-flow
+
+Cyclic process showing continuous or recurring steps.
+
+## Structure
+
+- Circular arrangement
+- Steps around the circle
+- Arrows showing direction
+- No clear start/end (continuous)
+- Center can hold main concept
+
+## Best For
+
+- Recurring processes
+- Feedback loops
+- Lifecycle stages
+- Continuous improvement
+- Natural cycles
+
+## Visual Elements
+
+- Circle or ring shape
+- Directional arrows
+- Step nodes evenly spaced
+- Icons per step
+- Optional center element
+
+## Text Placement
+
+- Title at top
+- Step labels at each node
+- Brief descriptions near nodes
+- Center concept if applicable
+- Cycle name
+
+## Recommended Pairings
+
+- `cartoon-hand-drawn`: Friendly cycles
+- `corporate-memphis`: Business processes
+- `subway-map`: Transit-style cycles
diff --git a/skills/creative/baoyu-infographic/references/layouts/comic-strip.md b/skills/creative/baoyu-infographic/references/layouts/comic-strip.md
new file mode 100644
index 00000000000..b760f57a4fa
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/comic-strip.md
@@ -0,0 +1,41 @@
+# comic-strip
+
+Sequential narrative panels telling a story or explaining a concept.
+
+## Structure
+
+- Multiple panels in sequence
+- Left-to-right, top-to-bottom reading
+- Characters or subjects in scenes
+- Speech/thought bubbles
+- Panel borders clearly defined
+
+## Best For
+
+- Storytelling explanations
+- User journey narratives
+- Scenario illustrations
+- Step sequences with context
+- Before/during/after stories
+
+## Visual Elements
+
+- Panel frames
+- Speech and thought bubbles
+- Sound effects (optional)
+- Characters with expressions
+- Scene backgrounds
+
+## Text Placement
+
+- Title at top
+- Dialogue in speech bubbles
+- Narration in caption boxes
+- Sound effects integrated
+- Panel numbers if needed
+
+## Recommended Pairings
+
+- `graphic-novel`: Dramatic narratives
+- `kawaii`: Cute character stories
+- `cartoon-hand-drawn`: Friendly explanations
diff --git a/skills/creative/baoyu-infographic/references/layouts/comparison-matrix.md b/skills/creative/baoyu-infographic/references/layouts/comparison-matrix.md
new file mode 100644
index 00000000000..16606281f76
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/comparison-matrix.md
@@ -0,0 +1,41 @@
+# comparison-matrix
+
+Grid-based multi-factor comparison across multiple items.
+
+## Structure
+
+- Table/grid layout
+- Rows: items being compared
+- Columns: comparison criteria
+- Cells: scores, checks, or values
+- Header row and column clearly marked
+
+## Best For
+
+- Product feature comparisons
+- Tool/software evaluations
+- Multi-criteria decisions
+- Specification sheets
+- Rating comparisons
+
+## Visual Elements
+
+- Clear grid lines or cell boundaries
+- Checkmarks, X marks, or scores in cells
+- Color coding for quick scanning
+- Icons for criteria categories
+- Highlight for recommended option
+
+## Text Placement
+
+- Title at top
+- Item names in first column
+- Criteria in header row
+- Brief values in cells
+- Legend if using symbols
+
+## Recommended Pairings
+
+- `corporate-memphis`: Business tool comparisons
+- `ui-wireframe`: Technical feature matrices
+- `blueprint`: Specification comparisons
diff --git a/skills/creative/baoyu-infographic/references/layouts/dashboard.md b/skills/creative/baoyu-infographic/references/layouts/dashboard.md
new file mode 100644
index 00000000000..70635ae1c97
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/dashboard.md
@@ -0,0 +1,41 @@
+# dashboard
+
+Multi-metric display with charts, numbers, and KPI indicators.
+
+## Structure
+
+- Multiple data widgets
+- Charts, graphs, numbers
+- Grid or modular layout
+- Key metrics prominent
+- Status indicators
+
+## Best For
+
+- KPI summaries
+- Performance metrics
+- Analytics overviews
+- Status reports
+- Data snapshots
+
+## Visual Elements
+
+- Chart types (bar, line, pie, gauge)
+- Big numbers for KPIs
+- Trend arrows (up/down)
+- Color-coded status (green/red)
+- Clean data visualization
+
+## Text Placement
+
+- Title at top
+- Widget titles above each section
+- Metric labels and values
+- Units clearly shown
+- Time period indicated
+
+## Recommended Pairings
+
+- `corporate-memphis`: Business dashboards
+- `ui-wireframe`: Technical dashboards
+- `cyberpunk-neon`: Futuristic displays
diff --git a/skills/creative/baoyu-infographic/references/layouts/dense-modules.md b/skills/creative/baoyu-infographic/references/layouts/dense-modules.md
new file mode 100644
index 00000000000..5e294e72355
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/dense-modules.md
@@ -0,0 +1,72 @@
+# dense-modules
+
+High-density modular layout with 6-7 typed information modules packed with concrete data.
+
+## Structure
+
+- 6-7 distinct modules per image, each serving a specific information function
+- Every module contains concrete data: brand names, numbers, percentages, parameters
+- Minimal whitespace—compact spacing prioritized over breathing room
+- Smaller text acceptable to maximize information density
+- Each module identified by coordinate label or section marker (e.g., MOD-1, SEC-A)
+
+## Module Archetypes
+
+| Module | Purpose | Content Requirements |
+|--------|---------|---------------------|
+| **Brand/Selection Array** | Grid of options with recommendations | 4-8 items with icons, names, brief descriptions; highlight "best choice" |
+| **Specification Scale** | Quality/measurement gauge | 3-5 levels with precise numerical increments, quality indicators (emoji faces, checkmarks) |
+| **Deep Dive/Detail** | Technical breakdown of key item | Zoom-in callouts, internal components, cross-section or exploded view |
+| **Scenario Comparison** | Side-by-side use cases | 3-6 scenarios with specific recommendations and data per scenario |
+| **Identification Tips** | How-to checklist | 3-5 inspection methods: look/test/check/ask format |
+| **Warning/Pitfall Zone** | Critical mistakes to avoid | 3-5 pitfalls with consequences, 1-2 correct approaches; high visual contrast |
+| **Quick Reference** | Compact summary | Dense table, one-line summaries, decision flowchart, or key takeaways |
+
+## Variants
+
+| Variant | Focus | Visual Emphasis |
+|---------|-------|-----------------|
+| **Coordinate-labeled** | Precision and systematicity | Each module has alphanumeric coordinate (A-01, B-05, C-12), ruler/axis markers |
+| **Grid-cell** | Order and structure | Modules in strict rectangular cells divided by thick lines, Swiss grid feel |
+| **Free-flowing** | Organic density | Magazine-style layout with dotted frames, varying module sizes, connected by arrows |
+
+## Best For
+
+- Product selection guides and buying guides
+- Multi-dimensional comparison content
+- Data-rich educational materials
+- "Avoid pitfalls" / "complete guide" formats
+- Content targeting platforms like Xiaohongshu with high-density visual requirements
+
+## Visual Elements
+
+- Module boundary markers (thick lines, dotted frames, or coordinate grids)
+- Quality indicators per module (emoji faces, checkmarks, crosses, crowns)
+- Data callout boxes with highlighted numbers
+- Comparison arrows and progression indicators
+- Warning/alert visual markers for pitfall modules
+- Metadata in corners (page numbers, timestamps, small barcodes)
+
+## Text Placement
+
+- Main title at top, prominent and impactful
+- Subtitle with module count (e.g., "X大维度全面解析...", roughly "X key dimensions fully analyzed...")
+- Module headers inside colored badges or labeled frames
+- Body text compact, multiple columns within modules
+- Numbers highlighted with accent colors, slightly larger than body text
+
+## Information Density Rules
+
+- Every corner should contain useful information or metadata
+- No decorative-only empty space
+- Text size may be reduced to fit more content—information over font size
+- Each module must have specific data points, not generic descriptions
+- Balance between density and readability: dense but organized
+
+## Recommended Pairings
+
+- `pop-laboratory`: Technical precision with coordinate markers and blueprint grid
+- `morandi-journal`: Hand-drawn warmth with doodle illustrations and organic frames
+- `retro-pop-grid`: 1970s pop art with strict grid cells and bold contrast
+- `corporate-memphis`: Clean business feel for product comparisons
+- `technical-schematic`: Engineering precision for technical product guides
diff --git a/skills/creative/baoyu-infographic/references/layouts/funnel.md b/skills/creative/baoyu-infographic/references/layouts/funnel.md
new file mode 100644
index 00000000000..9c5eb6dff3f
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/funnel.md
@@ -0,0 +1,41 @@
+# funnel
+
+Narrowing stages showing conversion, filtering, or refinement process.
+
+## Structure
+
+- Wide top (input/start)
+- Narrow bottom (output/result)
+- Horizontal layers for stages
+- Progressive narrowing
+- 3-6 stages typically
+
+## Best For
+
+- Sales/marketing funnels
+- Conversion processes
+- Filtering/selection
+- Recruitment pipelines
+- Decision processes
+
+## Visual Elements
+
+- Funnel shape clearly defined
+- Distinct colors per stage
+- Width indicates volume/quantity
+- Stage icons or symbols
+- Numbers/percentages per stage
+
+## Text Placement
+
+- Title at top
+- Stage names inside or beside
+- Metrics/numbers per stage
+- Input label at top
+- Output label at bottom
+
+## Recommended Pairings
+
+- `corporate-memphis`: Marketing funnels
+- `isometric-3d`: Technical pipelines
+- `cartoon-hand-drawn`: Educational funnels
diff --git a/skills/creative/baoyu-infographic/references/layouts/hierarchical-layers.md b/skills/creative/baoyu-infographic/references/layouts/hierarchical-layers.md
new file mode 100644
index 00000000000..dfa5da83577
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/hierarchical-layers.md
@@ -0,0 +1,48 @@
+# hierarchical-layers
+
+Nested layers showing levels of importance, influence, or proximity.
+
+## Structure
+
+- Multiple layers from core to periphery
+- Core/top: most important/central
+- Outer/bottom: decreasing importance
+- 3-7 levels typically
+- Clear boundaries between levels
+
+## Variants
+
+| Variant | Shape | Visual Emphasis |
+|---------|-------|-----------------|
+| **Pyramid** | Triangle, vertical | Top-down hierarchy, quantity |
+| **Concentric** | Rings, radial | Center-out influence, proximity |
+
+## Best For
+
+- Maslow's hierarchy style concepts
+- Priority and importance levels
+- Spheres of influence
+- Organizational structures
+- Stakeholder analysis
+
+## Visual Elements
+
+- Distinct color per level
+- Icons or illustrations per tier
+- Size indicates importance/quantity
+- Labels inside or beside layers
+- Decorative apex/center element
+
+## Text Placement
+
+- Title at top or side
+- Level names inside each tier
+- Brief descriptions outside
+- Quantities or percentages if relevant
+- Legend for color meanings
+
+## Recommended Pairings
+
+- `craft-handmade`: Playful layered concepts
+- `corporate-memphis`: Business hierarchies
+- `technical-schematic`: Technical 3D pyramids
diff --git a/skills/creative/baoyu-infographic/references/layouts/hub-spoke.md b/skills/creative/baoyu-infographic/references/layouts/hub-spoke.md
new file mode 100644
index 00000000000..372e53e0781
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/hub-spoke.md
@@ -0,0 +1,41 @@
+# hub-spoke
+
+Central concept with radiating connections to related items.
+
+## Structure
+
+- Central hub (main concept)
+- Spokes radiating outward
+- Nodes at spoke ends (related concepts)
+- Even or weighted distribution
+- Optional secondary connections
+
+## Best For
+
+- Central theme with components
+- Product features around core
+- Team roles around project
+- Ecosystem mapping
+- Mind maps
+
+## Visual Elements
+
+- Prominent central hub
+- Clear spoke lines
+- Consistent node styling
+- Icons representing each spoke item
+- Optional grouping colors
+
+## Text Placement
+
+- Title at top
+- Core concept in center hub
+- Spoke item labels at nodes
+- Brief descriptions near nodes
+- Connection labels on spokes if needed
+
+## Recommended Pairings
+
+- `cartoon-hand-drawn`: Friendly concept maps
+- `corporate-memphis`: Business ecosystems
+- `subway-map`: Network-style connections
diff --git a/skills/creative/baoyu-infographic/references/layouts/iceberg.md b/skills/creative/baoyu-infographic/references/layouts/iceberg.md
new file mode 100644
index 00000000000..237c53ff8bf
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/iceberg.md
@@ -0,0 +1,41 @@
+# iceberg
+
+Surface vs hidden depths, visible vs underlying factors.
+
+## Structure
+
+- Waterline dividing visible/hidden
+- Tip above water (obvious/surface)
+- Larger mass below (hidden/deep)
+- Sized proportionally to emphasize hidden depth
+- Optional layers within underwater section
+
+## Best For
+
+- Surface vs root causes
+- Visible vs invisible work
+- Symptoms vs underlying issues
+- Public vs private aspects
+- Known vs unknown factors
+
+## Visual Elements
+
+- Clear water/surface line
+- Above: smaller, brighter
+- Below: larger, darker/deeper
+- Wave or water texture
+- Gradient showing depth
+
+## Text Placement
+
+- Title at top
+- Surface items above waterline
+- Hidden items below, larger
+- Waterline label optional
+- Depth indicators for layers
+
+## Recommended Pairings
+
+- `cartoon-hand-drawn`: Friendly metaphor
+- `storybook-watercolor`: Artistic depth
+- `graphic-novel`: Dramatic revelation
diff --git a/skills/creative/baoyu-infographic/references/layouts/isometric-map.md b/skills/creative/baoyu-infographic/references/layouts/isometric-map.md
new file mode 100644
index 00000000000..f8c3a8e5174
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/isometric-map.md
@@ -0,0 +1,41 @@
+# isometric-map
+
+3D-style spatial layout showing locations, relationships, or journey through space.
+
+## Structure
+
+- Isometric 3D perspective
+- Locations as buildings/landmarks
+- Paths connecting locations
+- Spatial relationships visible
+- Bird's-eye view angle
+
+## Best For
+
+- Office/campus layouts
+- City/ecosystem maps
+- User journey maps
+- System architecture
+- Process landscapes
+
+## Visual Elements
+
+- Consistent isometric angle (30°)
+- 3D buildings or objects
+- Pathways and roads
+- Labels floating above
+- Mini scenes at locations
+
+## Text Placement
+
+- Title at top corner
+- Location labels above objects
+- Path labels along routes
+- Legend for symbols
+- Scale indicator if relevant
+
+## Recommended Pairings
+
+- `isometric-3d`: Clean technical maps
+- `pixel-art`: Retro game-style maps
+- `lego-brick`: Playful location maps
diff --git a/skills/creative/baoyu-infographic/references/layouts/jigsaw.md b/skills/creative/baoyu-infographic/references/layouts/jigsaw.md
new file mode 100644
index 00000000000..7ea6e35413b
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/jigsaw.md
@@ -0,0 +1,41 @@
+# jigsaw
+
+Interlocking puzzle pieces showing how parts fit together.
+
+## Structure
+
+- Puzzle pieces that interlock
+- Each piece represents a component
+- Connections show relationships
+- Can be shown assembled or as an exploded view
+- Missing piece highlights gaps
+
+## Best For
+
+- Component relationships
+- Team/skill fit
+- Strategy pieces
+- Integration concepts
+- Completeness assessments
+
+## Visual Elements
+
+- Classic puzzle piece shapes
+- Distinct colors per piece
+- Interlocking edges visible
+- Icons or labels per piece
+- Optional missing piece
+
+## Text Placement
+
+- Title at top
+- Piece labels inside or beside
+- Connection descriptions
+- Missing piece explanation
+- Assembly context
+
+## Recommended Pairings
+
+- `cartoon-hand-drawn`: Friendly integration concepts
+- `paper-cutout`: Tactile puzzle feel
+- `corporate-memphis`: Business strategy pieces
diff --git a/skills/creative/baoyu-infographic/references/layouts/linear-progression.md b/skills/creative/baoyu-infographic/references/layouts/linear-progression.md
new file mode 100644
index 00000000000..427cb52a83e
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/linear-progression.md
@@ -0,0 +1,48 @@
+# linear-progression
+
+Sequential progression showing steps, timeline, or chronological events.
+
+## Structure
+
+- Linear arrangement (horizontal or vertical)
+- Nodes/markers at key points
+- Connecting line or path between nodes
+- Clear start and end points
+- Directional flow indicators
+
+## Variants
+
+| Variant | Focus | Visual Emphasis |
+|---------|-------|-----------------|
+| **Timeline** | Chronological events, dates | Time markers, period labels |
+| **Process** | Action steps, numbered sequence | Step numbers, action icons |
+
+## Best For
+
+- Step-by-step tutorials and how-tos
+- Historical timelines and evolution
+- Project milestones and roadmaps
+- Workflow documentation
+- Onboarding processes
+
+## Visual Elements
+
+- Numbered steps or date markers
+- Arrows or connectors showing direction
+- Icons representing each step/event
+- Consistent node spacing
+- Progress indicators optional
+
+## Text Placement
+
+- Title at top
+- Step/event titles at each node
+- Brief descriptions below nodes
+- Dates or numbers clearly visible
+
+## Recommended Pairings
+
+- `craft-handmade`: Friendly tutorials and timelines
+- `ikea-manual`: Clean assembly instructions
+- `corporate-memphis`: Business process flows
+- `aged-academia`: Historical discoveries
diff --git a/skills/creative/baoyu-infographic/references/layouts/periodic-table.md b/skills/creative/baoyu-infographic/references/layouts/periodic-table.md
new file mode 100644
index 00000000000..7287d111df0
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/periodic-table.md
@@ -0,0 +1,41 @@
+# periodic-table
+
+Grid of categorized elements with consistent cell formatting.
+
+## Structure
+
+- Rectangular grid
+- Each cell is one element
+- Color-coded categories
+- Consistent cell format
+- Optional grouping gaps
+
+## Best For
+
+- Categorized collections
+- Tool/resource catalogs
+- Skill matrices
+- Element collections
+- Reference guides
+
+## Visual Elements
+
+- Uniform cell sizes
+- Category colors
+- Symbol/abbreviation prominent
+- Small icon per cell
+- Category legend
+
+## Text Placement
+
+- Title at top
+- Cell: symbol, name, brief info
+- Category names in legend
+- Optional row/column headers
+- Footnotes for special cases
+
+## Recommended Pairings
+
+- `pop-art`: Vibrant element grids
+- `pixel-art`: Retro collection displays
+- `corporate-memphis`: Business tool catalogs
diff --git a/skills/creative/baoyu-infographic/references/layouts/story-mountain.md b/skills/creative/baoyu-infographic/references/layouts/story-mountain.md
new file mode 100644
index 00000000000..ac28521970f
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/story-mountain.md
@@ -0,0 +1,41 @@
+# story-mountain
+
+Plot structure visualization showing rising action, climax, and resolution.
+
+## Structure
+
+- Mountain/arc shape
+- Rising slope (build-up)
+- Peak (climax)
+- Falling slope (resolution)
+- Start and end at base level
+
+## Best For
+
+- Narrative structures
+- Project lifecycles
+- Tension/release patterns
+- Emotional journeys
+- Campaign arcs
+
+## Visual Elements
+
+- Mountain or arc curve
+- Points along the path
+- Climax visually emphasized
+- Slope steepness meaningful
+- Base camps or milestones
+
+## Text Placement
+
+- Title at top
+- Stage labels along path
+- Climax prominently labeled
+- Brief descriptions at points
+- Start/end clearly marked
+
+## Recommended Pairings
+
+- `storybook-watercolor`: Narrative journeys
+- `cartoon-hand-drawn`: Educational plot diagrams
+- `graphic-novel`: Dramatic story arcs
diff --git a/skills/creative/baoyu-infographic/references/layouts/structural-breakdown.md b/skills/creative/baoyu-infographic/references/layouts/structural-breakdown.md
new file mode 100644
index 00000000000..86f01a69024
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/structural-breakdown.md
@@ -0,0 +1,48 @@
+# structural-breakdown
+
+Internal structure visualization with labeled parts or layers.
+
+## Structure
+
+- Central subject (object, system, body)
+- Parts or layers clearly shown
+- Labels with callout lines
+- Exploded or cutaway view
+- Optional zoomed detail sections
+
+## Variants
+
+| Variant | View Type | Visual Emphasis |
+|---------|-----------|-----------------|
+| **Exploded** | Parts separated outward | Component relationships |
+| **Cross-section** | Sliced/cutaway view | Internal layers, composition |
+
+## Best For
+
+- Product part breakdowns
+- Anatomy explanations
+- System components
+- Device teardowns
+- Material composition
+
+## Visual Elements
+
+- Main subject clearly rendered
+- Callout lines with dots/arrows
+- Label boxes at endpoints
+- Optional numbered parts
+- Layer boundaries or separation
+
+## Text Placement
+
+- Title at top
+- Part/layer labels at callouts
+- Brief descriptions in boxes
+- Legend for numbered systems
+- Depth/thickness if relevant
+
+## Recommended Pairings
+
+- `technical-schematic`: Technical schematics
+- `aged-academia`: Classic anatomical style
+- `craft-handmade`: Friendly breakdowns
diff --git a/skills/creative/baoyu-infographic/references/layouts/tree-branching.md b/skills/creative/baoyu-infographic/references/layouts/tree-branching.md
new file mode 100644
index 00000000000..520ef3bef78
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/tree-branching.md
@@ -0,0 +1,41 @@
+# tree-branching
+
+Hierarchical structure branching from root to leaves, showing categories and subcategories.
+
+## Structure
+
+- Root/trunk at top or left
+- Branches splitting into sub-branches
+- Leaves as terminal nodes
+- Clear parent-child relationships
+- Balanced or organic branching
+
+## Best For
+
+- Taxonomies and classifications
+- Decision trees
+- Organizational charts
+- File/folder structures
+- Family trees
+
+## Visual Elements
+
+- Connecting lines showing relationships
+- Nodes at branch points
+- Icons or labels at each node
+- Color coding by branch
+- Visual weight decreasing toward leaves
+
+## Text Placement
+
+- Title at top
+- Root concept prominently labeled
+- Branch and leaf labels
+- Optional descriptions at key nodes
+- Legend for categories
+
+## Recommended Pairings
+
+- `cartoon-hand-drawn`: Friendly taxonomies
+- `da-vinci-notebook`: Scientific classifications
+- `origami`: Geometric tree structures
diff --git a/skills/creative/baoyu-infographic/references/layouts/venn-diagram.md b/skills/creative/baoyu-infographic/references/layouts/venn-diagram.md
new file mode 100644
index 00000000000..9b3e0a8a7c4
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/venn-diagram.md
@@ -0,0 +1,41 @@
+# venn-diagram
+
+Overlapping circles showing relationships, commonalities, and differences.
+
+## Structure
+
+- 2-3 overlapping circles
+- Each circle is a category/concept
+- Overlaps show shared elements
+- Center shows what is common to all
+- Unique areas for exclusives
+
+## Best For
+
+- Concept relationships
+- Skill overlaps
+- Market segments
+- Comparative analysis
+- Finding common ground
+
+## Visual Elements
+
+- Translucent circle fills
+- Clear overlap regions
+- Distinct colors per circle
+- Icons in regions
+- Boundary labels
+
+## Text Placement
+
+- Title at top
+- Circle labels outside or on edge
+- Items in appropriate regions
+- Overlap region labels
+- Legend if needed
+
+## Recommended Pairings
+
+- `cartoon-hand-drawn`: Friendly concept overlaps
+- `corporate-memphis`: Business segment analysis
+- `pop-art`: High-contrast comparisons
diff --git a/skills/creative/baoyu-infographic/references/layouts/winding-roadmap.md b/skills/creative/baoyu-infographic/references/layouts/winding-roadmap.md
new file mode 100644
index 00000000000..aad5543ad97
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/layouts/winding-roadmap.md
@@ -0,0 +1,41 @@
+# winding-roadmap
+
+Curved path showing journey with milestones and checkpoints.
+
+## Structure
+
+- S-curve or winding path
+- Milestones along the path
+- Start and destination points
+- Side elements (obstacles, helpers)
+- Progress indicators
+
+## Best For
+
+- Project roadmaps
+- Career paths
+- Customer journeys
+- Learning paths
+- Strategy timelines
+
+## Visual Elements
+
+- Curving road or river
+- Milestone markers/flags
+- Scene elements along path
+- Vehicle/character on journey
+- Destination landmark
+
+## Text Placement
+
+- Title at top
+- Milestone labels at each point
+- Path section names
+- Destination description
+- Optional timeline indicators
+
+## Recommended Pairings
+
+- `storybook-watercolor`: Whimsical journeys
+- `cartoon-hand-drawn`: Friendly roadmaps
+- `isometric-3d`: Technical project paths
diff --git a/skills/creative/baoyu-infographic/references/structured-content-template.md b/skills/creative/baoyu-infographic/references/structured-content-template.md
new file mode 100644
index 00000000000..32d527ff99e
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/structured-content-template.md
@@ -0,0 +1,244 @@
+# Structured Content Template
+
+Template for generating structured infographic content that informs the visual designer.
+
+## Purpose
+
+This document bridges content analysis and visual design:
+- Transforms source material into designer-ready format
+- Organizes learning objectives into visual sections
+- Preserves all source data verbatim
+- Separates content from design instructions
+
+## Instructional Design Process
+
+### Phase 1: High-Level Outline
+
+1. **Title**: Capture the essence in a compelling headline
+2. **Overview**: Brief description (1-2 sentences)
+3. **Learning Objectives**: List what the viewer will understand
+
+### Phase 2: Section Development
+
+For each learning objective:
+
+1. **Key Concept**: One-sentence summary of the section
+2. **Content**: Points extracted verbatim from source
+3. **Visual Element**: What should be shown visually
+4. **Text Labels**: Exact text for headlines, subheads, labels
+
+### Phase 3: Data Integrity Check
+
+Verify all source data is:
+- Copied exactly (no paraphrasing)
+- Attributed correctly (for quotes)
+- Formatted consistently
+
+## Critical Rules
+
+| Rule | Requirement | Example |
+|------|-------------|---------|
+| **Output format** | Markdown only | Use proper headers, lists, code blocks |
+| **Tone** | Expert trainer | Knowledgeable, clear, encouraging |
+| **No new information** | Only source content | Don't add examples not in source |
+| **Verbatim data** | Exact copies | "73% increase" not "significant increase" |
+
+## Structured Content Format
+
+```markdown
+# [Infographic Title]
+
+## Overview
+[Brief description of what this infographic conveys - 1-2 sentences]
+
+## Learning Objectives
+The viewer will understand:
+1. [Primary objective]
+2. [Secondary objective]
+3. [Tertiary objective if applicable]
+
+---
+
+## Section 1: [Section Title]
+
+**Key Concept**: [One-sentence summary of this section]
+
+**Content**:
+- [Point 1 - verbatim from source]
+- [Point 2 - verbatim from source]
+- [Point 3 - verbatim from source]
+
+**Visual Element**: [Description of what to show visually]
+- Type: [icon/chart/illustration/diagram/photo]
+- Subject: [what it depicts]
+- Treatment: [how it should be presented]
+
+**Text Labels**:
+- Headline: "[Exact text for headline]"
+- Subhead: "[Exact text for subhead]"
+- Labels: "[Label 1]", "[Label 2]", "[Label 3]"
+
+---
+
+## Section 2: [Section Title]
+
+**Key Concept**: [One-sentence summary]
+
+**Content**:
+- [Point 1]
+- [Point 2]
+
+**Visual Element**: [Description]
+
+**Text Labels**:
+- Headline: "[text]"
+- Labels: "[Label 1]", "[Label 2]"
+
+---
+
+[Continue for each section...]
+
+---
+
+## Data Points (Verbatim)
+
+All statistics, numbers, and quotes exactly as they appear in source:
+
+### Statistics
+- "[Exact statistic 1]"
+- "[Exact statistic 2]"
+- "[Exact statistic 3]"
+
+### Quotes
+- "[Exact quote]" — [Attribution]
+
+### Key Terms
+- **[Term 1]**: [Definition from source]
+- **[Term 2]**: [Definition from source]
+
+---
+
+## Design Instructions
+
+Extracted from user's steering prompt:
+
+### Style Preferences
+- [Any color preferences]
+- [Any mood/aesthetic preferences]
+- [Any artistic style preferences]
+
+### Layout Preferences
+- [Any structure preferences]
+- [Any organization preferences]
+
+### Other Requirements
+- [Any other visual requirements from user]
+- [Target platform if specified]
+- [Brand guidelines if any]
+```
+
+## Section Types by Content
+
+### For Process/Steps
+
+```markdown
+## Section N: Step N - [Step Title]
+
+**Key Concept**: [What this step accomplishes]
+
+**Content**:
+- Action: [What to do]
+- Details: [How to do it]
+- Note: [Important consideration]
+
+**Visual Element**:
+- Type: numbered step icon
+- Subject: [visual representing the action]
+- Arrow: leads to next step
+
+**Text Labels**:
+- Headline: "Step N: [Title]"
+- Action: "[Imperative verb + object]"
+```
+
+### For Comparison
+
+```markdown
+## Section N: [Item A] vs [Item B]
+
+**Key Concept**: [What distinguishes them]
+
+**Content**:
+| Aspect | [Item A] | [Item B] |
+|--------|----------|----------|
+| [Factor 1] | [Value] | [Value] |
+| [Factor 2] | [Value] | [Value] |
+
+**Visual Element**:
+- Type: split comparison
+- Left: [Item A representation]
+- Right: [Item B representation]
+
+**Text Labels**:
+- Headline: "[Item A] vs [Item B]"
+- Left label: "[Item A name]"
+- Right label: "[Item B name]"
+```
+
+### For Hierarchy
+
+```markdown
+## Section N: [Level Name]
+
+**Key Concept**: [What this level represents]
+
+**Content**:
+- Position: [Top/Middle/Bottom]
+- Priority: [Importance level]
+- Contains: [Elements at this level]
+
+**Visual Element**:
+- Type: layer/tier
+- Size: [relative to other levels]
+- Position: [where in hierarchy]
+
+**Text Labels**:
+- Level title: "[Name]"
+- Description: "[Brief description]"
+```
+
+### For Data/Statistics
+
+```markdown
+## Section N: [Metric Name]
+
+**Key Concept**: [What this data shows]
+
+**Content**:
+- Value: [Exact number/percentage]
+- Context: [What it means]
+- Comparison: [Benchmark if any]
+
+**Visual Element**:
+- Type: [chart/number highlight/gauge]
+- Emphasis: [how to draw attention]
+
+**Text Labels**:
+- Main number: "[Exact value]"
+- Label: "[Metric name]"
+- Context: "[Brief context]"
+```
+
+## Quality Checklist
+
+Before finalizing structured content:
+
+- [ ] Title captures the main message
+- [ ] Learning objectives are clear and measurable
+- [ ] Each section maps to an objective
+- [ ] All content is verbatim from source
+- [ ] Visual elements are clearly described
+- [ ] Text labels are specified exactly
+- [ ] Data points are collected and verified
+- [ ] Design instructions are separated
+- [ ] No new information has been added
diff --git a/skills/creative/baoyu-infographic/references/styles/aged-academia.md b/skills/creative/baoyu-infographic/references/styles/aged-academia.md
new file mode 100644
index 00000000000..d2094ccd45f
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/aged-academia.md
@@ -0,0 +1,36 @@
+# aged-academia
+
+Historical scientific illustration with aged paper aesthetic.
+
+## Color Palette
+
+- Primary: Sepia brown (#704214), aged ink, muted earth tones
+- Background: Parchment (#F4E4BC), yellowed paper texture
+- Accents: Faded red annotations, iron gall ink spots
+
+## Variants
+
+| Variant | Focus | Visual Emphasis |
+|---------|-------|-----------------|
+| **Notebook** | Personal sketches, inventions | Cursive notes, margin annotations |
+| **Specimen** | Scientific classification | Numbered diagrams, Latin labels |
+
+## Visual Elements
+
+- Aged paper texture overlay
+- Detailed cross-hatching and line work
+- Scientific illustration precision
+- Study notes and annotations
+- Specimen plate or sketch aesthetic
+- Numbered diagram elements
+
+## Typography
+
+- Handwritten cursive or serif fonts
+- Scientific annotations
+- Small caps for labels
+- Italics for scientific names
+
+## Best For
+
+Scientific education, biology topics, historical explanations, inventions, nature documentation
diff --git a/skills/creative/baoyu-infographic/references/styles/bold-graphic.md b/skills/creative/baoyu-infographic/references/styles/bold-graphic.md
new file mode 100644
index 00000000000..3f94bf600b6
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/bold-graphic.md
@@ -0,0 +1,36 @@
+# bold-graphic
+
+High-contrast comic style with bold outlines and dramatic visuals.
+
+## Color Palette
+
+- Primary: Bold primaries - red, yellow, blue, black
+- Background: White, halftone patterns, dramatic shadows
+- Accents: Spot colors, neon highlights
+
+## Variants
+
+| Variant | Focus | Visual Emphasis |
+|---------|-------|-----------------|
+| **Graphic-novel** | Dramatic narratives | Action lines, hatching, panels |
+| **Pop-art** | High-energy impact | Halftone dots, Warhol repetition |
+
+## Visual Elements
+
+- Bold black outlines
+- High contrast compositions
+- Halftone dot patterns
+- Comic panel borders optional
+- Action lines and motion
+- Speech bubbles and sound effects
+
+## Typography
+
+- Comic book lettering
+- Impact fonts for emphasis
+- POW/BANG effects for pop-art
+- Caption boxes for narrative
+
+## Best For
+
+Attention-grabbing content, dramatic narratives, pop culture, marketing, high-energy presentations
diff --git a/skills/creative/baoyu-infographic/references/styles/chalkboard.md b/skills/creative/baoyu-infographic/references/styles/chalkboard.md
new file mode 100644
index 00000000000..96b91b43f7c
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/chalkboard.md
@@ -0,0 +1,61 @@
+# chalkboard
+
+Black chalkboard background with colorful chalk drawing style
+
+## Design Aesthetic
+
+Classic classroom chalkboard aesthetic with hand-drawn chalk illustrations. Nostalgic educational feel with imperfect, sketchy lines that capture the warmth of traditional teaching. Colorful chalk creates visual hierarchy while maintaining the authentic chalkboard experience.
+
+## Background
+
+- Color: Chalkboard Black (#1A1A1A) or Dark Green-Black (#1C2B1C)
+- Texture: Realistic chalkboard texture with subtle scratches, dust particles, and faint eraser marks
+
+## Typography
+
+Hand-drawn chalk lettering style with visible chalk texture. Imperfect baseline adds authenticity. White or bright colored chalk for emphasis.
+
+## Color Palette
+
+| Role | Color | Hex | Usage |
+|------|-------|-----|-------|
+| Background | Chalkboard Black | #1A1A1A | Primary background |
+| Alt Background | Green-Black | #1C2B1C | Traditional green board |
+| Primary Text | Chalk White | #F5F5F5 | Main text, outlines |
+| Accent 1 | Chalk Yellow | #FFE566 | Highlights, emphasis |
+| Accent 2 | Chalk Pink | #FF9999 | Secondary highlights |
+| Accent 3 | Chalk Blue | #66B3FF | Diagrams, links |
+| Accent 4 | Chalk Green | #90EE90 | Success, nature |
+| Accent 5 | Chalk Orange | #FFB366 | Warnings, energy |
+
+## Visual Elements
+
+- Hand-drawn chalk illustrations with sketchy, imperfect lines
+- Chalk dust effects around text and key elements
+- Doodles: stars, arrows, underlines, circles, checkmarks
+- Mathematical formulas and simple diagrams
+- Eraser smudges and chalk residue textures
+- Wooden frame border optional
+- Stick figures and simple icons
+- Connection lines with hand-drawn feel
+
+## Style Rules
+
+### Do
+
+- Maintain authentic chalk texture on all elements
+- Use imperfect, hand-drawn quality throughout
+- Add subtle chalk dust and smudge effects
+- Create visual hierarchy with color variety
+- Include playful doodles and annotations
+
+### Don't
+
+- Use perfect geometric shapes
+- Create clean digital-looking lines
+- Add photorealistic elements
+- Use gradients or glossy effects
+
+## Best For
+
+Educational content, tutorials, classroom themes, teaching materials, workshops, informal learning sessions, knowledge sharing
diff --git a/skills/creative/baoyu-infographic/references/styles/claymation.md b/skills/creative/baoyu-infographic/references/styles/claymation.md
new file mode 100644
index 00000000000..d9c2b7e5f0a
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/claymation.md
@@ -0,0 +1,29 @@
+# claymation
+
+3D clay figure aesthetic with stop-motion charm
+
+## Color Palette
+
+- Primary: Saturated clay colors - bright but slightly muted
+- Background: Neutral studio backdrop, soft gradients
+- Accents: Complementary clay colors, shiny highlights
+
+## Visual Elements
+
+- Clay/plasticine texture on all objects
+- Fingerprint marks and imperfections
+- Rounded, sculpted forms
+- Soft shadows
+- Stop-motion staging
+- Miniature set aesthetic
+
+## Typography
+
+- Extruded clay letters
+- Dimensional, rounded text
+- Playful and chunky
+- Embedded in clay scenes
+
+## Best For
+
+Playful explanations, children's content, stop-motion narratives, friendly processes
diff --git a/skills/creative/baoyu-infographic/references/styles/corporate-memphis.md b/skills/creative/baoyu-infographic/references/styles/corporate-memphis.md
new file mode 100644
index 00000000000..1e7d5874852
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/corporate-memphis.md
@@ -0,0 +1,29 @@
+# corporate-memphis
+
+Flat vector people with vibrant geometric fills
+
+## Color Palette
+
+- Primary: Bright, saturated - purple, orange, teal, yellow
+- Background: White or light pastels
+- Accents: Gradient fills, geometric patterns
+
+## Visual Elements
+
+- Flat vector illustration
+- Disproportionate human figures
+- Abstract body shapes
+- Floating geometric elements
+- No outlines, solid fills
+- Plant and object accents
+
+## Typography
+
+- Clean sans-serif
+- Bold headings
+- Professional but friendly
+- Minimal decoration
+
+## Best For
+
+Business presentations, tech products, marketing materials, corporate training
diff --git a/skills/creative/baoyu-infographic/references/styles/craft-handmade.md b/skills/creative/baoyu-infographic/references/styles/craft-handmade.md
new file mode 100644
index 00000000000..86354111a57
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/craft-handmade.md
@@ -0,0 +1,44 @@
+# craft-handmade (DEFAULT)
+
+Hand-drawn and paper craft aesthetic with warm, organic feel.
+
+## Color Palette
+
+- Primary: Warm pastels, soft saturated colors, craft paper tones
+- Background: Light cream (#FFF8F0), textured paper (#F5F0E6)
+- Accents: Bold highlights, construction paper colors
+
+## Variants
+
+| Variant | Focus | Visual Emphasis |
+|---------|-------|-----------------|
+| **Hand-drawn** | Cartoon illustration | Simple icons, slightly imperfect lines |
+| **Paper-cutout** | Layered paper craft | Drop shadows, torn edges, texture |
+
+## Visual Elements
+
+- Hand-drawn or cut-paper quality
+- Organic, slightly imperfect shapes
+- Layered depth with shadows (paper variant)
+- Simple cartoon elements and icons
+- Character illustrations (people, personalities in cartoon form)
+- Ample whitespace, clean composition
+- Keywords and core concepts highlighted
+- **Strictly hand-drawn—no realistic or photographic elements**
+
+## Style Enforcement
+
+- All imagery must maintain cartoon/illustrated aesthetic
+- Replace real photos or realistic figures with hand-drawn equivalents
+- Maintain consistent line weight and illustration style throughout
+
+## Typography
+
+- Hand-drawn or casual font style
+- Clear, readable labels
+- Keywords emphasized with larger/bolder text
+- Cut-out letter style for paper variant
+
+## Best For
+
+Educational content, general explanations, friendly infographics, children's content, playful hierarchies
diff --git a/skills/creative/baoyu-infographic/references/styles/cyberpunk-neon.md b/skills/creative/baoyu-infographic/references/styles/cyberpunk-neon.md
new file mode 100644
index 00000000000..5a8681355ef
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/cyberpunk-neon.md
@@ -0,0 +1,29 @@
+# cyberpunk-neon
+
+Neon glow on dark backgrounds, futuristic aesthetic
+
+## Color Palette
+
+- Primary: Neon pink (#FF00FF), cyan (#00FFFF), electric blue
+- Background: Deep black (#0A0A0A), dark purple gradients
+- Accents: Neon glow effects, chrome reflections
+
+## Visual Elements
+
+- Glowing neon outlines
+- Dark atmospheric backgrounds
+- Digital glitch effects
+- Circuit patterns
+- Holographic elements
+- Rain and reflections
+
+## Typography
+
+- Glowing neon text
+- Digital/tech fonts
+- Flickering effects
+- Outlined glow letters
+
+## Best For
+
+Tech futures, gaming content, digital culture, futuristic concepts, night aesthetics
diff --git a/skills/creative/baoyu-infographic/references/styles/hand-drawn-edu.md b/skills/creative/baoyu-infographic/references/styles/hand-drawn-edu.md
new file mode 100644
index 00000000000..64cdf7eb37b
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/hand-drawn-edu.md
@@ -0,0 +1,63 @@
+# hand-drawn-edu
+
+Hand-drawn educational infographic with macaron pastel color blocks on warm cream paper texture.
+
+## Color Palette
+
+- Background: Warm cream (#F5F0E8) with subtle paper grain texture
+- Primary text: Deep charcoal (#2D2D2D) for headlines, outlines
+- Macaron Blue: #A8D8EA for cool-toned information zones
+- Macaron Mint: #B5E5CF for growth/positive zones
+- Macaron Lavender: #D5C6E0 for abstract/concept zones
+- Macaron Peach: #FFD5C2 for warm-toned zones
+- Accent: Coral Red (#E8655A) for key data, warnings, emphasis
+- Muted annotations: Warm gray (#6B6B6B) for secondary labels
+
+## Visual Elements
+
+- Macaron pastel rounded cards as distinct information zones
+- Hand-drawn wavy connection lines and arrows with small text labels
+- Simple stick-figure characters and cartoon icons to humanize concepts
+- Doodle decorations: small stars, underlines, spirals, sparkles
+- Color fills don't completely fill outlines — preserve casual hand-drawn feel
+- Dashed borders for secondary or contained zones
+- Small icon doodles (clipboard, lock, checkmark, lightbulb) to reinforce concepts
+- Bold centered quote or takeaway at the bottom
+- Slight hand-drawn wobble on all lines and shapes
+
+## Variants
+
+| Variant | Focus | Visual Emphasis |
+|---------|-------|-----------------|
+| **Sketch-notes** | Concept mapping | More stick figures, thought bubbles, connecting arrows |
+| **Pastel cards** | Structured info | Cleaner macaron blocks, less doodle, more white space |
+
+## Typography
+
+- Main title: Bold hand-drawn lettering with organic strokes, large confident letterforms with slight wobble
+- Section headers: Hand-lettered text on or inside macaron color blocks
+- Body text: Clear handwritten print style, legible but not mechanical
+- Annotations: Warm gray (#6B6B6B), smaller, neat handwritten labels
+- Keywords: Bold emphasis within body text
+
+## Style Enforcement
+
+- All lines must have slight hand-drawn wobble — no perfect geometry
+- Each information zone uses a distinct macaron color block
+- Maintain consistent wobble quality across all shapes and lines
+- Include at least one simple cartoon character or stick figure
+- Generous white space between zones — each zone should breathe
+- Maximum 4 macaron colors per infographic
+
+## Avoid
+
+- Perfect geometric shapes or straight lines
+- Photorealistic elements or stock illustration style
+- Pure white backgrounds
+- Flat vector icons or digital-precision graphics
+- Overcrowded layouts — let zones breathe
+- Corporate or clinical aesthetic
+
+## Best For
+
+Educational diagrams, process explainers, concept maps, knowledge summaries, tutorial walkthroughs, onboarding visuals
diff --git a/skills/creative/baoyu-infographic/references/styles/ikea-manual.md b/skills/creative/baoyu-infographic/references/styles/ikea-manual.md
new file mode 100644
index 00000000000..d859828d143
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/ikea-manual.md
@@ -0,0 +1,29 @@
+# ikea-manual
+
+Minimal line art assembly instruction style
+
+## Color Palette
+
+- Primary: Black lines, minimal fills
+- Background: White or cream paper
+- Accents: Red for warnings, blue for highlights
+
+## Visual Elements
+
+- Simple line drawings
+- Numbered step sequences
+- Arrow indicators
+- Exploded assembly views
+- Wordless communication
+- Stick figures for scale
+
+## Typography
+
+- Minimal text
+- Step numbers prominent
+- Universal symbols
+- Simple sans-serif when needed
+
+## Best For
+
+Step-by-step instructions, assembly guides, how-to content, universal communication
diff --git a/skills/creative/baoyu-infographic/references/styles/kawaii.md b/skills/creative/baoyu-infographic/references/styles/kawaii.md
new file mode 100644
index 00000000000..a7531a614b6
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/kawaii.md
@@ -0,0 +1,29 @@
+# kawaii
+
+Japanese cute style with big eyes and pastel colors
+
+## Color Palette
+
+- Primary: Soft pastels - pink (#FFB6C1), mint (#98D8C8), lavender (#E6E6FA)
+- Background: Light pink or cream, sparkle overlays
+- Accents: Bright pops, star and heart shapes
+
+## Visual Elements
+
+- Big sparkly eyes on characters
+- Rounded, soft shapes
+- Blushing cheeks
+- Sparkles and stars scattered
+- Cute animal characters
+- Chibi proportions
+
+## Typography
+
+- Rounded, bubbly fonts
+- Cute decorations on letters
+- Hearts and stars in text
+- Soft, friendly appearance
+
+## Best For
+
+Cute tutorials, children's education, lifestyle content, character-driven explanations
diff --git a/skills/creative/baoyu-infographic/references/styles/knolling.md b/skills/creative/baoyu-infographic/references/styles/knolling.md
new file mode 100644
index 00000000000..cd55c99377c
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/knolling.md
@@ -0,0 +1,29 @@
+# knolling
+
+Organized flat-lay with top-down arrangement
+
+## Color Palette
+
+- Primary: Object's natural colors
+- Background: Solid color - black, white, or colored surface
+- Accents: Shadows, subtle highlights
+
+## Visual Elements
+
+- Top-down camera angle
+- Objects arranged at 90° angles
+- Equal spacing between items
+- Clean organization
+- Symmetry and order
+- No overlapping items
+
+## Typography
+
+- Clean labels
+- Positioned outside objects
+- Connecting lines to items
+- Minimal, catalog-style
+
+## Best For
+
+Product collections, tool inventories, gear layouts, organized overviews
diff --git a/skills/creative/baoyu-infographic/references/styles/lego-brick.md b/skills/creative/baoyu-infographic/references/styles/lego-brick.md
new file mode 100644
index 00000000000..582cc654aad
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/lego-brick.md
@@ -0,0 +1,29 @@
+# lego-brick
+
+Toy brick construction with playful aesthetic
+
+## Color Palette
+
+- Primary: Classic LEGO colors - red, blue, yellow, green, white
+- Background: Light gray baseplate or white
+- Accents: Bright primary pops, shiny studs
+
+## Visual Elements
+
+- Visible brick studs
+- Modular construction
+- Minifigure characters
+- Building instruction style
+- Stackable elements
+- Plastic sheen
+
+## Typography
+
+- Blocky, bold fonts
+- LEGO instruction style
+- Step numbers
+- Playful appearance
+
+## Best For
+
+Building concepts, modular systems, playful education, children's content
diff --git a/skills/creative/baoyu-infographic/references/styles/morandi-journal.md b/skills/creative/baoyu-infographic/references/styles/morandi-journal.md
new file mode 100644
index 00000000000..951f725b102
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/morandi-journal.md
@@ -0,0 +1,60 @@
+# morandi-journal
+
+Hand-drawn doodle illustration with warm Morandi color tones and cozy bullet journal aesthetic.
+
+## Color Palette
+
+- Background: Warm cream/beige with subtle paper texture (#F5F0E6)
+- Primary: Muted teal/sage green (#7BA3A8) for headers and frames
+- Secondary: Warm terracotta/orange (#D4956A) for highlights and numbers
+- Line art: Dark charcoal brown (#4A4540)
+- Soft highlights: Pale yellow (#F5E6C8)
+
+## Visual Elements
+
+- Hand-drawn doodle illustrations with organic, slightly imperfect ink lines
+- Washi tape strip decorations (diagonal stripes pattern, beige and brown)
+- Rounded card containers for brand/option items
+- Hand-drawn rulers, scales, and progress bars with emoji quality indicators
+- Smiley/frowny faces as quality markers (😊✓ 😐 ☹️✗)
+- Dotted line frames around sections
+- Connecting arrows and dotted lines between modules
+- Corner decorations: tiny houses, stars, sparkles, clouds
+- Wavy line dividers between sections
+- Callout bubbles for tips
+- Magnifying glass icons for identification tips
+- Thumbs up/down icons (hand-drawn style)
+
+## Variants
+
+| Variant | Focus | Visual Emphasis |
+|---------|-------|-----------------|
+| **Cozy journal** | Maximum warmth | More washi tape, stickers, decorative doodles |
+| **Clean sketch** | Readability | Cleaner lines, less decoration, more structured |
+
+## Typography
+
+- Main title: Bold hand-lettered calligraphy style with decorative flourishes
+- Module headers: Clean handwritten text in white on dark teal rounded badge (#6B9080)
+- Body text: Neat handwritten print style, easy to read
+- Numbers: Highlighted in terracotta (#D4956A), slightly larger than body
+
+## Style Enforcement
+
+- All imagery must maintain hand-drawn/doodle aesthetic—no digital precision
+- Organic, slightly imperfect shapes throughout
+- Sketch-like quality with visible line weight variations
+- Warm and cozy journal feel, not clinical or corporate
+
+## Avoid
+
+- Flat vector icons or digital emoji glyphs (quality-marker faces must be hand-drawn)
+- Clean geometric shapes
+- Stock illustration style
+- Strict grid layout
+- Pure white background
+- Digital/corporate look
+
+## Best For
+
+Product selection guides, lifestyle content, educational overviews, consumer-facing comparison content, Xiaohongshu-style posts
diff --git a/skills/creative/baoyu-infographic/references/styles/origami.md b/skills/creative/baoyu-infographic/references/styles/origami.md
new file mode 100644
index 00000000000..7a0bf5e2201
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/origami.md
@@ -0,0 +1,29 @@
+# origami
+
+Folded paper forms with geometric precision
+
+## Color Palette
+
+- Primary: Solid origami paper colors - red, blue, green, gold
+- Background: White or soft gray, subtle shadows
+- Accents: Paper fold highlights, crisp shadows
+
+## Visual Elements
+
+- Geometric folded shapes
+- Visible fold lines
+- Cast shadows showing depth
+- Paper texture
+- Angular, faceted forms
+- Low-poly aesthetic
+
+## Typography
+
+- Clean geometric fonts
+- Angular letterforms
+- Folded paper text effect
+- Minimal, precise labels
+
+## Best For
+
+Geometric concepts, transformation topics, Japanese themes, abstract representations
diff --git a/skills/creative/baoyu-infographic/references/styles/pixel-art.md b/skills/creative/baoyu-infographic/references/styles/pixel-art.md
new file mode 100644
index 00000000000..1fab4184876
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/pixel-art.md
@@ -0,0 +1,29 @@
+# pixel-art
+
+Retro 8-bit gaming aesthetic
+
+## Color Palette
+
+- Primary: Limited palette - NES/SNES colors
+- Background: Black or dark blue, scanlines optional
+- Accents: Bright pixel highlights, CRT glow
+
+## Visual Elements
+
+- Visible pixel grid
+- Limited color count per sprite
+- 8-bit or 16-bit style
+- Retro game UI elements
+- Pixel-perfect edges
+- Dithering for gradients
+
+## Typography
+
+- Pixel fonts
+- Blocky letterforms
+- Game UI style text
+- Score/stat display style
+
+## Best For
+
+Gaming topics, nostalgia content, developer audiences, retro tech themes
diff --git a/skills/creative/baoyu-infographic/references/styles/pop-laboratory.md b/skills/creative/baoyu-infographic/references/styles/pop-laboratory.md
new file mode 100644
index 00000000000..f53014dd96a
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/pop-laboratory.md
@@ -0,0 +1,48 @@
+# pop-laboratory
+
+Lab manual precision meets pop art color impact—coordinate systems, technical diagrams, and fluorescent accents on blueprint grid.
+
+## Color Palette
+
+- Background: Professional grayish-white with faint blueprint grid texture (#F2F2F2)
+- Primary: Muted teal/sage green (#B8D8BE) for major functional blocks and data zones
+- High-alert accent: Vibrant fluorescent pink (#E91E63) strictly for warnings, critical data, or "winner" highlights
+- Marker highlights: Vivid lemon yellow (#FFF200) as translucent highlighter effect for keywords
+- Line art: Ultra-fine charcoal brown (#2D2926) for technical grids, coordinates, and hairlines
+
+## Visual Elements
+
+- Coordinate-style labels on every module (e.g., R-20, G-02, SEC-08)
+- Technical diagrams: exploded views, cross-sections with anchor points, architectural skeletal lines
+- Vertical/horizontal rulers with precise markers (0.5mm, 1.8mm, 45°)
+- "Marker-over-print" effect: color blocks slightly offset from text, postmodern print feel
+- Cross-hair targets, mathematical symbols (Σ, Δ, ∞), directional arrows (X/Y axis)
+- Microscopic detail annotations alongside macroscopic bold headers
+- Corner metadata: tiny barcodes, timestamps, technical parameters
+- High contrast between massive bold headers and tiny 8pt-style annotations
+
+## Typography
+
+- Headers: Bold brutalist characters, high visual impact
+- Body: Professional sans-serif or crisp technical print
+- Numbers: Large, highlighted with yellow or teal to stand out
+- Annotations: Ultra-crisp, small technical labels
+
+## Style Enforcement
+
+- Strictly systematic color usage: only teal, pink, yellow, charcoal—no rainbow palette
+- Sufficient fine grid lines and coordinate annotations throughout
+- Maintain tension between large impactful headers and small precise parameters
+- Lab manual aesthetic: mix of microscopic details and macroscopic data
+
+## Avoid
+
+- Cute or cartoonish doodles
+- Soft pastels or generic textures
+- Empty white space
+- Flat vector stock icons
+- Organic or hand-drawn imperfections
+
+## Best For
+
+Technical product guides, specification comparisons, precision-focused data visualization, engineering-adjacent content
diff --git a/skills/creative/baoyu-infographic/references/styles/retro-pop-grid.md b/skills/creative/baoyu-infographic/references/styles/retro-pop-grid.md
new file mode 100644
index 00000000000..08c34d358bf
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/retro-pop-grid.md
@@ -0,0 +1,47 @@
+# retro-pop-grid
+
+1970s retro pop art with strict Swiss international grid, thick black outlines, and flat color blocks.
+
+## Color Palette
+
+- Background: Warm vintage cream/beige (#F5F0E6)
+- Flat accents: Salmon pink, sky blue, mustard yellow, mint green—all muted retro tones
+- Contrast blocks: Solid pure black (#000000) and solid pure white (#FFFFFF) used strategically for extreme contrast
+- Line art and outlines: Solid thick black
+
+## Visual Elements
+
+- Uniform thick black outlines on all illustrations, text boxes, and grid dividers
+- Pure 2D flat vector aesthetic with subtle screen print texture
+- Strict Swiss international grid: poster divided into square and rectangular cells by thick black lines
+- Black-background cells with white text for warnings or key categories (inverted contrast)
+- Geometric fill patterns in empty cells: checkerboards, diagonal lines, dots
+- Flat abstract symbols, warning signs, keyholes, stars, arrows
+- Vintage comic-style smiley/frowny faces for quality indicators
+- Colored cells used for breathing room—some with minimal/no content
+
+## Typography
+
+- Headers: Bold brutalist or retro thick display fonts, high legibility
+- Body: Clean sans-serif, structured typographic alignment
+- Decorative English text acceptable for stylistic labels ("WARNING", "INFO", "BEST")
+- All content text in specified language
+
+## Style Enforcement
+
+- Absolutely no gradients, shading, drop shadows, or 3D effects
+- Everything anchored in grid cells—no floating or unorganized elements
+- Maintain 1970s retro pop art and underground comic illustration feel
+- Visual density balanced with rhythmic grid—some cells intentionally sparse for contrast
+
+## Avoid
+
+- 3D rendering, realistic details, gradients, soft shadows
+- Soft, thin, or sketch-like pencil lines
+- Free-flowing, unorganized, or floating layouts (everything must be grid-anchored)
+- Pure white background canvas
+- Organic or hand-drawn imperfections
+
+## Best For
+
+Trendy product guides, design-conscious content, visually striking comparisons, content targeting design-savvy audiences, bold social media posts
diff --git a/skills/creative/baoyu-infographic/references/styles/storybook-watercolor.md b/skills/creative/baoyu-infographic/references/styles/storybook-watercolor.md
new file mode 100644
index 00000000000..01828f3baad
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/storybook-watercolor.md
@@ -0,0 +1,29 @@
+# storybook-watercolor
+
+Soft hand-painted illustration with whimsical charm
+
+## Color Palette
+
+- Primary: Soft watercolor washes - muted blues, greens, warm earth
+- Background: Watercolor paper texture, white or cream
+- Accents: Deeper pigment pools, splatter effects
+
+## Visual Elements
+
+- Visible brushstrokes
+- Soft color bleeds and gradients
+- White space as design element
+- Delicate line work over washes
+- Natural, organic shapes
+- Dreamy, atmospheric quality
+
+## Typography
+
+- Elegant hand-lettering
+- Watercolor-style text
+- Flowing, organic letterforms
+- Integrated with illustrations
+
+## Best For
+
+Storytelling, emotional journeys, nature topics, children's education, artistic presentations
diff --git a/skills/creative/baoyu-infographic/references/styles/subway-map.md b/skills/creative/baoyu-infographic/references/styles/subway-map.md
new file mode 100644
index 00000000000..de908576474
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/subway-map.md
@@ -0,0 +1,29 @@
+# subway-map
+
+Transit diagram style with colored lines and stations
+
+## Color Palette
+
+- Primary: Transit line colors - red, blue, green, yellow, orange
+- Background: White or light gray
+- Accents: Station dots, interchange markers
+
+## Visual Elements
+
+- Colored route lines
+- 45° and 90° angles only
+- Station circle markers
+- Interchange symbols
+- Simplified geography
+- Line thickness hierarchy
+
+## Typography
+
+- Clean sans-serif
+- Station name labels
+- Line number/name badges
+- Horizontal or angled text
+
+## Best For
+
+Journey maps, process flows, network diagrams, route explanations
diff --git a/skills/creative/baoyu-infographic/references/styles/technical-schematic.md b/skills/creative/baoyu-infographic/references/styles/technical-schematic.md
new file mode 100644
index 00000000000..5de34c576e9
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/technical-schematic.md
@@ -0,0 +1,36 @@
+# technical-schematic
+
+Technical diagrams with engineering precision and clean geometry.
+
+## Color Palette
+
+- Primary: Blues (#2563EB), teals, grays, white lines
+- Background: Deep blue (#1E3A5F), white, or light gray with grid
+- Accents: Amber highlights (#F59E0B), cyan callouts
+
+## Variants
+
+| Variant | Focus | Visual Emphasis |
+|---------|-------|-----------------|
+| **Blueprint** | Engineering schematics | White on blue, measurements, grid |
+| **Isometric** | 3D spatial representation | 30° angle blocks, clean fills |
+
+## Visual Elements
+
+- Geometric precision throughout
+- Grid pattern or isometric angle
+- Dimension lines and measurements
+- Technical symbols and annotations
+- Clean vector shapes
+- Consistent stroke weights
+
+## Typography
+
+- Technical stencil or clean sans-serif
+- All-caps labels
+- Measurement annotations
+- Floating labels for isometric
+
+## Best For
+
+Technical architecture, system diagrams, engineering specs, product breakdowns, data visualization
diff --git a/skills/creative/baoyu-infographic/references/styles/ui-wireframe.md b/skills/creative/baoyu-infographic/references/styles/ui-wireframe.md
new file mode 100644
index 00000000000..397a436ff75
--- /dev/null
+++ b/skills/creative/baoyu-infographic/references/styles/ui-wireframe.md
@@ -0,0 +1,29 @@
+# ui-wireframe
+
+Grayscale interface mockup style
+
+## Color Palette
+
+- Primary: Grays - light (#E5E5E5), medium (#9CA3AF), dark (#374151)
+- Background: White (#FFFFFF), light gray
+- Accents: Blue for interactive (#3B82F6), red for emphasis
+
+## Visual Elements
+
+- Wireframe boxes and placeholders
+- X marks for image placeholders
+- Simple line icons
+- Grid-based layout
+- Annotation callouts
+- Redline specifications
+
+## Typography
+
+- System fonts
+- Placeholder "Lorem ipsum"
+- UI label style
+- Sans-serif throughout
+
+## Best For
+
+Product designs, UI explanations, app concepts, user flow diagrams
diff --git a/skills/devops/webhook-subscriptions/SKILL.md b/skills/devops/webhook-subscriptions/SKILL.md
index e5ab6d5880d..dd20a19b415 100644
--- a/skills/devops/webhook-subscriptions/SKILL.md
+++ b/skills/devops/webhook-subscriptions/SKILL.md
@@ -1,10 +1,10 @@
 ---
 name: webhook-subscriptions
-description: Create and manage webhook subscriptions for event-driven agent activation. Use when the user wants external services to trigger agent runs automatically.
-version: 1.0.0
+description: Create and manage webhook subscriptions for event-driven agent activation, or for direct push notifications (zero LLM cost). Use when the user wants external services to trigger agent runs OR push notifications to chats.
+version: 1.1.0
 metadata:
   hermes:
-    tags: [webhook, events, automation, integrations]
+    tags: [webhook, events, automation, integrations, notifications, push]
 ---
 
 # Webhook Subscriptions
@@ -154,6 +154,29 @@ hermes webhook subscribe alerts \
   --deliver origin
 ```
 
+### Direct delivery (no agent, zero LLM cost)
+
+For use cases where you just want to push a notification through to a user's chat — no reasoning, no agent loop — add `--deliver-only`. The rendered `--prompt` template becomes the literal message body and is dispatched directly to the target adapter.
+
+Use this for:
+- External service push notifications (Supabase/Firebase webhooks → Telegram)
+- Monitoring alerts that should forward verbatim
+- Inter-agent pings where one agent is telling another agent's user something
+- Any webhook where an LLM round trip would be wasted effort
+
+```bash
+hermes webhook subscribe antenna-matches \
+  --deliver telegram \
+  --deliver-chat-id "123456789" \
+  --deliver-only \
+  --prompt "🎉 New match: {match.user_name} matched with you!" \
+  --description "Antenna match notifications"
+```
+
+The POST returns `200 OK` on successful delivery, `502` on target failure — so upstream services can retry intelligently. HMAC auth, rate limits, and idempotency still apply.
+
+`--deliver-only` requires `--deliver` to be a real target (telegram, discord, slack, github_comment, etc.) — `--deliver log` is rejected because log-only direct delivery is pointless.
+
 ## Security
 
 - Each subscription gets an auto-generated HMAC-SHA256 secret (or provide your own with `--secret`)
diff --git a/skills/leisure/find-nearby/SKILL.md b/skills/leisure/find-nearby/SKILL.md
deleted file mode 100644
index f0ecdbf5314..00000000000
--- a/skills/leisure/find-nearby/SKILL.md
+++ /dev/null
@@ -1,69 +0,0 @@
----
-name: find-nearby
-description: Find nearby places (restaurants, cafes, bars, pharmacies, etc.) using OpenStreetMap. Works with coordinates, addresses, cities, zip codes, or Telegram location pins. No API keys needed.
-version: 1.0.0
-metadata:
-  hermes:
-    tags: [location, maps, nearby, places, restaurants, local]
-    related_skills: []
----
-
-# Find Nearby — Local Place Discovery
-
-Find restaurants, cafes, bars, pharmacies, and other places near any location. Uses OpenStreetMap (free, no API keys). Works with:
-
-- **Coordinates** from Telegram location pins (latitude/longitude in conversation)
-- **Addresses** ("near 123 Main St, Springfield")
-- **Cities** ("restaurants in downtown Austin")
-- **Zip codes** ("pharmacies near 90210")
-- **Landmarks** ("cafes near Times Square")
-
-## Quick Reference
-
-```bash
-# By coordinates (from Telegram location pin or user-provided)
-python3 SKILL_DIR/scripts/find_nearby.py --lat <LAT> --lon <LON> --type restaurant --radius 1500
-
-# By address, city, or landmark (auto-geocoded)
-python3 SKILL_DIR/scripts/find_nearby.py --near "Times Square, New York" --type cafe
-
-# Multiple place types
-python3 SKILL_DIR/scripts/find_nearby.py --near "downtown austin" --type restaurant --type bar --limit 10
-
-# JSON output
-python3 SKILL_DIR/scripts/find_nearby.py --near "90210" --type pharmacy --json
-```
-
-### Parameters
-
-| Flag | Description | Default |
-|------|-------------|---------|
-| `--lat`, `--lon` | Exact coordinates | — |
-| `--near` | Address, city, zip, or landmark (geocoded) | — |
-| `--type` | Place type (repeatable for multiple) | restaurant |
-| `--radius` | Search radius in meters | 1500 |
-| `--limit` | Max results | 15 |
-| `--json` | Machine-readable JSON output | off |
-
-### Common Place Types
-
-`restaurant`, `cafe`, `bar`, `pub`, `fast_food`, `pharmacy`, `hospital`, `bank`, `atm`, `fuel`, `parking`, `supermarket`, `convenience`, `hotel`
-
-## Workflow
-
-1. **Get the location.** Look for coordinates (`latitude: ... / longitude: ...`) from a Telegram pin, or ask the user for an address/city/zip.
-
-2. **Ask for preferences** (only if not already stated): place type, how far they're willing to go, any specifics (cuisine, "open now", etc.).
-
-3. **Run the script** with appropriate flags. Use `--json` if you need to process results programmatically.
-
-4. **Present results** with names, distances, and Google Maps links. If the user asked about hours or "open now," check the `hours` field in results — if missing or unclear, verify with `web_search`.
-
-5. **For directions**, use the `directions_url` from results, or construct: `https://www.google.com/maps/dir/?api=1&origin=<LAT>,<LON>&destination=<LAT>,<LON>`
-
-## Tips
-
-- If results are sparse, widen the radius (1500 → 3000m)
-- For "open now" requests: check the `hours` field in results, cross-reference with `web_search` for accuracy since OSM hours aren't always complete
-- Zip codes alone can be ambiguous globally — prompt the user for country/state if results look wrong
-- The script uses OpenStreetMap data which is community-maintained; coverage varies by region
diff --git a/skills/leisure/find-nearby/scripts/find_nearby.py b/skills/leisure/find-nearby/scripts/find_nearby.py
deleted file mode 100644
index 9d7fed78f46..00000000000
--- a/skills/leisure/find-nearby/scripts/find_nearby.py
+++ /dev/null
@@ -1,184 +0,0 @@
-#!/usr/bin/env python3
-"""Find nearby places using OpenStreetMap (Overpass + Nominatim). No API keys needed.
-
-Usage:
-    # By coordinates
-    python find_nearby.py --lat 36.17 --lon -115.14 --type restaurant --radius 1500
-
-    # By address/city/zip (auto-geocoded)
-    python find_nearby.py --near "Times Square, New York" --type cafe --radius 1000
-    python find_nearby.py --near "90210" --type pharmacy
-
-    # Multiple types
-    python find_nearby.py --lat 36.17 --lon -115.14 --type restaurant --type bar
-
-    # JSON output for programmatic use
-    python find_nearby.py --near "downtown las vegas" --type restaurant --json
-"""
-
-import argparse
-import json
-import math
-import sys
-import urllib.parse
-import urllib.request
-from typing import Any
-
-OVERPASS_URLS = [
-    "https://overpass-api.de/api/interpreter",
-    "https://overpass.kumi.systems/api/interpreter",
-]
-NOMINATIM_URL = "https://nominatim.openstreetmap.org/search"
-USER_AGENT = "HermesAgent/1.0 (find-nearby skill)"
-TIMEOUT = 15
-
-
-def _http_get(url: str) -> Any:
-    req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
-    with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
-        return json.loads(r.read())
-
-
-def _http_post(url: str, data: str) -> Any:
-    req = urllib.request.Request(
-        url, data=data.encode(), headers={"User-Agent": USER_AGENT}
-    )
-    with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
-        return json.loads(r.read())
-
-
-def haversine(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
-    """Distance in meters between two coordinates."""
-    R = 6_371_000
-    rlat1, rlat2 = math.radians(lat1), math.radians(lat2)
-    dlat = math.radians(lat2 - lat1)
-    dlon = math.radians(lon2 - lon1)
-    a = math.sin(dlat / 2) ** 2 + math.cos(rlat1) * math.cos(rlat2) * math.sin(dlon / 2) ** 2
-    return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
-
-
-def geocode(query: str) -> tuple[float, float]:
-    """Convert address/city/zip to coordinates via Nominatim."""
-    params = urllib.parse.urlencode({"q": query, "format": "json", "limit": 1})
-    results = _http_get(f"{NOMINATIM_URL}?{params}")
-    if not results:
-        print(f"Error: Could not geocode '{query}'. Try a more specific address.", file=sys.stderr)
-        sys.exit(1)
-    return float(results[0]["lat"]), float(results[0]["lon"])
-
-
-def find_nearby(lat: float, lon: float, types: list[str], radius: int = 1500, limit: int = 15) -> list[dict]:
-    """Query Overpass for nearby amenities."""
-    # Build Overpass QL query
-    type_filters = "".join(
-        f'nwr["amenity"="{t}"](around:{radius},{lat},{lon});' for t in types
-    )
-    query = f"[out:json][timeout:{TIMEOUT}];({type_filters});out center tags;"
-
-    # Try each Overpass server
-    data = None
-    for url in OVERPASS_URLS:
-        try:
-            data = _http_post(url, f"data={urllib.parse.quote(query)}")
-            break
-        except Exception:
-            continue
-
-    if not data:
-        return []
-
-    # Parse results
-    places = []
-    for el in data.get("elements", []):
-        tags = el.get("tags", {})
-        name = tags.get("name")
-        if not name:
-            continue
-
-        # Get coordinates (nodes have lat/lon directly, ways/relations use center)
-        plat = el.get("lat") or (el.get("center", {}) or {}).get("lat")
-        plon = el.get("lon") or (el.get("center", {}) or {}).get("lon")
-        if plat is None or plon is None:
-            continue
-
-        dist = haversine(lat, lon, plat, plon)
-
-        place = {
-            "name": name,
-            "type": tags.get("amenity", ""),
-            "distance_m": round(dist),
-            "lat": plat,
-            "lon": plon,
-            "maps_url": f"https://www.google.com/maps/search/?api=1&query={plat},{plon}",
-            "directions_url": f"https://www.google.com/maps/dir/?api=1&origin={lat},{lon}&destination={plat},{plon}",
-        }
-
-        # Add useful optional fields
-        if tags.get("cuisine"):
-            place["cuisine"] = tags["cuisine"]
-        if tags.get("opening_hours"):
-            place["hours"] = tags["opening_hours"]
-        if tags.get("phone"):
-            place["phone"] = tags["phone"]
-        if tags.get("website"):
-            place["website"] = tags["website"]
-        if tags.get("addr:street"):
-            addr_parts = [tags.get("addr:housenumber", ""), tags.get("addr:street", "")]
-            if tags.get("addr:city"):
-                addr_parts.append(tags["addr:city"])
-            place["address"] = " ".join(p for p in addr_parts if p)
-
-        places.append(place)
-
-    # Sort by distance, limit results
-    places.sort(key=lambda p: p["distance_m"])
-    return places[:limit]
-
-
-def main():
-    parser = argparse.ArgumentParser(description="Find nearby places via OpenStreetMap")
-    parser.add_argument("--lat", type=float, help="Latitude")
-    parser.add_argument("--lon", type=float, help="Longitude")
-    parser.add_argument("--near", type=str, help="Address, city, or zip code (geocoded automatically)")
-    parser.add_argument("--type", action="append", dest="types", default=[], help="Place type (restaurant, cafe, bar, pharmacy, etc.)")
-    parser.add_argument("--radius", type=int, default=1500, help="Search radius in meters (default: 1500)")
-    parser.add_argument("--limit", type=int, default=15, help="Max results (default: 15)")
-    parser.add_argument("--json", action="store_true", dest="json_output", help="Output as JSON")
-    args = parser.parse_args()
-
-    # Resolve coordinates
-    if args.near:
-        lat, lon = geocode(args.near)
-    elif args.lat is not None and args.lon is not None:
-        lat, lon = args.lat, args.lon
-    else:
-        print("Error: Provide --lat/--lon or --near", file=sys.stderr)
-        sys.exit(1)
-
-    if not args.types:
-        args.types = ["restaurant"]
-
-    places = find_nearby(lat, lon, args.types, args.radius, args.limit)
-
-    if args.json_output:
-        print(json.dumps({"origin": {"lat": lat, "lon": lon}, "results": places, "count": len(places)}, indent=2))
-    else:
-        if not places:
-            print(f"No {'/'.join(args.types)} found within {args.radius}m")
-            return
-        print(f"Found {len(places)} places within {args.radius}m:\n")
-        for i, p in enumerate(places, 1):
-            dist_str = f"{p['distance_m']}m" if p["distance_m"] < 1000 else f"{p['distance_m']/1000:.1f}km"
-            print(f"  {i}. {p['name']} ({p['type']}) — {dist_str}")
-            if p.get("cuisine"):
-                print(f"     Cuisine: {p['cuisine']}")
-            if p.get("hours"):
-                print(f"     Hours: {p['hours']}")
-            if p.get("address"):
-                print(f"     Address: {p['address']}")
-            print(f"     Map: {p['maps_url']}")
-            print()
-
-
-if __name__ == "__main__":
-    main()
diff --git a/skills/mcp/DESCRIPTION.md b/skills/mcp/DESCRIPTION.md
index 627c20ea1b5..30a0660333b 100644
--- a/skills/mcp/DESCRIPTION.md
+++ b/skills/mcp/DESCRIPTION.md
@@ -1,3 +1,3 @@
 ---
-description: Skills for working with MCP (Model Context Protocol) servers, tools, and integrations. Includes the built-in native MCP client (configure servers in config.yaml for automatic tool discovery) and the mcporter CLI bridge for ad-hoc server interaction.
+description: Skills for working with MCP (Model Context Protocol) servers, tools, and integrations. Documents the built-in native MCP client — configure servers in config.yaml for automatic tool discovery.
 ---
diff --git a/skills/mlops/cloud/DESCRIPTION.md b/skills/mlops/cloud/DESCRIPTION.md
deleted file mode 100644
index 32675823e04..00000000000
--- a/skills/mlops/cloud/DESCRIPTION.md
+++ /dev/null
@@ -1,3 +0,0 @@
----
-description: GPU cloud providers and serverless compute platforms for ML workloads.
----
diff --git a/skills/mlops/inference/gguf/SKILL.md b/skills/mlops/inference/gguf/SKILL.md
deleted file mode 100644
index 21bb176c8f9..00000000000
--- a/skills/mlops/inference/gguf/SKILL.md
+++ /dev/null
@@ -1,430 +0,0 @@
----
-name: gguf-quantization
-description: GGUF format and llama.cpp quantization for efficient CPU/GPU inference. Use when deploying models on consumer hardware, Apple Silicon, or when needing flexible quantization from 2-8 bit without GPU requirements.
-version: 1.0.0
-author: Orchestra Research
-license: MIT
-dependencies: [llama-cpp-python>=0.2.0]
-metadata:
-  hermes:
-    tags: [GGUF, Quantization, llama.cpp, CPU Inference, Apple Silicon, Model Compression, Optimization]
-
----
-
-# GGUF - Quantization Format for llama.cpp
-
-The GGUF (GPT-Generated Unified Format) is the standard file format for llama.cpp, enabling efficient inference on CPUs, Apple Silicon, and GPUs with flexible quantization options.
-
-## When to use GGUF
-
-**Use GGUF when:**
-- Deploying on consumer hardware (laptops, desktops)
-- Running on Apple Silicon (M1/M2/M3) with Metal acceleration
-- Need CPU inference without GPU requirements
-- Want flexible quantization (Q2_K to Q8_0)
-- Using local AI tools (LM Studio, Ollama, text-generation-webui)
-
-**Key advantages:**
-- **Universal hardware**: CPU, Apple Silicon, NVIDIA, AMD support
-- **No Python runtime**: Pure C/C++ inference
-- **Flexible quantization**: 2-8 bit with various methods (K-quants)
-- **Ecosystem support**: LM Studio, Ollama, koboldcpp, and more
-- **imatrix**: Importance matrix for better low-bit quality
-
-**Use alternatives instead:**
-- **AWQ/GPTQ**: Maximum accuracy with calibration on NVIDIA GPUs
-- **HQQ**: Fast calibration-free quantization for HuggingFace
-- **bitsandbytes**: Simple integration with transformers library
-- **TensorRT-LLM**: Production NVIDIA deployment with maximum speed
-
-## Quick start
-
-### Installation
-
-```bash
-# Clone llama.cpp
-git clone https://github.com/ggml-org/llama.cpp
-cd llama.cpp
-
-# Build (CPU)
-make
-
-# Build with CUDA (NVIDIA)
-make GGML_CUDA=1
-
-# Build with Metal (Apple Silicon)
-make GGML_METAL=1
-
-# Install Python bindings (optional)
-pip install llama-cpp-python
-```
-
-### Convert model to GGUF
-
-```bash
-# Install requirements
-pip install -r requirements.txt
-
-# Convert HuggingFace model to GGUF (FP16)
-python convert_hf_to_gguf.py ./path/to/model --outfile model-f16.gguf
-
-# Or specify output type
-python convert_hf_to_gguf.py ./path/to/model \
-    --outfile model-f16.gguf \
-    --outtype f16
-```
-
-### Quantize model
-
-```bash
-# Basic quantization to Q4_K_M
-./llama-quantize model-f16.gguf model-q4_k_m.gguf Q4_K_M
-
-# Quantize with importance matrix (better quality)
-./llama-imatrix -m model-f16.gguf -f calibration.txt -o model.imatrix
-./llama-quantize --imatrix model.imatrix model-f16.gguf model-q4_k_m.gguf Q4_K_M
-```
-
-### Run inference
-
-```bash
-# CLI inference
-./llama-cli -m model-q4_k_m.gguf -p "Hello, how are you?"
-
-# Interactive mode
-./llama-cli -m model-q4_k_m.gguf --interactive
-
-# With GPU offload
-./llama-cli -m model-q4_k_m.gguf -ngl 35 -p "Hello!"
-```
-
-## Quantization types
-
-### K-quant methods (recommended)
-
-| Type | Bits | Size (7B) | Quality | Use Case |
-|------|------|-----------|---------|----------|
-| Q2_K | 2.5 | ~2.8 GB | Low | Extreme compression |
-| Q3_K_S | 3.0 | ~3.0 GB | Low-Med | Memory constrained |
-| Q3_K_M | 3.3 | ~3.3 GB | Medium | Balance |
-| Q4_K_S | 4.0 | ~3.8 GB | Med-High | Good balance |
-| Q4_K_M | 4.5 | ~4.1 GB | High | **Recommended default** |
-| Q5_K_S | 5.0 | ~4.6 GB | High | Quality focused |
-| Q5_K_M | 5.5 | ~4.8 GB | Very High | High quality |
-| Q6_K | 6.0 | ~5.5 GB | Excellent | Near-original |
-| Q8_0 | 8.0 | ~7.2 GB | Best | Maximum quality |
-
-### Legacy methods
-
-| Type | Description |
-|------|-------------|
-| Q4_0 | 4-bit, basic |
-| Q4_1 | 4-bit with delta |
-| Q5_0 | 5-bit, basic |
-| Q5_1 | 5-bit with delta |
-
-**Recommendation**: Use K-quant methods (Q4_K_M, Q5_K_M) for best quality/size ratio.
-
-## Conversion workflows
-
-### Workflow 1: HuggingFace to GGUF
-
-```bash
-# 1. Download model
-huggingface-cli download meta-llama/Llama-3.1-8B --local-dir ./llama-3.1-8b
-
-# 2. Convert to GGUF (FP16)
-python convert_hf_to_gguf.py ./llama-3.1-8b \
-    --outfile llama-3.1-8b-f16.gguf \
-    --outtype f16
-
-# 3. Quantize
-./llama-quantize llama-3.1-8b-f16.gguf llama-3.1-8b-q4_k_m.gguf Q4_K_M
-
-# 4. Test
-./llama-cli -m llama-3.1-8b-q4_k_m.gguf -p "Hello!" -n 50
-```
-
-### Workflow 2: With importance matrix (better quality)
-
-```bash
-# 1. Convert to GGUF
-python convert_hf_to_gguf.py ./model --outfile model-f16.gguf
-
-# 2. Create calibration text (diverse samples)
-cat > calibration.txt << 'EOF'
-The quick brown fox jumps over the lazy dog.
-Machine learning is a subset of artificial intelligence.
-Python is a popular programming language.
-# Add more diverse text samples...
-EOF
-
-# 3. Generate importance matrix
-./llama-imatrix -m model-f16.gguf \
-    -f calibration.txt \
-    --chunk 512 \
-    -o model.imatrix \
-    -ngl 35  # GPU layers if available
-
-# 4. Quantize with imatrix
-./llama-quantize --imatrix model.imatrix \
-    model-f16.gguf \
-    model-q4_k_m.gguf \
-    Q4_K_M
-```
-
-### Workflow 3: Multiple quantizations
-
-```bash
-#!/bin/bash
-MODEL="llama-3.1-8b-f16.gguf"
-IMATRIX="llama-3.1-8b.imatrix"
-
-# Generate imatrix once
-./llama-imatrix -m $MODEL -f wiki.txt -o $IMATRIX -ngl 35
-
-# Create multiple quantizations
-for QUANT in Q4_K_M Q5_K_M Q6_K Q8_0; do
-    OUTPUT="llama-3.1-8b-${QUANT,,}.gguf"
-    ./llama-quantize --imatrix $IMATRIX $MODEL $OUTPUT $QUANT
-    echo "Created: $OUTPUT ($(du -h $OUTPUT | cut -f1))"
-done
-```
-
-## Python usage
-
-### llama-cpp-python
-
-```python
-from llama_cpp import Llama
-
-# Load model
-llm = Llama(
-    model_path="./model-q4_k_m.gguf",
-    n_ctx=4096,          # Context window
-    n_gpu_layers=35,     # GPU offload (0 for CPU only)
-    n_threads=8          # CPU threads
-)
-
-# Generate
-output = llm(
-    "What is machine learning?",
-    max_tokens=256,
-    temperature=0.7,
-    stop=["</s>", "\n\n"]
-)
-print(output["choices"][0]["text"])
-```
-
-### Chat completion
-
-```python
-from llama_cpp import Llama
-
-llm = Llama(
-    model_path="./model-q4_k_m.gguf",
-    n_ctx=4096,
-    n_gpu_layers=35,
-    chat_format="llama-3"  # Or "chatml", "mistral", etc.
-)
-
-messages = [
-    {"role": "system", "content": "You are a helpful assistant."},
-    {"role": "user", "content": "What is Python?"}
-]
-
-response = llm.create_chat_completion(
-    messages=messages,
-    max_tokens=256,
-    temperature=0.7
-)
-print(response["choices"][0]["message"]["content"])
-```
-
-### Streaming
-
-```python
-from llama_cpp import Llama
-
-llm = Llama(model_path="./model-q4_k_m.gguf", n_gpu_layers=35)
-
-# Stream tokens
-for chunk in llm(
-    "Explain quantum computing:",
-    max_tokens=256,
-    stream=True
-):
-    print(chunk["choices"][0]["text"], end="", flush=True)
-```
-
-## Server mode
-
-### Start OpenAI-compatible server
-
-```bash
-# Start server
-./llama-server -m model-q4_k_m.gguf \
-    --host 0.0.0.0 \
-    --port 8080 \
-    -ngl 35 \
-    -c 4096
-
-# Or with Python bindings
-python -m llama_cpp.server \
-    --model model-q4_k_m.gguf \
-    --n_gpu_layers 35 \
-    --host 0.0.0.0 \
-    --port 8080
-```
-
-### Use with OpenAI client
-
-```python
-from openai import OpenAI
-
-client = OpenAI(
-    base_url="http://localhost:8080/v1",
-    api_key="not-needed"
-)
-
-response = client.chat.completions.create(
-    model="local-model",
-    messages=[{"role": "user", "content": "Hello!"}],
-    max_tokens=256
-)
-print(response.choices[0].message.content)
-```
-
-## Hardware optimization
-
-### Apple Silicon (Metal)
-
-```bash
-# Build with Metal
-make clean && make GGML_METAL=1
-
-# Run with Metal acceleration
-./llama-cli -m model.gguf -ngl 99 -p "Hello"
-
-# Python with Metal
-llm = Llama(
-    model_path="model.gguf",
-    n_gpu_layers=99,     # Offload all layers
-    n_threads=1          # Metal handles parallelism
-)
-```
-
-### NVIDIA CUDA
-
-```bash
-# Build with CUDA
-make clean && make GGML_CUDA=1
-
-# Run with CUDA
-./llama-cli -m model.gguf -ngl 35 -p "Hello"
-
-# Specify GPU
-CUDA_VISIBLE_DEVICES=0 ./llama-cli -m model.gguf -ngl 35
-```
-
-### CPU optimization
-
-```bash
-# Build with AVX2/AVX512
-make clean && make
-
-# Run with optimal threads
-./llama-cli -m model.gguf -t 8 -p "Hello"
-
-# Python CPU config
-llm = Llama(
-    model_path="model.gguf",
-    n_gpu_layers=0,      # CPU only
-    n_threads=8,         # Match physical cores
-    n_batch=512          # Batch size for prompt processing
-)
-```
-
-## Integration with tools
-
-### Ollama
-
-```bash
-# Create Modelfile
-cat > Modelfile << 'EOF'
-FROM ./model-q4_k_m.gguf
-TEMPLATE """{{ .System }}
-{{ .Prompt }}"""
-PARAMETER temperature 0.7
-PARAMETER num_ctx 4096
-EOF
-
-# Create Ollama model
-ollama create mymodel -f Modelfile
-
-# Run
-ollama run mymodel "Hello!"
-```
-
-### LM Studio
-
-1. Place GGUF file in `~/.cache/lm-studio/models/`
-2. Open LM Studio and select the model
-3. Configure context length and GPU offload
-4. Start inference
-
-### text-generation-webui
-
-```bash
-# Place in models folder
-cp model-q4_k_m.gguf text-generation-webui/models/
-
-# Start with llama.cpp loader
-python server.py --model model-q4_k_m.gguf --loader llama.cpp --n-gpu-layers 35
-```
-
-## Best practices
-
-1. **Use K-quants**: Q4_K_M offers best quality/size balance
-2. **Use imatrix**: Always use importance matrix for Q4 and below
-3. **GPU offload**: Offload as many layers as VRAM allows
-4. **Context length**: Start with 4096, increase if needed
-5. **Thread count**: Match physical CPU cores, not logical
-6. **Batch size**: Increase n_batch for faster prompt processing
-
-## Common issues
-
-**Model loads slowly:**
-```bash
-# Use mmap for faster loading
-./llama-cli -m model.gguf --mmap
-```
-
-**Out of memory:**
-```bash
-# Reduce GPU layers
-./llama-cli -m model.gguf -ngl 20  # Reduce from 35
-
-# Or use smaller quantization
-./llama-quantize model-f16.gguf model-q3_k_m.gguf Q3_K_M
-```
-
-**Poor quality at low bits:**
-```bash
-# Always use imatrix for Q4 and below
-./llama-imatrix -m model-f16.gguf -f calibration.txt -o model.imatrix
-./llama-quantize --imatrix model.imatrix model-f16.gguf model-q4_k_m.gguf Q4_K_M
-```
-
-## References
-
-- **[Advanced Usage](references/advanced-usage.md)** - Batching, speculative decoding, custom builds
-- **[Troubleshooting](references/troubleshooting.md)** - Common issues, debugging, benchmarks
-
-## Resources
-
-- **Repository**: https://github.com/ggml-org/llama.cpp
-- **Python Bindings**: https://github.com/abetlen/llama-cpp-python
-- **Pre-quantized Models**: https://huggingface.co/TheBloke
-- **GGUF Converter**: https://huggingface.co/spaces/ggml-org/gguf-my-repo
-- **License**: MIT
diff --git a/skills/mlops/inference/llama-cpp/SKILL.md b/skills/mlops/inference/llama-cpp/SKILL.md
index 57016c920df..33fc37adb18 100644
--- a/skills/mlops/inference/llama-cpp/SKILL.md
+++ b/skills/mlops/inference/llama-cpp/SKILL.md
@@ -1,138 +1,271 @@
 ---
 name: llama-cpp
-description: Runs LLM inference on CPU, Apple Silicon, and consumer GPUs without NVIDIA hardware. Use for edge deployment, M1/M2/M3 Macs, AMD/Intel GPUs, or when CUDA is unavailable. Supports GGUF quantization (1.5-8 bit) for reduced memory and 4-10× speedup vs PyTorch on CPU.
-version: 1.0.0
+description: Run LLM inference with llama.cpp on CPU, Apple Silicon, AMD/Intel GPUs, or NVIDIA — plus GGUF model conversion and quantization (2–8 bit with K-quants and imatrix). Covers CLI, Python bindings, OpenAI-compatible server, and Ollama/LM Studio integration. Use for edge deployment, M1/M2/M3/M4 Macs, CUDA-less environments, or flexible local quantization.
+version: 2.0.0
 author: Orchestra Research
 license: MIT
-dependencies: [llama-cpp-python]
+dependencies: [llama-cpp-python>=0.2.0]
 metadata:
   hermes:
-    tags: [Inference Serving, Llama.cpp, CPU Inference, Apple Silicon, Edge Deployment, GGUF, Quantization, Non-NVIDIA, AMD GPUs, Intel GPUs, Embedded]
-
+    tags: [llama.cpp, GGUF, Quantization, CPU Inference, Apple Silicon, Edge Deployment, Non-NVIDIA, AMD GPUs, Intel GPUs, Embedded, Model Compression]
 ---
 
-# llama.cpp
+# llama.cpp + GGUF
 
-Pure C/C++ LLM inference with minimal dependencies, optimized for CPUs and non-NVIDIA hardware.
+Pure C/C++ LLM inference with minimal dependencies, plus the GGUF (GPT-Generated Unified Format) standard used for quantized weights. One toolchain covers conversion, quantization, and serving.
 
-## When to use llama.cpp
+## When to use
 
-**Use llama.cpp when:**
-- Running on CPU-only machines
-- Deploying on Apple Silicon (M1/M2/M3/M4)
-- Using AMD or Intel GPUs (no CUDA)
-- Edge deployment (Raspberry Pi, embedded systems)
-- Need simple deployment without Docker/Python
+**Use llama.cpp + GGUF when:**
+- Running on CPU-only machines or Apple Silicon (M1/M2/M3/M4) with Metal acceleration
+- Using AMD (ROCm) or Intel GPUs where CUDA isn't available
+- Edge deployment (Raspberry Pi, embedded systems, consumer laptops)
+- Need flexible quantization (2–8 bit with K-quants)
+- Want local AI tools (LM Studio, Ollama, text-generation-webui, koboldcpp)
+- Want a single-binary deployment without Docker/Python
 
-**Use TensorRT-LLM instead when:**
-- Have NVIDIA GPUs (A100/H100)
-- Need maximum throughput (100K+ tok/s)
-- Running in datacenter with CUDA
+**Key advantages:**
+- Universal hardware: CPU, Apple Silicon, NVIDIA, AMD, Intel
+- No Python runtime required (pure C/C++)
+- K-quants + imatrix for better low-bit quality
+- OpenAI-compatible server built in
+- Rich ecosystem (Ollama, LM Studio, llama-cpp-python)
 
-**Use vLLM instead when:**
-- Have NVIDIA GPUs
-- Need Python-first API
-- Want PagedAttention
+**Use alternatives instead:**
+- **vLLM** — NVIDIA GPUs, PagedAttention, Python-first, max throughput
+- **TensorRT-LLM** — Production NVIDIA (A100/H100), maximum speed
+- **AWQ/GPTQ** — Calibrated quantization for NVIDIA-only deployments
+- **bitsandbytes** — Simple HuggingFace transformers integration
+- **HQQ** — Fast calibration-free quantization
 
 ## Quick start
 
-### Installation
+### Install
 
 ```bash
-# macOS/Linux
+# macOS / Linux (simplest)
 brew install llama.cpp
 
 # Or build from source
-git clone https://github.com/ggerganov/llama.cpp
+git clone https://github.com/ggml-org/llama.cpp
 cd llama.cpp
-make
+make                        # CPU
+make GGML_METAL=1           # Apple Silicon
+make GGML_CUDA=1            # NVIDIA CUDA
+make LLAMA_HIP=1            # AMD ROCm
 
-# With Metal (Apple Silicon)
-make LLAMA_METAL=1
-
-# With CUDA (NVIDIA)
-make LLAMA_CUDA=1
-
-# With ROCm (AMD)
-make LLAMA_HIP=1
+# Python bindings (optional)
+pip install llama-cpp-python
+# With CUDA:   CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python --force-reinstall --no-cache-dir
+# With Metal:  CMAKE_ARGS="-DGGML_METAL=on" pip install llama-cpp-python --force-reinstall --no-cache-dir
 ```
 
-### Download model
+### Download a pre-quantized GGUF
 
 ```bash
-# Download from HuggingFace (GGUF format)
+# TheBloke hosts most popular models pre-quantized
 huggingface-cli download \
     TheBloke/Llama-2-7B-Chat-GGUF \
     llama-2-7b-chat.Q4_K_M.gguf \
     --local-dir models/
+```
 
-# Or convert from HuggingFace
-python convert_hf_to_gguf.py models/llama-2-7b-chat/
+### Or convert a HuggingFace model to GGUF
+
+```bash
+# 1. Download HF model
+huggingface-cli download meta-llama/Llama-3.1-8B --local-dir ./llama-3.1-8b
+
+# 2. Convert to FP16 GGUF
+python convert_hf_to_gguf.py ./llama-3.1-8b \
+    --outfile llama-3.1-8b-f16.gguf \
+    --outtype f16
+
+# 3. Quantize to Q4_K_M
+./llama-quantize llama-3.1-8b-f16.gguf llama-3.1-8b-q4_k_m.gguf Q4_K_M
 ```
 
 ### Run inference
 
 ```bash
-# Simple chat
-./llama-cli \
-    -m models/llama-2-7b-chat.Q4_K_M.gguf \
-    -p "Explain quantum computing" \
-    -n 256  # Max tokens
+# One-shot prompt
+./llama-cli -m model.Q4_K_M.gguf -p "Explain quantum computing" -n 256
 
 # Interactive chat
-./llama-cli \
-    -m models/llama-2-7b-chat.Q4_K_M.gguf \
-    --interactive
+./llama-cli -m model.Q4_K_M.gguf --interactive
+
+# With GPU offload
+./llama-cli -m model.Q4_K_M.gguf -ngl 35 -p "Hello!"
 ```
 
-### Server mode
+### Serve an OpenAI-compatible API
 
 ```bash
-# Start OpenAI-compatible server
 ./llama-server \
-    -m models/llama-2-7b-chat.Q4_K_M.gguf \
+    -m model.Q4_K_M.gguf \
     --host 0.0.0.0 \
     --port 8080 \
-    -ngl 32  # Offload 32 layers to GPU
+    -ngl 35 \
+    -c 4096 \
+    --parallel 4 \
+    --cont-batching
+```
 
-# Client request
+```bash
 curl http://localhost:8080/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
-    "model": "llama-2-7b-chat",
+    "model": "local",
     "messages": [{"role": "user", "content": "Hello!"}],
     "temperature": 0.7,
     "max_tokens": 100
   }'
 ```
 
-## Quantization formats
+## Quantization formats (GGUF)
 
-### GGUF format overview
+### K-quant methods (recommended)
 
-| Format | Bits | Size (7B) | Speed | Quality | Use Case |
-|--------|------|-----------|-------|---------|----------|
-| **Q4_K_M** | 4.5 | 4.1 GB | Fast | Good | **Recommended default** |
-| Q4_K_S | 4.3 | 3.9 GB | Faster | Lower | Speed critical |
-| Q5_K_M | 5.5 | 4.8 GB | Medium | Better | Quality critical |
-| Q6_K | 6.5 | 5.5 GB | Slower | Best | Maximum quality |
-| Q8_0 | 8.0 | 7.0 GB | Slow | Excellent | Minimal degradation |
-| Q2_K | 2.5 | 2.7 GB | Fastest | Poor | Testing only |
+| Type | Bits | Size (7B) | Quality | Use Case |
+|------|------|-----------|---------|----------|
+| Q2_K | 2.5 | ~2.8 GB | Low | Extreme compression (testing only) |
+| Q3_K_S | 3.0 | ~3.0 GB | Low-Med | Memory constrained |
+| Q3_K_M | 3.3 | ~3.3 GB | Medium | Fits small devices |
+| Q4_K_S | 4.0 | ~3.8 GB | Med-High | Speed critical |
+| **Q4_K_M** | 4.5 | ~4.1 GB | High | **Recommended default** |
+| Q5_K_S | 5.0 | ~4.6 GB | High | Quality focused |
+| Q5_K_M | 5.5 | ~4.8 GB | Very High | High quality |
+| Q6_K | 6.0 | ~5.5 GB | Excellent | Near-original |
+| Q8_0 | 8.0 | ~7.2 GB | Best | Maximum quality, minimal degradation |
 
-### Choosing quantization
+**Variant suffixes** — `_S` (Small, faster, lower quality), `_M` (Medium, balanced), `_L` (Large, better quality).
+
+**Legacy formats (Q4_0/Q4_1/Q5_0/Q5_1) still exist**, but prefer K-quants for a better quality/size ratio.
+
+**IQ quantization** — ultra-low-bit, importance-aware methods: IQ2_XXS, IQ2_XS, IQ2_S, IQ3_XXS, IQ3_XS, IQ3_S, IQ4_XS. These require `--imatrix`.
+
+**Task-specific defaults:**
+- General chat / assistants: Q4_K_M, or Q5_K_M if RAM allows
+- Code generation: Q5_K_M or Q6_K (higher precision helps)
+- Technical / medical: Q6_K or Q8_0
+- Very large (70B, 405B) on consumer hardware: Q3_K_M or Q4_K_S
+- Raspberry Pi / edge: Q2_K or Q3_K_S
+
+## Conversion workflows
+
+### Basic: HF → GGUF → quantized
 
 ```bash
-# General use (balanced)
-Q4_K_M  # 4-bit, medium quality
+python convert_hf_to_gguf.py ./model --outfile model-f16.gguf --outtype f16
+./llama-quantize model-f16.gguf model-q4_k_m.gguf Q4_K_M
+./llama-cli -m model-q4_k_m.gguf -p "Hello!" -n 50
+```
 
-# Maximum speed (more degradation)
-Q2_K or Q3_K_M
+### With importance matrix (imatrix) — better low-bit quality
 
-# Maximum quality (slower)
-Q6_K or Q8_0
+`imatrix` gives a 10–20% perplexity improvement at Q4 and is essential at Q3 and below.
 
-# Very large models (70B, 405B)
-Q3_K_M or Q4_K_S  # Lower bits to fit in memory
+```bash
+# 1. Convert to FP16 GGUF
+python convert_hf_to_gguf.py ./model --outfile model-f16.gguf
+
+# 2. Prepare calibration data (diverse text, ~100MB is ideal)
+cat > calibration.txt << 'EOF'
+The quick brown fox jumps over the lazy dog.
+Machine learning is a subset of artificial intelligence.
+# Add more diverse text samples...
+EOF
+
+# 3. Generate importance matrix
+./llama-imatrix -m model-f16.gguf \
+    -f calibration.txt \
+    --chunk 512 \
+    -o model.imatrix \
+    -ngl 35
+
+# 4. Quantize with imatrix
+./llama-quantize --imatrix model.imatrix \
+    model-f16.gguf model-q4_k_m.gguf Q4_K_M
+```
+
+### Multi-quant batch
+
+```bash
+#!/bin/bash
+MODEL="llama-3.1-8b-f16.gguf"
+IMATRIX="llama-3.1-8b.imatrix"
+
+./llama-imatrix -m $MODEL -f wiki.txt -o $IMATRIX -ngl 35
+
+for QUANT in Q4_K_M Q5_K_M Q6_K Q8_0; do
+    OUTPUT="llama-3.1-8b-${QUANT,,}.gguf"
+    ./llama-quantize --imatrix $IMATRIX $MODEL $OUTPUT $QUANT
+    echo "Created: $OUTPUT ($(du -h $OUTPUT | cut -f1))"
+done
+```
+
+### Quality testing (perplexity)
+
+```bash
+./llama-perplexity -m model.gguf -f wikitext-2-raw/wiki.test.raw -c 512
+# Baseline FP16: ~5.96  |  Q4_K_M: ~6.06 (+1.7%)  |  Q2_K: ~6.87 (+15.3%)
+```
+
+## Python bindings (llama-cpp-python)
+
+### Basic generation
+
+```python
+from llama_cpp import Llama
+
+llm = Llama(
+    model_path="./model-q4_k_m.gguf",
+    n_ctx=4096,
+    n_gpu_layers=35,     # 0 for CPU only, 99 to offload everything
+    n_threads=8,
+)
+
+output = llm(
+    "What is machine learning?",
+    max_tokens=256,
+    temperature=0.7,
+    stop=["</s>", "\n\n"],
+)
+print(output["choices"][0]["text"])
+```
+
+### Chat completion + streaming
+
+```python
+llm = Llama(
+    model_path="./model-q4_k_m.gguf",
+    n_ctx=4096,
+    n_gpu_layers=35,
+    chat_format="llama-3",    # Or "chatml", "mistral", etc.
+)
+
+# Non-streaming
+response = llm.create_chat_completion(
+    messages=[
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "What is Python?"},
+    ],
+    max_tokens=256,
+    temperature=0.7,
+)
+print(response["choices"][0]["message"]["content"])
+
+# Streaming
+for chunk in llm("Explain quantum computing:", max_tokens=256, stream=True):
+    print(chunk["choices"][0]["text"], end="", flush=True)
+```
+
+### Embeddings
+
+```python
+llm = Llama(model_path="./model-q4_k_m.gguf", embedding=True, n_gpu_layers=35)
+vec = llm.embed("This is a test sentence.")
+print(f"Embedding dimension: {len(vec)}")
 ```
 
 ## Hardware acceleration
@@ -140,122 +273,166 @@ Q3_K_M or Q4_K_S  # Lower bits to fit in memory
 ### Apple Silicon (Metal)
 
 ```bash
-# Build with Metal
-make LLAMA_METAL=1
-
-# Run with GPU acceleration (automatic)
-./llama-cli -m model.gguf -ngl 999  # Offload all layers
-
-# Performance: M3 Max 40-60 tokens/sec (Llama 2-7B Q4_K_M)
+make clean && make GGML_METAL=1
+./llama-cli -m model.gguf -ngl 99 -p "Hello"   # offload all layers
 ```
 
-### NVIDIA GPUs (CUDA)
-
-```bash
-# Build with CUDA
-make LLAMA_CUDA=1
-
-# Offload layers to GPU
-./llama-cli -m model.gguf -ngl 35  # Offload 35/40 layers
-
-# Hybrid CPU+GPU for large models
-./llama-cli -m llama-70b.Q4_K_M.gguf -ngl 20  # GPU: 20 layers, CPU: rest
+```python
+llm = Llama(
+    model_path="model.gguf",
+    n_gpu_layers=99,     # Offload everything
+    n_threads=1,         # Metal handles parallelism
+)
 ```
 
-### AMD GPUs (ROCm)
+Performance: M3 Max ~40–60 tok/s on Llama 2-7B Q4_K_M.
+
+### NVIDIA (CUDA)
+
+```bash
+make clean && make GGML_CUDA=1
+./llama-cli -m model.gguf -ngl 35 -p "Hello"
+
+# Hybrid for large models
+./llama-cli -m llama-70b.Q4_K_M.gguf -ngl 20   # GPU: 20 layers, CPU: rest
+
+# Multi-GPU split
+./llama-cli -m large-model.gguf --tensor-split 0.5,0.5 -ngl 60
+```
+
+### AMD (ROCm)
 
 ```bash
-# Build with ROCm
 make LLAMA_HIP=1
-
-# Run with AMD GPU
 ./llama-cli -m model.gguf -ngl 999
 ```
 
-## Common patterns
-
-### Batch processing
+### CPU
 
 ```bash
-# Process multiple prompts from file
-cat prompts.txt | ./llama-cli \
-    -m model.gguf \
-    --batch-size 512 \
-    -n 100
+# Match PHYSICAL cores, not logical
+./llama-cli -m model.gguf -t 8 -p "Hello"
+
+# BLAS acceleration (2–3× speedup)
+make GGML_OPENBLAS=1
 ```
 
-### Constrained generation
-
-```bash
-# JSON output with grammar
-./llama-cli \
-    -m model.gguf \
-    -p "Generate a person: " \
-    --grammar-file grammars/json.gbnf
-
-# Outputs valid JSON only
-```
-
-### Context size
-
-```bash
-# Increase context (default 512)
-./llama-cli \
-    -m model.gguf \
-    -c 4096  # 4K context window
-
-# Very long context (if model supports)
-./llama-cli -m model.gguf -c 32768  # 32K context
+```python
+llm = Llama(
+    model_path="model.gguf",
+    n_gpu_layers=0,
+    n_threads=8,
+    n_batch=512,         # Larger batch = faster prompt processing
+)
 ```
 
 ## Performance benchmarks
 
-### CPU performance (Llama 2-7B Q4_K_M)
+### CPU (Llama 2-7B Q4_K_M)
 
-| CPU | Threads | Speed | Cost |
-|-----|---------|-------|------|
-| Apple M3 Max | 16 | 50 tok/s | $0 (local) |
-| AMD Ryzen 9 7950X | 32 | 35 tok/s | $0.50/hour |
-| Intel i9-13900K | 32 | 30 tok/s | $0.40/hour |
-| AWS c7i.16xlarge | 64 | 40 tok/s | $2.88/hour |
+| CPU | Threads | Speed |
+|-----|---------|-------|
+| Apple M3 Max (Metal) | 16 | 50 tok/s |
+| AMD Ryzen 9 7950X | 32 | 35 tok/s |
+| Intel i9-13900K | 32 | 30 tok/s |
 
-### GPU acceleration (Llama 2-7B Q4_K_M)
+### GPU offloading on RTX 4090
 
-| GPU | Speed | vs CPU | Cost |
-|-----|-------|--------|------|
-| NVIDIA RTX 4090 | 120 tok/s | 3-4× | $0 (local) |
-| NVIDIA A10 | 80 tok/s | 2-3× | $1.00/hour |
-| AMD MI250 | 70 tok/s | 2× | $2.00/hour |
-| Apple M3 Max (Metal) | 50 tok/s | ~Same | $0 (local) |
+| Layers GPU | Speed | VRAM |
+|------------|-------|------|
+| 0 (CPU only) | 30 tok/s | 0 GB |
+| 20 (hybrid) | 80 tok/s | 8 GB |
+| 35 (all) | 120 tok/s | 12 GB |
 
 ## Supported models
 
-**LLaMA family**:
-- Llama 2 (7B, 13B, 70B)
-- Llama 3 (8B, 70B, 405B)
-- Code Llama
+- **LLaMA family**: Llama 2 (7B/13B/70B), Llama 3 (8B/70B/405B), Code Llama
+- **Mistral family**: Mistral 7B, Mixtral 8x7B/8x22B
+- **Other**: Falcon, BLOOM, GPT-J, Phi-3, Gemma, Qwen, LLaVA (vision), Whisper (audio)
 
-**Mistral family**:
-- Mistral 7B
-- Mixtral 8x7B, 8x22B
+Find GGUF models: https://huggingface.co/models?library=gguf
 
-**Other**:
-- Falcon, BLOOM, GPT-J
-- Phi-3, Gemma, Qwen
-- LLaVA (vision), Whisper (audio)
+## Ecosystem integrations
 
-**Find models**: https://huggingface.co/models?library=gguf
+### Ollama
+
+```bash
+cat > Modelfile << 'EOF'
+FROM ./model-q4_k_m.gguf
+TEMPLATE """{{ .System }}
+{{ .Prompt }}"""
+PARAMETER temperature 0.7
+PARAMETER num_ctx 4096
+EOF
+
+ollama create mymodel -f Modelfile
+ollama run mymodel "Hello!"
+```
+
+### LM Studio
+
+1. Place GGUF file in `~/.cache/lm-studio/models/`
+2. Open LM Studio and select the model
+3. Configure context length and GPU offload, start inference
+
+### text-generation-webui
+
+```bash
+cp model-q4_k_m.gguf text-generation-webui/models/
+python server.py --model model-q4_k_m.gguf --loader llama.cpp --n-gpu-layers 35
+```
+
+### OpenAI client → llama-server
+
+```python
+from openai import OpenAI
+
+client = OpenAI(base_url="http://localhost:8080/v1", api_key="not-needed")
+response = client.chat.completions.create(
+    model="local-model",
+    messages=[{"role": "user", "content": "Hello!"}],
+    max_tokens=256,
+)
+print(response.choices[0].message.content)
+```
+
+## Best practices
+
+1. **Use K-quants** — Q4_K_M is the recommended default
+2. **Use imatrix** for Q4 and below (calibration improves quality substantially)
+3. **Offload as many layers as VRAM allows** — start high, reduce by 5 on OOM
+4. **Thread count** — match physical cores, not logical
+5. **Batch size** — increase `n_batch` (e.g. 512) for faster prompt processing
+6. **Context** — start at 4096, grow only as needed (memory scales with ctx)
+7. **Flash Attention** — add `--flash-attn` if your build supports it
+
+## Common issues (quick fixes)
+
+**Model loads slowly** — memory-mapped loading is on by default; make sure you are not passing `--no-mmap` (or `use_mmap=False` in Python).
+
+**Out of memory (GPU)** — reduce `-ngl`, use a smaller quant (Q4_K_S / Q3_K_M), or quantize the KV cache:
+```python
+Llama(model_path="...", type_k=2, type_v=2, flash_attn=True, n_gpu_layers=35)  # Q4_0 KV cache (quantized V cache requires flash attention)
+```
+
+**Garbage output** — wrong `chat_format`, temperature too high, or model file corrupted. Test with `temperature=0.1` and verify FP16 baseline works.
+
+**Connection refused (server)** — bind to `--host 0.0.0.0`, check `lsof -i :8080`.
+
+See `references/troubleshooting.md` for the full playbook.
 
 ## References
 
-- **[Quantization Guide](references/quantization.md)** - GGUF formats, conversion, quality comparison
-- **[Server Deployment](references/server.md)** - API endpoints, Docker, monitoring
-- **[Optimization](references/optimization.md)** - Performance tuning, hybrid CPU+GPU
+- **[advanced-usage.md](references/advanced-usage.md)** — speculative decoding, batched inference, grammar-constrained generation, LoRA, multi-GPU, custom builds, benchmark scripts
+- **[quantization.md](references/quantization.md)** — perplexity tables, use-case guide, model size scaling (7B/13B/70B RAM needs), imatrix deep dive
+- **[server.md](references/server.md)** — OpenAI API endpoints, Docker deployment, NGINX load balancing, monitoring
+- **[optimization.md](references/optimization.md)** — CPU threading, BLAS, GPU offload heuristics, batch tuning, benchmarks
+- **[troubleshooting.md](references/troubleshooting.md)** — install/convert/quantize/inference/server issues, Apple Silicon, debugging
 
 ## Resources
 
-- **GitHub**: https://github.com/ggerganov/llama.cpp
-- **Models**: https://huggingface.co/models?library=gguf
-- **Discord**: https://discord.gg/llama-cpp
-
-
+- **GitHub**: https://github.com/ggml-org/llama.cpp
+- **Python bindings**: https://github.com/abetlen/llama-cpp-python
+- **Pre-quantized models**: https://huggingface.co/TheBloke
+- **GGUF converter Space**: https://huggingface.co/spaces/ggml-org/gguf-my-repo
+- **License**: MIT
diff --git a/skills/mlops/inference/gguf/references/advanced-usage.md b/skills/mlops/inference/llama-cpp/references/advanced-usage.md
similarity index 100%
rename from skills/mlops/inference/gguf/references/advanced-usage.md
rename to skills/mlops/inference/llama-cpp/references/advanced-usage.md
diff --git a/skills/mlops/inference/gguf/references/troubleshooting.md b/skills/mlops/inference/llama-cpp/references/troubleshooting.md
similarity index 100%
rename from skills/mlops/inference/gguf/references/troubleshooting.md
rename to skills/mlops/inference/llama-cpp/references/troubleshooting.md
diff --git a/skills/mlops/models/DESCRIPTION.md b/skills/mlops/models/DESCRIPTION.md
index 8170b517f51..8f7e669562c 100644
--- a/skills/mlops/models/DESCRIPTION.md
+++ b/skills/mlops/models/DESCRIPTION.md
@@ -1,3 +1,3 @@
 ---
-description: Specific model architectures and tools — computer vision (CLIP, SAM, Stable Diffusion), speech (Whisper), audio generation (AudioCraft), and multimodal models (LLaVA).
+description: Specific model architectures and tools — image segmentation (Segment Anything / SAM) and audio generation (AudioCraft / MusicGen). Additional model skills (CLIP, Stable Diffusion, Whisper, LLaVA) are available as optional skills.
 ---
diff --git a/skills/mlops/training/grpo-rl-training/README.md b/skills/mlops/training/grpo-rl-training/README.md
deleted file mode 100644
index 99b60d66438..00000000000
--- a/skills/mlops/training/grpo-rl-training/README.md
+++ /dev/null
@@ -1,97 +0,0 @@
-# GRPO/RL Training Skill
-
-**Expert-level guidance for Group Relative Policy Optimization with TRL**
-
-## 📁 Skill Structure
-
-```
-grpo-rl-training/
-├── SKILL.md                              # Main skill documentation (READ THIS FIRST)
-├── README.md                             # This file
-├── templates/
-│   └── basic_grpo_training.py            # Production-ready training template
-└── examples/
-    └── reward_functions_library.py       # 20+ reward function examples
-```
-
-## 🚀 Quick Start
-
-1. **Read SKILL.md** - Comprehensive guide with all concepts and patterns
-2. **Copy `templates/basic_grpo_training.py`** - Start with working code
-3. **Browse `examples/reward_functions_library.py`** - Pick reward functions for your task
-4. **Modify for your use case** - Adapt dataset, rewards, and config
-
-## 💡 What's Inside
-
-### SKILL.md (Main Documentation)
-- Core GRPO concepts and algorithm fundamentals
-- Complete implementation workflow (dataset → rewards → training → deployment)
-- 10+ reward function examples with code
-- Hyperparameter tuning guide
-- Training insights (loss behavior, metrics, debugging)
-- Troubleshooting guide
-- Production best practices
-
-### Templates
-- **basic_grpo_training.py**: Minimal, production-ready training script
-  - Uses Qwen 2.5 1.5B Instruct
-  - 3 reward functions (format + correctness)
-  - LoRA for efficient training
-  - Fully documented and ready to run
-
-### Examples
-- **reward_functions_library.py**: 20+ battle-tested reward functions
-  - Correctness rewards (exact match, fuzzy match, numeric, code execution)
-  - Format rewards (XML, JSON, strict/soft)
-  - Length rewards (ideal length, min/max)
-  - Style rewards (reasoning quality, citations, repetition penalty)
-  - Combined rewards (multi-objective optimization)
-  - Preset collections for common tasks
-
-## 📖 Usage for Agents
-
-When this skill is loaded in your agent's context:
-
-1. **Always read SKILL.md first** before implementing
-2. **Start simple** - Use length-based reward to validate setup
-3. **Build incrementally** - Add one reward function at a time
-4. **Reference examples** - Copy patterns from reward_functions_library.py
-5. **Monitor training** - Watch reward metrics (not loss!)
-
-## 🎯 Common Use Cases
-
-| Task Type | Recommended Rewards | Template |
-|-----------|---------------------|----------|
-| Math reasoning | `MATH_REASONING_REWARDS` preset | basic_grpo_training.py |
-| Code generation | `CODE_GENERATION_REWARDS` preset | Modify dataset in template |
-| Summarization | `SUMMARIZATION_REWARDS` preset | Adjust prompts + rewards |
-| Q&A | `QA_REWARDS` preset | Use fuzzy match + citations |
-
-## ⚠️ Critical Reminders
-
-- **Loss goes UP during training** - This is normal (it's KL divergence)
-- **Use 3-5 reward functions** - Single rewards often fail
-- **Test rewards before training** - Debug each function independently
-- **Monitor reward_std** - Should stay > 0.1 (avoid mode collapse)
-- **Start with num_generations=4-8** - Scale up if GPU allows
-
-## 🔗 External Resources
-
-- [TRL Documentation](https://huggingface.co/docs/trl)
-- [DeepSeek R1 Paper](https://arxiv.org/abs/2501.12948)
-- [Open R1 Implementation](https://github.com/huggingface/open-r1)
-- [Unsloth (2-3x faster)](https://docs.unsloth.ai/)
-
-## 📝 Version
-
-**v1.0.0** - Initial release (January 2025)
-
-## 👨‍💻 Maintained By
-
-Orchestra Research
-For questions or improvements, see https://orchestra.com
-
----
-
-**License:** MIT
-**Last Updated:** January 2025
diff --git a/skills/mlops/training/trl-fine-tuning/SKILL.md b/skills/mlops/training/trl-fine-tuning/SKILL.md
index 3bf4f6e12ba..70023fc707f 100644
--- a/skills/mlops/training/trl-fine-tuning/SKILL.md
+++ b/skills/mlops/training/trl-fine-tuning/SKILL.md
@@ -252,6 +252,8 @@ trl dpo \
 
 Train with reinforcement learning using minimal memory.
 
+For in-depth GRPO guidance — reward function design, critical training insights (loss behavior, mode collapse, tuning), and advanced multi-stage patterns — see **[references/grpo-training.md](references/grpo-training.md)**. A production-ready training script is in **[templates/basic_grpo_training.py](templates/basic_grpo_training.py)**.
+
 Copy this checklist:
 
 ```
@@ -428,6 +430,8 @@ config = PPOConfig(
 
 **Online RL methods**: See [references/online-rl.md](references/online-rl.md) for PPO, GRPO, RLOO, and OnlineDPO with detailed configurations.
 
+**GRPO deep dive**: See [references/grpo-training.md](references/grpo-training.md) for expert-level GRPO patterns — reward function design philosophy, training insights (why loss increases, mode collapse detection), hyperparameter tuning, multi-stage training, and troubleshooting. Production-ready template in [templates/basic_grpo_training.py](templates/basic_grpo_training.py).
+
 ## Hardware requirements
 
 - **GPU**: NVIDIA (CUDA required)
diff --git a/skills/mlops/training/grpo-rl-training/SKILL.md b/skills/mlops/training/trl-fine-tuning/references/grpo-training.md
similarity index 56%
rename from skills/mlops/training/grpo-rl-training/SKILL.md
rename to skills/mlops/training/trl-fine-tuning/references/grpo-training.md
index 1d7629ab633..a22bd40945d 100644
--- a/skills/mlops/training/grpo-rl-training/SKILL.md
+++ b/skills/mlops/training/trl-fine-tuning/references/grpo-training.md
@@ -1,51 +1,36 @@
----
-name: grpo-rl-training
-description: Expert guidance for GRPO/RL fine-tuning with TRL for reasoning and task-specific model training
-version: 1.0.0
-author: Orchestra Research
-license: MIT
-dependencies: [transformers>=4.47.0, trl>=0.14.0, datasets>=3.2.0, peft>=0.14.0, torch]
-metadata:
-  hermes:
-    tags: [Post-Training, Reinforcement Learning, GRPO, TRL, RLHF, Reward Modeling, Reasoning, DPO, PPO, Structured Output]
+# GRPO (Group Relative Policy Optimization) — Deep Guide
 
----
+Expert-level patterns, critical insights, and production-ready workflows for fine-tuning language models with custom reward functions using TRL's `GRPOTrainer`. This is the deep reference for the GRPO workflow summarized in the main skill.
 
-# GRPO/RL Training with TRL
+## When to use GRPO
 
-Expert-level guidance for implementing Group Relative Policy Optimization (GRPO) using the Transformer Reinforcement Learning (TRL) library. This skill provides battle-tested patterns, critical insights, and production-ready workflows for fine-tuning language models with custom reward functions.
-
-## When to Use This Skill
-
-Use GRPO training when you need to:
-- **Enforce specific output formats** (e.g., XML tags, JSON, structured reasoning)
+Use GRPO when you need to:
+- **Enforce specific output formats** (XML tags, JSON, structured reasoning)
 - **Teach verifiable tasks** with objective correctness metrics (math, coding, fact-checking)
 - **Improve reasoning capabilities** by rewarding chain-of-thought patterns
 - **Align models to domain-specific behaviors** without labeled preference data
 - **Optimize for multiple objectives** simultaneously (format + correctness + style)
 
 **Do NOT use GRPO for:**
-- Simple supervised fine-tuning tasks (use SFT instead)
+- Simple supervised fine-tuning tasks → use SFT
 - Tasks without clear reward signals
-- When you already have high-quality preference pairs (use DPO/PPO instead)
+- When you already have high-quality preference pairs → use DPO/PPO
 
----
+## Core concepts
 
-## Core Concepts
+### 1. GRPO algorithm fundamentals
 
-### 1. GRPO Algorithm Fundamentals
-
-**Key Mechanism:**
-- Generates **multiple completions** for each prompt (group size: 4-16)
+**Key mechanism:**
+- Generates **multiple completions** per prompt (group size: 4–16)
 - Compares completions within each group using reward functions
 - Updates policy to favor higher-rewarded responses relative to the group
 
-**Critical Difference from PPO:**
+**Critical differences from PPO:**
 - No separate reward model needed
 - More sample-efficient (learns from within-group comparisons)
 - Simpler to implement and debug
 
-**Mathematical Intuition:**
+**Mathematical intuition:**
 ```
 For each prompt p:
   1. Generate N completions: {c₁, c₂, ..., cₙ}
@@ -54,35 +39,32 @@ For each prompt p:
      relative to low-reward ones in the same group
 ```
 
-### 2. Reward Function Design Philosophy
+### 2. Reward function design philosophy
 
-**Golden Rules:**
-1. **Compose multiple reward functions** - Each handles one aspect (format, correctness, style)
-2. **Scale rewards appropriately** - Higher weight = stronger signal
-3. **Use incremental rewards** - Partial credit for partial compliance
-4. **Test rewards independently** - Debug each reward function in isolation
+**Golden rules:**
+1. **Compose multiple reward functions** — each handles one aspect (format, correctness, style)
+2. **Scale rewards appropriately** — higher weight = stronger signal
+3. **Use incremental rewards** — partial credit for partial compliance
+4. **Test rewards independently** — debug each reward function in isolation
 
-**Reward Function Types:**
+**Reward function types:**
 
 | Type | Use Case | Example Weight |
 |------|----------|----------------|
 | **Correctness** | Verifiable tasks (math, code) | 2.0 (highest) |
-| **Format** | Strict structure enforcement | 0.5-1.0 |
-| **Length** | Encourage verbosity/conciseness | 0.1-0.5 |
-| **Style** | Penalize unwanted patterns | -0.5 to 0.5 |
+| **Format** | Strict structure enforcement | 0.5–1.0 |
+| **Length** | Encourage verbosity/conciseness | 0.1–0.5 |
+| **Style** | Penalize unwanted patterns | −0.5 to 0.5 |
 
----
+## Implementation workflow
 
-## Implementation Workflow
+### Step 1: Dataset preparation
 
-### Step 1: Dataset Preparation
-
-**Critical Requirements:**
-- Prompts in chat format (list of dicts with 'role' and 'content')
+**Critical requirements:**
+- Prompts in chat format (list of dicts with `role` and `content`)
 - Include system prompts to set expectations
 - For verifiable tasks, include ground truth answers as additional columns
 
-**Example Structure:**
 ```python
 from datasets import load_dataset, Dataset
 
@@ -97,8 +79,7 @@ Respond in the following format:
 """
 
 def prepare_dataset(raw_data):
-    """
-    Transform raw data into GRPO-compatible format.
+    """Transform raw data into GRPO-compatible format.
 
     Returns: Dataset with columns:
     - 'prompt': List[Dict] with role/content (system + user messages)
@@ -113,14 +94,14 @@ def prepare_dataset(raw_data):
     })
 ```
 
-**Pro Tips:**
-- Use one-shot or few-shot examples in system prompt for complex formats
-- Keep prompts concise (max_prompt_length: 256-512 tokens)
+**Pro tips:**
+- Use one-shot or few-shot examples in the system prompt for complex formats
+- Keep prompts concise (max_prompt_length: 256–512 tokens)
 - Validate data quality before training (garbage in = garbage out)
 
-### Step 2: Reward Function Implementation
+### Step 2: Reward function implementation
 
-**Template Structure:**
+**Template structure:**
 ```python
 def reward_function_name(
     prompts,        # List[List[Dict]]: Original prompts
@@ -128,24 +109,16 @@ def reward_function_name(
     answer=None,    # Optional: Ground truth from dataset
     **kwargs        # Additional dataset columns
 ) -> list[float]:
-    """
-    Evaluate completions and return rewards.
-
-    Returns: List of floats (one per completion)
-    """
-    # Extract completion text
+    """Evaluate completions and return rewards (one per completion)."""
     responses = [comp[0]['content'] for comp in completions]
-
-    # Compute rewards
     rewards = []
     for response in responses:
         score = compute_score(response)
         rewards.append(score)
-
     return rewards
 ```
 
-**Example 1: Correctness Reward (Math/Coding)**
+**Example 1: correctness reward (math/coding)**
 ```python
 def correctness_reward(prompts, completions, answer, **kwargs):
     """Reward correct answers with high score."""
@@ -155,7 +128,7 @@ def correctness_reward(prompts, completions, answer, **kwargs):
             for ans, gt in zip(extracted, answer)]
 ```
 
-**Example 2: Format Reward (Structured Output)**
+**Example 2: format reward (structured output)**
 ```python
 import re
 
@@ -167,7 +140,7 @@ def format_reward(completions, **kwargs):
             for r in responses]
 ```
 
-**Example 3: Incremental Format Reward (Partial Credit)**
+**Example 3: incremental format reward (partial credit)**
 ```python
 def incremental_format_reward(completions, **kwargs):
     """Award partial credit for format compliance."""
@@ -176,14 +149,10 @@ def incremental_format_reward(completions, **kwargs):
 
     for r in responses:
         score = 0.0
-        if '<reasoning>' in r:
-            score += 0.25
-        if '</reasoning>' in r:
-            score += 0.25
-        if '<answer>' in r:
-            score += 0.25
-        if '</answer>' in r:
-            score += 0.25
+        if '<reasoning>' in r:  score += 0.25
+        if '</reasoning>' in r: score += 0.25
+        if '<answer>' in r:     score += 0.25
+        if '</answer>' in r:    score += 0.25
         # Penalize extra text after closing tag
         if r.count('</answer>') == 1:
             extra_text = r.split('</answer>')[-1].strip()
@@ -193,12 +162,11 @@ def incremental_format_reward(completions, **kwargs):
     return rewards
 ```
 
-**Critical Insight:**
-Combine 3-5 reward functions for robust training. Order matters less than diversity of signals.
+**Critical insight:** Combine 3–5 reward functions for robust training. Order matters less than diversity of signals.
 
-### Step 3: Training Configuration
+### Step 3: Training configuration
 
-**Memory-Optimized Config (Small GPU)**
+**Memory-optimized config (small GPU)**
 ```python
 from trl import GRPOConfig
 
@@ -218,13 +186,13 @@ training_args = GRPOConfig(
     gradient_accumulation_steps=4,  # Effective batch = 4
 
     # GRPO-specific
-    num_generations=8,            # Group size: 8-16 recommended
+    num_generations=8,            # Group size: 8–16 recommended
     max_prompt_length=256,
     max_completion_length=512,
 
     # Training duration
     num_train_epochs=1,
-    max_steps=None,               # Or set fixed steps (e.g., 500)
+    max_steps=-1,                 # -1 = run for num_train_epochs; set > 0 for a fixed step budget
 
     # Optimization
     bf16=True,                    # Faster on A100/H100
@@ -234,11 +202,11 @@ training_args = GRPOConfig(
     # Logging
     logging_steps=1,
     save_steps=100,
-    report_to="wandb",            # Or "none" for no logging
+    report_to="wandb",
 )
 ```
 
-**High-Performance Config (Large GPU)**
+**High-performance config (large GPU)**
 ```python
 training_args = GRPOConfig(
     output_dir="outputs/grpo-model",
@@ -255,31 +223,30 @@ training_args = GRPOConfig(
 )
 ```
 
-**Critical Hyperparameters:**
+**Critical hyperparameters:**
 
 | Parameter | Impact | Tuning Advice |
 |-----------|--------|---------------|
-| `num_generations` | Group size for comparison | Start with 8, increase to 16 if GPU allows |
+| `num_generations` | Group size for comparison | Start at 8, increase to 16 if GPU allows |
 | `learning_rate` | Convergence speed/stability | 5e-6 (safe), 1e-5 (faster, riskier) |
-| `max_completion_length` | Output verbosity | Match your task (512 for reasoning, 256 for short answers) |
+| `max_completion_length` | Output verbosity | Match your task (512 reasoning, 256 short answers) |
 | `gradient_accumulation_steps` | Effective batch size | Increase if GPU memory limited |
 
-### Step 4: Model Setup and Training
+### Step 4: Model setup and training
 
-**Standard Setup (Transformers)**
+**Standard setup (Transformers + TRL)**
 ```python
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import LoraConfig
 from trl import GRPOTrainer
 
-# Load model
 model_name = "Qwen/Qwen2.5-1.5B-Instruct"
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     torch_dtype=torch.bfloat16,
-    attn_implementation="flash_attention_2",  # 2-3x faster
-    device_map="auto"
+    attn_implementation="flash_attention_2",  # 2–3× faster
+    device_map="auto",
 )
 
 tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -287,17 +254,16 @@ tokenizer.pad_token = tokenizer.eos_token
 
 # Optional: LoRA for parameter-efficient training
 peft_config = LoraConfig(
-    r=16,                         # Rank (higher = more capacity)
-    lora_alpha=32,               # Scaling factor (typically 2*r)
+    r=16,
+    lora_alpha=32,
     target_modules=[
         "q_proj", "k_proj", "v_proj", "o_proj",
-        "gate_proj", "up_proj", "down_proj"
+        "gate_proj", "up_proj", "down_proj",
     ],
     task_type="CAUSAL_LM",
     lora_dropout=0.05,
 )
 
-# Initialize trainer
 trainer = GRPOTrainer(
     model=model,
     processing_class=tokenizer,
@@ -308,17 +274,14 @@ trainer = GRPOTrainer(
     ],
     args=training_args,
     train_dataset=dataset,
-    peft_config=peft_config,      # Remove for full fine-tuning
+    peft_config=peft_config,   # Remove for full fine-tuning
 )
 
-# Train
 trainer.train()
-
-# Save
 trainer.save_model("final_model")
 ```
 
-**Unsloth Setup (2-3x Faster)**
+**Unsloth setup (2–3× faster)**
 ```python
 from unsloth import FastLanguageModel
 
@@ -339,28 +302,26 @@ model = FastLanguageModel.get_peft_model(
     use_gradient_checkpointing="unsloth",
 )
 
-# Rest is identical to standard setup
+# Rest is identical to the standard setup
 trainer = GRPOTrainer(model=model, ...)
 trainer.train()
 ```
 
----
+## Critical training insights
 
-## Critical Training Insights
+### 1. Loss behavior (EXPECTED pattern)
+- **Loss starts near 0 and INCREASES during training** — this is CORRECT
+- Loss measures KL divergence from initial policy; the model is learning (diverging from original behavior to optimize rewards)
+- **Monitor reward metrics, not loss, for progress**
 
-### 1. Loss Behavior (EXPECTED PATTERN)
-- **Loss starts near 0 and INCREASES during training**
-- This is CORRECT - loss measures KL divergence from initial policy
-- Model is learning (diverging from original behavior to optimize rewards)
-- Monitor reward metrics instead of loss for progress
+### 2. Reward tracking
 
-### 2. Reward Tracking
 Key metrics to watch:
-- `reward`: Average across all completions
-- `reward_std`: Diversity within groups (should remain > 0)
-- `kl`: KL divergence from reference (should grow moderately)
+- `reward` — average across all completions
+- `reward_std` — diversity within groups (should remain > 0)
+- `kl` — KL divergence from reference (should grow moderately)
 
-**Healthy Training Pattern:**
+**Healthy pattern:**
 ```
 Step   Reward    Reward_Std   KL
 100    0.5       0.3          0.02
@@ -369,12 +330,12 @@ Step   Reward    Reward_Std   KL
 400    1.5       0.15         0.12
 ```
 
-**Warning Signs:**
-- Reward std → 0 (model collapsing to single response)
-- KL exploding (> 0.5) (diverging too much, reduce LR)
-- Reward stuck (reward functions too harsh or model capacity issue)
+**Warning signs:**
+- `reward_std` → 0 (model collapsing to a single response)
+- `kl` exploding (> 0.5) — diverging too much, reduce LR
+- Reward stuck — reward functions too harsh or model capacity issue
 
-### 3. Common Pitfalls and Solutions
+### 3. Common pitfalls and solutions
 
 | Problem | Symptom | Solution |
 |---------|---------|----------|
@@ -384,15 +345,14 @@ Step   Reward    Reward_Std   KL
 | **Slow training** | < 1 it/s | Enable `use_vllm=True`, use Unsloth, reduce seq length |
 | **Format ignored** | Model doesn't follow structure | Increase format reward weight, add incremental rewards |
 
----
+## Advanced patterns
 
-## Advanced Patterns
+### 1. Multi-stage training
 
-### 1. Multi-Stage Training
 For complex tasks, train in stages:
 
 ```python
-# Stage 1: Format compliance (epochs=1)
+# Stage 1: Format compliance
 trainer_stage1 = GRPOTrainer(
     model=model,
     reward_funcs=[incremental_format_reward, format_reward],
@@ -400,7 +360,7 @@ trainer_stage1 = GRPOTrainer(
 )
 trainer_stage1.train()
 
-# Stage 2: Correctness (epochs=1)
+# Stage 2: Correctness
 trainer_stage2 = GRPOTrainer(
     model=model,
     reward_funcs=[format_reward, correctness_reward],
@@ -409,7 +369,8 @@ trainer_stage2 = GRPOTrainer(
 trainer_stage2.train()
 ```
 
-### 2. Adaptive Reward Scaling
+### 2. Adaptive reward scaling
+
 ```python
 class AdaptiveReward:
     def __init__(self, base_reward_func, initial_weight=1.0):
@@ -428,148 +389,116 @@ class AdaptiveReward:
             self.weight *= 0.9
 ```
 
-### 3. Custom Dataset Integration
+### 3. Custom dataset integration
+
 ```python
 def load_custom_knowledge_base(csv_path):
-    """Example: School communication platform docs."""
     import pandas as pd
     df = pd.read_csv(csv_path)
-
-    dataset = Dataset.from_pandas(df).map(lambda x: {
+    return Dataset.from_pandas(df).map(lambda x: {
         'prompt': [
             {'role': 'system', 'content': CUSTOM_SYSTEM_PROMPT},
             {'role': 'user', 'content': x['question']}
         ],
         'answer': x['expert_answer']
     })
-    return dataset
 ```
 
----
+## Deployment and inference
 
-## Deployment and Inference
-
-### Save and Merge LoRA
+### Save and merge LoRA
 ```python
-# Merge LoRA adapters into base model
 if hasattr(trainer.model, 'merge_and_unload'):
     merged_model = trainer.model.merge_and_unload()
     merged_model.save_pretrained("production_model")
     tokenizer.save_pretrained("production_model")
 ```
 
-### Inference Example
+### Inference
 ```python
 from transformers import pipeline
 
-generator = pipeline(
-    "text-generation",
-    model="production_model",
-    tokenizer=tokenizer
-)
+generator = pipeline("text-generation", model="production_model", tokenizer=tokenizer)
 
 result = generator(
     [
         {'role': 'system', 'content': SYSTEM_PROMPT},
-        {'role': 'user', 'content': "What is 15 + 27?"}
+        {'role': 'user', 'content': "What is 15 + 27?"},
     ],
     max_new_tokens=256,
     do_sample=True,
     temperature=0.7,
-    top_p=0.9
+    top_p=0.9,
 )
 print(result[0]['generated_text'])
 ```
 
----
+## Best practices checklist
 
-## Best Practices Checklist
-
-**Before Training:**
+**Before training:**
 - [ ] Validate dataset format (prompts as List[Dict])
 - [ ] Test reward functions on sample data
-- [ ] Calculate expected max_prompt_length from data
-- [ ] Choose appropriate num_generations based on GPU memory
+- [ ] Calculate expected `max_prompt_length` from data
+- [ ] Choose `num_generations` based on GPU memory
 - [ ] Set up logging (wandb recommended)
 
-**During Training:**
+**During training:**
 - [ ] Monitor reward progression (should increase)
-- [ ] Check reward_std (should stay > 0.1)
+- [ ] Check `reward_std` (should stay > 0.1)
 - [ ] Watch for OOM errors (reduce batch size if needed)
-- [ ] Sample generations every 50-100 steps
+- [ ] Sample generations every 50–100 steps
 - [ ] Validate format compliance on holdout set
 
-**After Training:**
+**After training:**
 - [ ] Merge LoRA weights if using PEFT
 - [ ] Test on diverse prompts
 - [ ] Compare to baseline model
 - [ ] Document reward weights and hyperparameters
 - [ ] Save reproducibility config
 
----
+## Troubleshooting
 
-## Troubleshooting Guide
+### Debugging workflow
+1. **Isolate reward functions** — test each independently
+2. **Check data distribution** — ensure diversity in prompts
+3. **Reduce complexity** — start with single reward, add gradually
+4. **Monitor generations** — print samples every N steps
+5. **Validate extraction logic** — ensure answer parsing works
 
-### Debugging Workflow
-1. **Isolate reward functions** - Test each independently
-2. **Check data distribution** - Ensure diversity in prompts
-3. **Reduce complexity** - Start with single reward, add gradually
-4. **Monitor generations** - Print samples every N steps
-5. **Validate extraction logic** - Ensure answer parsing works
-
-### Quick Fixes
+### Quick debug reward
 ```python
-# Debug reward function
 def debug_reward(completions, **kwargs):
     responses = [comp[0]['content'] for comp in completions]
-    for i, r in enumerate(responses[:2]):  # Print first 2
+    for i, r in enumerate(responses[:2]):
         print(f"Response {i}: {r[:200]}...")
-    return [1.0] * len(responses)  # Dummy rewards
+    return [1.0] * len(responses)
 
 # Test without training
 trainer = GRPOTrainer(..., reward_funcs=[debug_reward])
-trainer.generate_completions(dataset[:1])  # Generate without updating
+trainer.generate_completions(dataset[:1])
 ```
 
----
+## Template
 
-## References and Resources
+A production-ready training script lives at **`../templates/basic_grpo_training.py`**. It uses Qwen 2.5-1.5B-Instruct with LoRA and three reward functions (incremental format, strict format, correctness) on GSM8K. Copy and adapt:
+1. `get_dataset()` — swap in your data loader
+2. Reward functions — tune to your task
+3. `SYSTEM_PROMPT` — match your output format
+4. `GRPOConfig` — adjust hyperparameters for your GPU
+
+## References and resources
 
-**Official Documentation:**
 - TRL GRPO Trainer: https://huggingface.co/docs/trl/grpo_trainer
-- DeepSeek R1 Paper: https://arxiv.org/abs/2501.12948
-- Unsloth Docs: https://docs.unsloth.ai/
-
-**Example Repositories:**
-- Open R1 Implementation: https://github.com/huggingface/open-r1
-- TRL Examples: https://github.com/huggingface/trl/tree/main/examples
-
-**Recommended Reading:**
-- Progressive Disclosure Pattern for agent instructions
-- Reward shaping in RL (Ng et al.)
-- LoRA paper (Hu et al., 2021)
-
----
-
-## Usage Instructions for Agents
-
-When this skill is loaded:
-
-1. **Read this entire file** before implementing GRPO training
-2. **Start with the simplest reward function** (e.g., length-based) to validate setup
-3. **Use the templates** in `templates/` directory as starting points
-4. **Reference examples** in `examples/` for task-specific implementations
-5. **Follow the workflow** sequentially (don't skip steps)
-6. **Debug incrementally** - add one reward function at a time
-
-**Critical Reminders:**
-- Always use multiple reward functions (3-5 is optimal)
-- Monitor reward metrics, not loss
-- Test reward functions before training
-- Start small (num_generations=4), scale up gradually
-- Save checkpoints frequently (every 100 steps)
-
-This skill is designed for **expert-level implementation**. Beginners should start with supervised fine-tuning before attempting GRPO.
-
+- GRPO paper (DeepSeek): https://arxiv.org/abs/2402.03300
+- DeepSeek R1 paper: https://arxiv.org/abs/2501.12948
+- Open R1 implementation: https://github.com/huggingface/open-r1
+- TRL examples: https://github.com/huggingface/trl/tree/main/examples
+- Unsloth (faster training): https://docs.unsloth.ai/
 
+## Critical reminders
 
+- **Loss goes UP during training** — this is normal (it's KL divergence)
+- **Use 3–5 reward functions** — single rewards often fail
+- **Test rewards before training** — debug each function independently
+- **Monitor `reward_std`** — should stay > 0.1 (avoid mode collapse)
+- **Start with `num_generations=4–8`** — scale up if GPU allows
diff --git a/skills/mlops/training/grpo-rl-training/templates/basic_grpo_training.py b/skills/mlops/training/trl-fine-tuning/templates/basic_grpo_training.py
similarity index 100%
rename from skills/mlops/training/grpo-rl-training/templates/basic_grpo_training.py
rename to skills/mlops/training/trl-fine-tuning/templates/basic_grpo_training.py
diff --git a/skills/productivity/maps/SKILL.md b/skills/productivity/maps/SKILL.md
new file mode 100644
index 00000000000..9eded20866b
--- /dev/null
+++ b/skills/productivity/maps/SKILL.md
@@ -0,0 +1,198 @@
+---
+name: maps
+description: >
+  Location intelligence — geocode a place, reverse-geocode coordinates,
+  find nearby places (44 POI categories), driving/walking/cycling
+  distance + time, turn-by-turn directions, timezone lookup, bounding
+  box + area for a named place, and POI search within a rectangle.
+  Uses OpenStreetMap + Overpass + OSRM. Free, no API key.
+version: 1.2.0
+author: Mibayy
+license: MIT
+metadata:
+  hermes:
+    tags: [maps, geocoding, places, routing, distance, directions, nearby, location, openstreetmap, nominatim, overpass, osrm]
+    category: productivity
+    requires_toolsets: [terminal]
+    supersedes: [find-nearby]
+---
+
+# Maps Skill
+
+Location intelligence using free, open data sources. 8 commands, 44 POI
+categories, zero dependencies (Python stdlib only), no API key required.
+
+Data sources: OpenStreetMap/Nominatim, Overpass API, OSRM, TimeAPI.io.
+
+This skill supersedes the old `find-nearby` skill — all of find-nearby's
+functionality is covered by the `nearby` command below, with the same
+`--near "<place>"` shortcut and multi-category support.
+
+## When to Use
+
+- User sends a Telegram location pin (latitude/longitude in the message) → `nearby`
+- User wants coordinates for a place name → `search`
+- User has coordinates and wants the address → `reverse`
+- User asks for nearby restaurants, hospitals, pharmacies, hotels, etc. → `nearby`
+- User wants driving/walking/cycling distance or travel time → `distance`
+- User wants turn-by-turn directions between two places → `directions`
+- User wants timezone information for a location → `timezone`
+- User wants to search for POIs within a geographic area → `area` + `bbox`
+
+## Prerequisites
+
+Python 3.8+ (stdlib only — no pip installs needed).
+
+Script path: `~/.hermes/skills/maps/scripts/maps_client.py`
+
+## Commands
+
+```bash
+MAPS=~/.hermes/skills/maps/scripts/maps_client.py
+```
+
+### search — Geocode a place name
+
+```bash
+python3 $MAPS search "Eiffel Tower"
+python3 $MAPS search "1600 Pennsylvania Ave, Washington DC"
+```
+
+Returns: lat, lon, display name, type, bounding box, importance score.
+
+### reverse — Coordinates to address
+
+```bash
+python3 $MAPS reverse 48.8584 2.2945
+```
+
+Returns: full address breakdown (street, city, state, country, postcode).
+
+### nearby — Find places by category
+
+```bash
+# By coordinates (from a Telegram location pin, for example)
+python3 $MAPS nearby 48.8584 2.2945 restaurant --limit 10
+python3 $MAPS nearby 40.7128 -74.0060 hospital --radius 2000
+
+# By address / city / zip / landmark — --near auto-geocodes
+python3 $MAPS nearby --near "Times Square, New York" --category cafe
+python3 $MAPS nearby --near "90210" --category pharmacy
+
+# Multiple categories merged into one query
+python3 $MAPS nearby --near "downtown austin" --category restaurant --category bar --limit 10
+```
+
+44 categories: restaurant, cafe, bar, hospital, pharmacy, hotel, supermarket,
+atm, gas_station, parking, museum, park, school, university, bank, police,
+fire_station, library, airport, train_station, bus_stop, church, mosque,
+synagogue, dentist, doctor, cinema, theatre, gym, swimming_pool, post_office,
+convenience_store, bakery, bookshop, laundry, car_wash, car_rental,
+bicycle_rental, taxi, veterinary, zoo, playground, stadium, nightclub.
+
+Each result includes: `name`, `address`, `lat`/`lon`, `distance_m`,
+`maps_url` (clickable Google Maps link), `directions_url` (Google Maps
+directions from the search point), and promoted tags when available —
+`cuisine`, `hours` (opening_hours), `phone`, `website`.
+
+### distance — Travel distance and time
+
+```bash
+python3 $MAPS distance "Paris" --to "Lyon"
+python3 $MAPS distance "New York" --to "Boston" --mode driving
+python3 $MAPS distance "Big Ben" --to "Tower Bridge" --mode walking
+```
+
+Modes: driving (default), walking, cycling. Returns road distance, duration,
+and straight-line distance for comparison.
+
+### directions — Turn-by-turn navigation
+
+```bash
+python3 $MAPS directions "Eiffel Tower" --to "Louvre Museum" --mode walking
+python3 $MAPS directions "JFK Airport" --to "Times Square" --mode driving
+```
+
+Returns numbered steps with instruction, distance, duration, road name, and
+maneuver type (turn, depart, arrive, etc.).
+
+### timezone — Timezone for coordinates
+
+```bash
+python3 $MAPS timezone 48.8584 2.2945
+python3 $MAPS timezone 35.6762 139.6503
+```
+
+Returns timezone name, UTC offset, and current local time.
+
+### area — Bounding box and area for a place
+
+```bash
+python3 $MAPS area "Manhattan, New York"
+python3 $MAPS area "London"
+```
+
+Returns bounding box coordinates, width/height in km, and approximate area.
+Useful as input for the bbox command.
+
+### bbox — Search within a bounding box
+
+```bash
+python3 $MAPS bbox 40.75 -74.00 40.77 -73.98 restaurant --limit 20
+```
+
+Finds POIs within a geographic rectangle. Use `area` first to get the
+bounding box coordinates for a named place.
+
+## Working With Telegram Location Pins
+
+When a user sends a location pin, the message contains `latitude:` and
+`longitude:` fields. Extract those and pass them straight to `nearby`:
+
+```bash
+# User sent a pin at 36.17, -115.14 and asked "find cafes nearby"
+python3 $MAPS nearby 36.17 -115.14 cafe --radius 1500
+```
+
+Present results as a numbered list with names, distances, and the
+`maps_url` field so the user gets a tap-to-open link in chat. For "open
+now?" questions, check the `hours` field; if missing or unclear, verify
+with `web_search` since OSM hours are community-maintained and not always
+current.
+
+## Workflow Examples
+
+**"Find Italian restaurants near the Colosseum":**
+1. `nearby --near "Colosseum Rome" --category restaurant --radius 500`
+   — one command, auto-geocoded; keep results whose `cuisine` mentions "italian"
+
+**"What's near this location pin they sent?":**
+1. Extract lat/lon from the Telegram message
+2. `nearby LAT LON cafe --radius 1500`
+
+**"How do I walk from hotel to conference center?":**
+1. `directions "Hotel Name" --to "Conference Center" --mode walking`
+
+**"What restaurants are in downtown Seattle?":**
+1. `area "Downtown Seattle"` → get bounding box
+2. `bbox S W N E restaurant --limit 30`
+
+## Pitfalls
+
+- Nominatim ToS: max 1 req/s (handled automatically by the script)
+- `nearby` requires lat/lon OR `--near "<address>"` — one of the two is needed
+- OSRM routing coverage is best for Europe and North America
+- Overpass API can be slow during peak hours; the script automatically
+  falls back between mirrors (overpass-api.de → overpass.kumi.systems)
+- `distance` and `directions` use `--to` flag for the destination (not positional)
+- If a zip code alone gives ambiguous results globally, include country/state
+
+## Verification
+
+```bash
+python3 ~/.hermes/skills/maps/scripts/maps_client.py search "Statue of Liberty"
+# Should return lat ~40.689, lon ~-74.044
+
+python3 ~/.hermes/skills/maps/scripts/maps_client.py nearby --near "Times Square" --category restaurant --limit 3
+# Should return a list of restaurants within ~500m of Times Square
+```
diff --git a/skills/productivity/maps/scripts/maps_client.py b/skills/productivity/maps/scripts/maps_client.py
new file mode 100644
index 00000000000..db0de82d6d7
--- /dev/null
+++ b/skills/productivity/maps/scripts/maps_client.py
@@ -0,0 +1,1249 @@
+#!/usr/bin/env python3
+"""
+maps_client.py - CLI tool for maps, geocoding, routing, POI search, and more.
+Uses only Python stdlib. Data from OpenStreetMap/Nominatim, Overpass API, OSRM,
+and TimeAPI.io.
+
+Commands:
+  search     - Geocode a place name to coordinates
+  reverse    - Reverse geocode coordinates to an address
+  nearby     - Find nearby POIs by category
+  distance   - Road distance and travel time between two places
+  directions - Turn-by-turn directions between two places
+  timezone   - Timezone info for coordinates
+  bbox       - Find POIs within a bounding box
+  area       - Get bounding box and area info for a named place
+"""
+
+import argparse
+import json
+import math
+import os
+import sys
+import time
+import urllib.error
+import urllib.parse
+import urllib.request
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+USER_AGENT = "HermesAgent/1.0 (contact: hermes@agent.ai)"
+DATA_SOURCE = "OpenStreetMap/Nominatim"
+
+NOMINATIM_SEARCH  = "https://nominatim.openstreetmap.org/search"
+NOMINATIM_REVERSE = "https://nominatim.openstreetmap.org/reverse"
+# Public Overpass endpoints. We try them in order so a single server
+# outage doesn't break the skill — kumi.systems is a well-known mirror.
+OVERPASS_URLS = [
+    "https://overpass-api.de/api/interpreter",
+    "https://overpass.kumi.systems/api/interpreter",
+]
+# Backward-compat alias for any caller that imports OVERPASS_API directly.
+OVERPASS_API      = OVERPASS_URLS[0]
+OSRM_BASE         = "https://router.project-osrm.org/route/v1"
+TIMEAPI_BASE      = "https://timeapi.io/api/timezone/coordinate"
+
+# Seconds to sleep between Nominatim requests (ToS requirement)
+NOMINATIM_RATE_LIMIT = 1.0
+
+# Maximum retries for HTTP errors
+MAX_RETRIES = 3
+RETRY_DELAY = 2.0  # seconds
+
+# Category -> (OSM tag key, OSM tag value)
+CATEGORY_TAGS = {
+    # Food & Drink
+    "restaurant":        ("amenity", "restaurant"),
+    "cafe":              ("amenity", "cafe"),
+    "bar":               ("amenity", "bar"),
+    "bakery":            ("shop",    "bakery"),
+    "convenience_store": ("shop",    "convenience"),
+    # Health
+    "hospital":          ("amenity", "hospital"),
+    "pharmacy":          ("amenity", "pharmacy"),
+    "dentist":           ("amenity", "dentist"),
+    "doctor":            ("amenity", "doctors"),
+    "veterinary":        ("amenity", "veterinary"),
+    # Accommodation
+    "hotel":             ("tourism", "hotel"),
+    # Shopping & Services
+    "supermarket":       ("shop",    "supermarket"),
+    "bookshop":          ("shop",    "books"),
+    "laundry":           ("shop",    "laundry"),
+    # Finance
+    "atm":               ("amenity", "atm"),
+    "bank":              ("amenity", "bank"),
+    # Transport
+    "gas_station":       ("amenity", "fuel"),
+    "parking":           ("amenity", "parking"),
+    "airport":           ("aeroway", "aerodrome"),
+    "train_station":     ("railway", "station"),
+    "bus_stop":          ("highway", "bus_stop"),
+    "taxi":              ("amenity", "taxi"),
+    "car_wash":          ("amenity", "car_wash"),
+    "car_rental":        ("amenity", "car_rental"),
+    "bicycle_rental":    ("amenity", "bicycle_rental"),
+    # Culture & Entertainment
+    "museum":            ("tourism", "museum"),
+    "cinema":            ("amenity", "cinema"),
+    "theatre":           ("amenity", "theatre"),
+    "nightclub":         ("amenity", "nightclub"),
+    "zoo":               ("tourism", "zoo"),
+    # Education
+    "school":            ("amenity", "school"),
+    "university":        ("amenity", "university"),
+    "library":           ("amenity", "library"),
+    # Public Services
+    "police":            ("amenity", "police"),
+    "fire_station":      ("amenity", "fire_station"),
+    "post_office":       ("amenity", "post_office"),
+    # Religion
+    "church":            ("amenity", "place_of_worship"),  # refined by religion tag
+    "mosque":            ("amenity", "place_of_worship"),
+    "synagogue":         ("amenity", "place_of_worship"),
+    # Recreation
+    "park":              ("leisure", "park"),
+    "gym":               ("leisure", "fitness_centre"),
+    "swimming_pool":     ("leisure", "swimming_pool"),
+    "playground":        ("leisure", "playground"),
+    "stadium":           ("leisure", "stadium"),
+}
+
+# Religion-specific overrides for place_of_worship categories
+RELIGION_FILTER = {
+    "church":    "christian",
+    "mosque":    "muslim",
+    "synagogue": "jewish",
+}
+
+VALID_CATEGORIES = sorted(CATEGORY_TAGS.keys())
+
+OSRM_PROFILES = {
+    "driving": "driving",
+    "walking": "foot",
+    "cycling": "bike",
+}
+
+# ---------------------------------------------------------------------------
+# Output helpers
+# ---------------------------------------------------------------------------
+
+def print_json(data):
+    """Print data as pretty-printed JSON to stdout."""
+    print(json.dumps(data, indent=2, ensure_ascii=False))
+
+
+def error_exit(message, code=1):
+    """Print an error result as JSON and exit."""
+    print_json({"error": message, "status": "error"})
+    sys.exit(code)
+
+
+# ---------------------------------------------------------------------------
+# HTTP helpers
+# ---------------------------------------------------------------------------
+
+def http_get(url, params=None, retries=MAX_RETRIES, silent=False):
+    """
+    Perform an HTTP GET request, returning parsed JSON.
+    Adds the required User-Agent header. Retries on transient errors
+    (HTTP 429/502/503/504, network failures, timeouts, unparsable JSON).
+    If silent=True, raises RuntimeError instead of calling error_exit.
+    """
+    if params:
+        url = url + "?" + urllib.parse.urlencode(params)
+
+    req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
+
+    last_error = None
+    for attempt in range(1, retries + 1):
+        try:
+            with urllib.request.urlopen(req, timeout=15) as resp:
+                return json.loads(resp.read().decode("utf-8"))
+        except urllib.error.HTTPError as exc:
+            last_error = f"HTTP {exc.code}: {exc.reason} for {url}"
+            if exc.code not in (429, 502, 503, 504):
+                if silent:
+                    raise RuntimeError(last_error) from exc
+                error_exit(last_error)
+            time.sleep(RETRY_DELAY * attempt)
+        except OSError as exc:
+            # URLError is an OSError; so are read timeouts urllib never wraps.
+            last_error = f"URL error: {getattr(exc, 'reason', exc)}"
+            time.sleep(RETRY_DELAY * attempt)
+        except json.JSONDecodeError as exc:
+            last_error = f"JSON parse error: {exc}"
+            time.sleep(RETRY_DELAY * attempt)
+
+    msg = f"Request failed after {retries} attempts. Last error: {last_error}"
+    if silent:
+        raise RuntimeError(msg)
+    error_exit(msg)
+
+
+def http_get_text(url, params=None, retries=MAX_RETRIES, silent=False):
+    """
+    Like http_get but returns raw text instead of parsed JSON.
+    Useful for APIs that may return non-JSON responses.
+    """
+    if params:
+        url = url + "?" + urllib.parse.urlencode(params)
+
+    req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
+
+    last_error = None
+    for attempt in range(1, retries + 1):
+        try:
+            with urllib.request.urlopen(req, timeout=15) as resp:
+                return resp.read().decode("utf-8")
+        except urllib.error.HTTPError as exc:
+            last_error = f"HTTP {exc.code}: {exc.reason} for {url}"
+            if exc.code not in (429, 502, 503, 504):
+                if silent:
+                    raise RuntimeError(last_error) from exc
+                error_exit(last_error)
+            time.sleep(RETRY_DELAY * attempt)
+        except OSError as exc:
+            # URLError is an OSError; so are read timeouts urllib never wraps.
+            last_error = f"URL error: {getattr(exc, 'reason', exc)}"
+            time.sleep(RETRY_DELAY * attempt)
+
+    msg = f"Request failed after {retries} attempts. Last error: {last_error}"
+    if silent:
+        raise RuntimeError(msg)
+    error_exit(msg)
+
+
+def http_post(url, data_str, retries=MAX_RETRIES, silent=False):
+    """
+    Perform an HTTP POST with a plain-text body (for Overpass QL).
+    Returns parsed JSON. If silent=True, raises RuntimeError instead of
+    calling error_exit, so a caller can recover without JSON hitting stdout.
+    """
+    encoded = data_str.encode("utf-8")
+    req = urllib.request.Request(
+        url,
+        data=encoded,
+        headers={
+            "User-Agent": USER_AGENT,
+            "Content-Type": "application/x-www-form-urlencoded",
+        },
+    )
+
+    last_error = None
+    for attempt in range(1, retries + 1):
+        try:
+            with urllib.request.urlopen(req, timeout=30) as resp:
+                return json.loads(resp.read().decode("utf-8"))
+        except urllib.error.HTTPError as exc:
+            last_error = f"HTTP {exc.code}: {exc.reason}"
+            if exc.code not in (429, 502, 503, 504):
+                if silent:
+                    raise RuntimeError(last_error) from exc
+                error_exit(last_error)
+            time.sleep(RETRY_DELAY * attempt)
+        except OSError as exc:
+            # URLError is an OSError; so are read timeouts urllib never wraps.
+            last_error = f"URL error: {getattr(exc, 'reason', exc)}"
+            time.sleep(RETRY_DELAY * attempt)
+        except json.JSONDecodeError as exc:
+            last_error = f"JSON parse error: {exc}"
+            time.sleep(RETRY_DELAY * attempt)
+
+    msg = f"POST failed after {retries} attempts. Last error: {last_error}"
+    if silent:
+        raise RuntimeError(msg)
+    error_exit(msg)
+
+
+def overpass_query(query):
+    """POST an Overpass QL query, trying each URL in OVERPASS_URLS in turn.
+
+    A single public Overpass mirror can be rate-limited or down, so the next
+    mirror is tried before giving up. Returns parsed JSON; calls error_exit
+    (once, so stdout stays a single JSON document) if every mirror fails.
+    """
+    post_data = "data=" + urllib.parse.quote(query)
+    last_error = None
+    for url in OVERPASS_URLS:
+        try:
+            return http_post(url, post_data, retries=1, silent=True)
+        except Exception as exc:  # any failure: fall through to next mirror
+            last_error = f"{url}: {exc}"
+    error_exit(
+        f"All Overpass mirrors failed. Last error: {last_error or 'unknown'}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Geo math
+# ---------------------------------------------------------------------------
+
+def haversine_m(lat1, lon1, lat2, lon2):
+    """Return distance in metres between two lat/lon points (Haversine)."""
+    R = 6_371_000  # Earth mean radius in metres
+    phi1, phi2 = math.radians(lat1), math.radians(lat2)
+    dphi = math.radians(lat2 - lat1)
+    dlam = math.radians(lon2 - lon1)
+    a = (math.sin(dphi / 2) ** 2
+         + math.cos(phi1) * math.cos(phi2) * math.sin(dlam / 2) ** 2)
+    a = min(1.0, a)  # rounding can push a past 1 near antipodes -> sqrt(<0)
+    return 2 * R * math.atan2(math.sqrt(a), math.sqrt(1 - a))
+
+
+# ---------------------------------------------------------------------------
+# Nominatim helpers
+# ---------------------------------------------------------------------------
+
+def nominatim_search(query, limit=5):
+    """Geocode a free-text query. Returns list of result dicts."""
+    params = {
+        "q":              query,
+        "format":         "json",
+        "limit":          limit,
+        "addressdetails": 1,
+    }
+    time.sleep(NOMINATIM_RATE_LIMIT)
+    return http_get(NOMINATIM_SEARCH, params=params)
+
+
+def nominatim_reverse(lat, lon):
+    """Reverse geocode lat/lon. Returns a single result dict."""
+    params = {
+        "lat":            lat,
+        "lon":            lon,
+        "format":         "json",
+        "addressdetails": 1,
+    }
+    time.sleep(NOMINATIM_RATE_LIMIT)
+    return http_get(NOMINATIM_REVERSE, params=params)
+
+
+def geocode_single(query):
+    """
+    Geocode a query and return (lat, lon, display_name).
+    Exits with error if nothing found.
+    """
+    results = nominatim_search(query, limit=1)
+    if not results:
+        error_exit(f"Could not geocode: {query}")
+    r = results[0]
+    return float(r["lat"]), float(r["lon"]), r.get("display_name", query)
+
+
+# ---------------------------------------------------------------------------
+# Overpass helpers
+# ---------------------------------------------------------------------------
+
+def build_overpass_nearby(tag_key, tag_val, lat, lon, radius, limit,
+                          religion=None):
+    """Build an Overpass QL query for nearby POIs around a point."""
+    religion_filter = ""
+    if religion:
+        religion_filter = f'["religion"="{religion}"]'
+    return (
+        f'[out:json][timeout:25];\n'
+        f'(\n'
+        f'  node["{tag_key}"="{tag_val}"]{religion_filter}'
+        f'(around:{radius},{lat},{lon});\n'
+        f'  way["{tag_key}"="{tag_val}"]{religion_filter}'
+        f'(around:{radius},{lat},{lon});\n'
+        f');\n'
+        f'out center {limit};\n'
+    )
+
+
+def build_overpass_bbox(tag_key, tag_val, south, west, north, east, limit,
+                        religion=None):
+    """Build an Overpass QL query for POIs within a bounding box."""
+    religion_filter = ""
+    if religion:
+        religion_filter = f'["religion"="{religion}"]'
+    return (
+        f'[out:json][timeout:25];\n'
+        f'(\n'
+        f'  node["{tag_key}"="{tag_val}"]{religion_filter}'
+        f'({south},{west},{north},{east});\n'
+        f'  way["{tag_key}"="{tag_val}"]{religion_filter}'
+        f'({south},{west},{north},{east});\n'
+        f');\n'
+        f'out center {limit};\n'
+    )
+
+
+def parse_overpass_elements(elements, ref_lat=None, ref_lon=None):
+    """
+    Parse Overpass elements into a clean list of POI dicts.
+    If ref_lat/ref_lon are provided, computes distance and sorts by it.
+    """
+    places = []
+    for el in elements:
+        # Ways have a "center" sub-dict; nodes have lat/lon directly
+        if el["type"] == "way":
+            center = el.get("center", {})
+            el_lat = center.get("lat")
+            el_lon = center.get("lon")
+        else:
+            el_lat = el.get("lat")
+            el_lon = el.get("lon")
+
+        if el_lat is None or el_lon is None:
+            continue
+
+        tags = el.get("tags", {})
+        name = tags.get("name") or tags.get("name:en") or ""
+
+        # Build a short address from available tags
+        addr_parts = []
+        for part_key in ("addr:housenumber", "addr:street", "addr:city"):
+            val = tags.get(part_key)
+            if val:
+                addr_parts.append(val)
+        address_str = ", ".join(addr_parts) if addr_parts else ""
+
+        place = {
+            "name":     name,
+            "address":  address_str,
+            "lat":      el_lat,
+            "lon":      el_lon,
+            "osm_type": el.get("type", ""),
+            "osm_id":   el.get("id", ""),
+            # Clickable Google Maps link so the agent can render a tap-to-open
+            # URL in chat without composing one downstream.
+            "maps_url": f"https://www.google.com/maps/search/?api=1&query={el_lat},{el_lon}",
+            "tags": {
+                k: v for k, v in tags.items()
+                if k not in ("name", "name:en",
+                             "addr:housenumber", "addr:street", "addr:city")
+            },
+        }
+
+        # Promote commonly-useful tags to top-level fields so agents can
+        # reference them without digging into the raw ``tags`` dict.
+        for src_key, dst_key in (
+            ("cuisine",        "cuisine"),
+            ("opening_hours",  "hours"),
+            ("phone",          "phone"),
+            ("website",        "website"),
+        ):
+            val = tags.get(src_key)
+            if val:
+                place[dst_key] = val
+
+        if ref_lat is not None and ref_lon is not None:
+            dist_m = haversine_m(ref_lat, ref_lon, el_lat, el_lon)
+            place["distance_m"] = round(dist_m, 1)
+            # With a reference point we can also hand back a directions URL.
+            place["directions_url"] = (
+                f"https://www.google.com/maps/dir/?api=1"
+                f"&origin={ref_lat},{ref_lon}"
+                f"&destination={el_lat},{el_lon}"
+            )
+
+        places.append(place)
+
+    # Sort by distance if available
+    if places and "distance_m" in places[0]:
+        places.sort(key=lambda p: p["distance_m"])
+
+    return places
+
+
+# ---------------------------------------------------------------------------
+# Command: search
+# ---------------------------------------------------------------------------
+
+def cmd_search(args):
+    """Geocode a place name and return top results."""
+    query = " ".join(args.query)
+    raw   = nominatim_search(query, limit=5)
+
+    if not raw:
+        print_json({
+            "query":       query,
+            "results":     [],
+            "count":       0,
+            "data_source": DATA_SOURCE,
+        })
+        return
+
+    results = []
+    for item in raw:
+        bb = item.get("boundingbox", [])
+        results.append({
+            "name":         item.get("name") or item.get("display_name", ""),
+            "display_name": item.get("display_name", ""),
+            "lat":          float(item["lat"]),
+            "lon":          float(item["lon"]),
+            "type":         item.get("type", ""),
+            "category":     item.get("category", ""),
+            "osm_type":     item.get("osm_type", ""),
+            "osm_id":       item.get("osm_id", ""),
+            "bounding_box": {
+                "min_lat": float(bb[0]) if len(bb) > 0 else None,
+                "max_lat": float(bb[1]) if len(bb) > 1 else None,
+                "min_lon": float(bb[2]) if len(bb) > 2 else None,
+                "max_lon": float(bb[3]) if len(bb) > 3 else None,
+            },
+            "importance":   item.get("importance"),
+        })
+
+    print_json({
+        "query":       query,
+        "results":     results,
+        "count":       len(results),
+        "data_source": DATA_SOURCE,
+    })
+
+
+# ---------------------------------------------------------------------------
+# Command: reverse
+# ---------------------------------------------------------------------------
+
+def cmd_reverse(args):
+    """Reverse geocode coordinates to a human-readable address."""
+    try:
+        lat = float(args.lat)
+        lon = float(args.lon)
+    except ValueError:
+        error_exit("LAT and LON must be numeric values.")
+
+    if not (-90 <= lat <= 90):
+        error_exit("Latitude must be between -90 and 90.")
+    if not (-180 <= lon <= 180):
+        error_exit("Longitude must be between -180 and 180.")
+
+    data = nominatim_reverse(lat, lon)
+
+    if "error" in data:
+        error_exit(f"Reverse geocode failed: {data['error']}")
+
+    address = data.get("address", {})
+
+    print_json({
+        "lat":          lat,
+        "lon":          lon,
+        "display_name": data.get("display_name", ""),
+        "address": {
+            "house_number":  address.get("house_number", ""),
+            "road":          address.get("road", ""),
+            "neighbourhood": address.get("neighbourhood", ""),
+            "suburb":        address.get("suburb", ""),
+            "city":          (address.get("city")
+                              or address.get("town")
+                              or address.get("village", "")),
+            "county":        address.get("county", ""),
+            "state":         address.get("state", ""),
+            "postcode":      address.get("postcode", ""),
+            "country":       address.get("country", ""),
+            "country_code":  address.get("country_code", ""),
+        },
+        "osm_type":    data.get("osm_type", ""),
+        "osm_id":      data.get("osm_id", ""),
+        "data_source": DATA_SOURCE,
+    })
+
+
+# ---------------------------------------------------------------------------
+# Command: nearby
+# ---------------------------------------------------------------------------
+
+def cmd_nearby(args):
+    """Find nearby POIs using the Overpass API.
+
+    Accepts either explicit coordinates (``lat``/``lon``) or a free-form
+    address via ``--near`` (auto-geocoded through Nominatim). Supports
+    multiple categories in one call — results are merged, deduplicated
+    by ``osm_type+osm_id``, sorted by distance.
+    """
+    # Resolve the center point. --near takes precedence if provided so the
+    # agent can ask "cafes near Times Square" in one command without having
+    # to geocode first.
+    near = getattr(args, "near", None)
+    if near:
+        near_query = " ".join(near).strip() if isinstance(near, list) else str(near).strip()
+        if not near_query:
+            error_exit("--near must be a non-empty address or place name.")
+        lat, lon, _ = geocode_single(near_query)
+    else:
+        try:
+            lat = float(args.lat)
+            lon = float(args.lon)
+        except (TypeError, ValueError):
+            error_exit("Provide numeric LAT and LON, or use --near \"<address>\".")
+
+    # Categories: the repeatable ``--category`` flag plus the positional
+    # ``category``. With --near, argparse binds a lone positional to the
+    # ``lat``/``lon`` slots, so known category names are recovered from there.
+    categories = list(getattr(args, "category_list", None) or [])
+    if near:
+        slots = (getattr(args, "lat", None), getattr(args, "lon", None))
+        categories += [s for s in slots if s and str(s).lower() in CATEGORY_TAGS]
+    if getattr(args, "category", None):
+        categories.append(args.category)
+    # Deduplicate, preserve order, lower-case.
+    categories = list(dict.fromkeys(c.lower() for c in categories if c))
+    if not categories:
+        error_exit("Provide at least one category (positional or --category).")
+    unknown = [c for c in categories if c not in CATEGORY_TAGS]
+    if unknown:
+        error_exit(
+            f"Unknown categor{'ies' if len(unknown) > 1 else 'y'} "
+            f"{', '.join(repr(c) for c in unknown)}. "
+            f"Valid categories: {', '.join(VALID_CATEGORIES)}"
+        )
+
+    radius = int(args.radius)
+    limit  = int(args.limit)
+    if radius <= 0:
+        error_exit("Radius must be a positive integer (metres).")
+    if limit <= 0:
+        error_exit("Limit must be a positive integer.")
+
+    # Query each category against the Overpass fallback chain, merge results,
+    # dedupe by OSM identity so POIs tagged under multiple categories don't
+    # appear twice.
+    merged = {}
+    for category in categories:
+        tag_key, tag_val = CATEGORY_TAGS[category]
+        religion = RELIGION_FILTER.get(category)
+        query = build_overpass_nearby(tag_key, tag_val, lat, lon, radius, limit,
+                                      religion=religion)
+        raw = overpass_query(query)
+        elements = raw.get("elements", [])
+        for place in parse_overpass_elements(elements, ref_lat=lat, ref_lon=lon):
+            place["category"] = category
+            key = (place.get("osm_type", ""), place.get("osm_id", ""))
+            # First-seen wins: a POI keeps the first category it matched.
+            merged.setdefault(key, place)
+
+    # Sort merged by distance when we have ref lat/lon, then cap at ``limit``.
+    places = sorted(
+        merged.values(),
+        key=lambda p: p.get("distance_m", float("inf")),
+    )[:limit]
+
+    print_json({
+        "center_lat":  lat,
+        "center_lon":  lon,
+        "categories":  categories,
+        "radius_m":    radius,
+        "count":       len(places),
+        "results":     places,
+        "data_source": DATA_SOURCE,
+    })
+
+
+# ---------------------------------------------------------------------------
+# Command: distance
+# ---------------------------------------------------------------------------
+
+def cmd_distance(args):
+    """Calculate road distance and travel time between two places."""
+    origin_query      = " ".join(args.origin)
+    destination_query = " ".join(args.to)
+    mode              = args.mode.lower()
+
+    if mode not in OSRM_PROFILES:
+        error_exit(f"Invalid mode '{mode}'. Choose from: {', '.join(OSRM_PROFILES)}")
+
+    # Geocode origin and destination
+    o_lat, o_lon, o_name = geocode_single(origin_query)
+    d_lat, d_lon, d_name = geocode_single(destination_query)
+
+    profile = OSRM_PROFILES[mode]
+    url = (
+        f"{OSRM_BASE}/{profile}/"
+        f"{o_lon},{o_lat};{d_lon},{d_lat}"
+        f"?overview=false&steps=false"
+    )
+
+    osrm_data = http_get(url)
+
+    if osrm_data.get("code") != "Ok":
+        error_exit(
+            f"OSRM routing failed: "
+            f"{osrm_data.get('message', osrm_data.get('code', 'unknown error'))}"
+        )
+
+    routes = osrm_data.get("routes", [])
+    if not routes:
+        error_exit("No route found between the two locations.")
+
+    route        = routes[0]
+    distance_m   = route.get("distance", 0)
+    duration_s   = route.get("duration", 0)
+    distance_km  = round(distance_m / 1000, 3)
+    duration_min = round(duration_s / 60, 2)
+
+    # Straight-line distance for reference
+    straight_m = haversine_m(o_lat, o_lon, d_lat, d_lon)
+
+    print_json({
+        "origin": {
+            "query":        origin_query,
+            "display_name": o_name,
+            "lat":          o_lat,
+            "lon":          o_lon,
+        },
+        "destination": {
+            "query":        destination_query,
+            "display_name": d_name,
+            "lat":          d_lat,
+            "lon":          d_lon,
+        },
+        "mode":             mode,
+        "distance_km":      distance_km,
+        "distance_m":       round(distance_m, 1),
+        "duration_minutes": duration_min,
+        "duration_seconds": round(duration_s, 1),
+        "straight_line_km": round(straight_m / 1000, 3),
+        "data_source":      DATA_SOURCE,
+    })
+
+
+# ---------------------------------------------------------------------------
+# Command: directions
+# ---------------------------------------------------------------------------
+
+def _format_duration(seconds):
+    """Format seconds as '<n>s', '<x> min' or '<h>h <m>min', carrying on rounding."""
+    if round(seconds) < 60:
+        return f"{round(seconds)}s"
+    minutes = seconds / 60
+    if round(minutes, 1) < 60:
+        return f"{round(minutes, 1)} min"
+    # Round before splitting so 7199 s gives "2h 0min" rather than "1h 60min".
+    hours, remaining = divmod(round(minutes), 60)
+    return f"{hours}h {remaining}min"
+
+
+def _format_distance(metres):
+    """Format metres as '<n> m', or '<x> km' once it rounds to 1000 m or more."""
+    if round(metres) < 1000:
+        return f"{round(metres)} m"
+    return f"{round(metres / 1000, 2)} km"
+
+
+def cmd_directions(args):
+    """Get turn-by-turn directions between two places via OSRM."""
+    origin_query      = " ".join(args.origin)
+    destination_query = " ".join(args.to)
+    mode              = args.mode.lower()
+
+    if mode not in OSRM_PROFILES:
+        error_exit(f"Invalid mode '{mode}'. Choose from: {', '.join(OSRM_PROFILES)}")
+
+    # Geocode origin and destination
+    o_lat, o_lon, o_name = geocode_single(origin_query)
+    d_lat, d_lon, d_name = geocode_single(destination_query)
+
+    profile = OSRM_PROFILES[mode]
+    url = (
+        f"{OSRM_BASE}/{profile}/"
+        f"{o_lon},{o_lat};{d_lon},{d_lat}"
+        f"?overview=false&steps=true"
+    )
+
+    osrm_data = http_get(url)
+
+    if osrm_data.get("code") != "Ok":
+        error_exit(
+            f"OSRM routing failed: "
+            f"{osrm_data.get('message', osrm_data.get('code', 'unknown error'))}"
+        )
+
+    routes = osrm_data.get("routes", [])
+    if not routes:
+        error_exit("No route found between the two locations.")
+
+    route        = routes[0]
+    distance_m   = route.get("distance", 0)
+    duration_s   = route.get("duration", 0)
+
+    # Extract steps from all legs
+    steps = []
+    step_num = 0
+    for leg in route.get("legs", []):
+        for step in leg.get("steps", []):
+            maneuver = step.get("maneuver", {})
+            step_dist = step.get("distance", 0)
+            step_dur  = step.get("duration", 0)
+            step_name = step.get("name", "")
+            modifier  = maneuver.get("modifier", "")
+            m_type    = maneuver.get("type", "")
+
+            # Build instruction text
+            if m_type == "depart":
+                instruction = f"Depart on {step_name}" if step_name else "Depart"
+            elif m_type == "arrive":
+                instruction = "Arrive at destination"
+            elif m_type == "turn":
+                instruction = f"Turn {modifier} onto {step_name}" if step_name else f"Turn {modifier}"
+            elif m_type == "new name":
+                instruction = f"Continue onto {step_name}" if step_name else "Continue"
+            elif m_type == "merge":
+                instruction = f"Merge {modifier} onto {step_name}" if step_name else f"Merge {modifier}"
+            elif m_type == "fork":
+                instruction = f"Take the {modifier} fork onto {step_name}" if step_name else f"Take the {modifier} fork"
+            elif m_type == "roundabout":
+                instruction = f"Enter roundabout, exit onto {step_name}" if step_name else "Enter roundabout"
+            elif m_type == "rotary":
+                instruction = f"Enter rotary, exit onto {step_name}" if step_name else "Enter rotary"
+            elif m_type == "end of road":
+                instruction = f"At end of road, turn {modifier} onto {step_name}" if step_name else f"At end of road, turn {modifier}"
+            elif m_type == "continue":
+                instruction = f"Continue {modifier} on {step_name}" if step_name else f"Continue {modifier}"
+            elif m_type == "on ramp":
+                instruction = f"Take ramp onto {step_name}" if step_name else "Take ramp"
+            elif m_type == "off ramp":
+                instruction = f"Take exit onto {step_name}" if step_name else "Take exit"
+            else:
+                instruction = f"{m_type} {modifier} {step_name}".strip()
+
+            step_num += 1
+            steps.append({
+                "step":        step_num,
+                "instruction": instruction,
+                "distance":    _format_distance(step_dist),
+                "distance_m":  round(step_dist, 1),
+                "duration":    _format_duration(step_dur),
+                "duration_s":  round(step_dur, 1),
+                "road_name":   step_name,
+                "maneuver":    m_type,
+            })
+
+    print_json({
+        "origin": {
+            "query":        origin_query,
+            "display_name": o_name,
+            "lat":          o_lat,
+            "lon":          o_lon,
+        },
+        "destination": {
+            "query":        destination_query,
+            "display_name": d_name,
+            "lat":          d_lat,
+            "lon":          d_lon,
+        },
+        "mode":               mode,
+        "total_distance":     _format_distance(distance_m),
+        "total_distance_m":   round(distance_m, 1),
+        "total_duration":     _format_duration(duration_s),
+        "total_duration_s":   round(duration_s, 1),
+        "steps":              steps,
+        "step_count":         len(steps),
+        "data_source":        DATA_SOURCE,
+    })
+
+
+# ---------------------------------------------------------------------------
+# Command: timezone
+# ---------------------------------------------------------------------------
+
+def cmd_timezone(args):
+    """
+    Get timezone information for a lat/lon coordinate.
+
+    Strategy:
+      1. Try TimeAPI.io (free, no key, supports coordinate-based lookup).
+      2. Fallback: derive UTC offset approximation from longitude.
+    """
+    try:
+        lat = float(args.lat)
+        lon = float(args.lon)
+    except ValueError:
+        error_exit("LAT and LON must be numeric values.")
+
+    if not (-90 <= lat <= 90):
+        error_exit("Latitude must be between -90 and 90.")
+    if not (-180 <= lon <= 180):
+        error_exit("Longitude must be between -180 and 180.")
+
+    timezone_str = None
+    timezone_src = None
+    current_time = None
+    utc_offset   = None
+
+    # --- Strategy 1: TimeAPI.io coordinate lookup ---
+    try:
+        params = {"latitude": lat, "longitude": lon}
+        tz_data = http_get(TIMEAPI_BASE, params=params, silent=True)
+        if isinstance(tz_data, dict):
+            timezone_str = tz_data.get("timeZone")
+            current_time = tz_data.get("currentLocalTime")
+            timezone_src = "timeapi.io"
+            # Prefer currentUtcOffset, then standardUtcOffset; an absent or
+            # empty entry is skipped instead of being reported as "+00:00".
+            # NOTE(review): components are summed so hours/minutes and a
+            # seconds-only payload both work — confirm TimeAPI's schema.
+            for offset_key in ("currentUtcOffset", "standardUtcOffset"):
+                offset_info = tz_data.get(offset_key)
+                if not isinstance(offset_info, dict) or not offset_info:
+                    continue
+                oh = offset_info.get("hours", 0)
+                om = offset_info.get("minutes", 0)
+                os_ = offset_info.get("seconds", 0)
+                total_min = int(abs(oh) * 60 + abs(om) + abs(os_) // 60)
+                sign = "-" if min(oh, om, os_) < 0 else "+"
+                utc_offset = f"{sign}{total_min // 60:02d}:{total_min % 60:02d}"
+                break
+    except (RuntimeError, KeyError, TypeError, ValueError):
+        pass  # API may be down; continue to fallback
+
+    # --- Strategy 2: longitude-based UTC offset approximation ---
+    if not timezone_str:
+        approx_offset_h = round(lon / 15)
+        if approx_offset_h >= 0:
+            utc_offset = f"+{approx_offset_h:02d}:00"
+        else:
+            utc_offset = f"-{abs(approx_offset_h):02d}:00"
+        timezone_str = f"UTC{utc_offset}"
+        timezone_src = "longitude approximation (longitude/15)"
+
+    print_json({
+        "lat":          lat,
+        "lon":          lon,
+        "timezone":     timezone_str,
+        "utc_offset":   utc_offset,
+        "current_time": current_time,
+        "source":       timezone_src,
+        "data_source":  DATA_SOURCE,
+    })
+
+
+# ---------------------------------------------------------------------------
+# Command: bbox
+# ---------------------------------------------------------------------------
+
+def cmd_bbox(args):
+    """Find POIs within a bounding box using the Overpass API."""
+    try:
+        lat1 = float(args.lat1)
+        lon1 = float(args.lon1)
+        lat2 = float(args.lat2)
+        lon2 = float(args.lon2)
+    except ValueError:
+        error_exit("All coordinate arguments must be numeric values.")
+
+    # Normalize: south/west < north/east
+    south = min(lat1, lat2)
+    north = max(lat1, lat2)
+    west  = min(lon1, lon2)
+    east  = max(lon1, lon2)
+
+    category = args.category.lower()
+    if category not in CATEGORY_TAGS:
+        error_exit(
+            f"Unknown category '{category}'. "
+            f"Valid categories: {', '.join(VALID_CATEGORIES)}"
+        )
+
+    limit = int(args.limit)
+    if limit <= 0:
+        error_exit("Limit must be a positive integer.")
+
+    tag_key, tag_val = CATEGORY_TAGS[category]
+    religion = RELIGION_FILTER.get(category)
+    query = build_overpass_bbox(tag_key, tag_val, south, west, north, east,
+                                limit, religion=religion)
+
+    raw = overpass_query(query)
+
+    elements = raw.get("elements", [])
+
+    # Use center of bbox as reference for distance sorting
+    center_lat = (south + north) / 2
+    center_lon = (west + east) / 2
+    places = parse_overpass_elements(elements, ref_lat=center_lat,
+                                     ref_lon=center_lon)
+
+    for p in places:
+        p["category"] = category
+
+    print_json({
+        "bounding_box": {
+            "south": south,
+            "west":  west,
+            "north": north,
+            "east":  east,
+        },
+        "category":    category,
+        "count":       len(places),
+        "results":     places,
+        "data_source": DATA_SOURCE,
+    })
+
+
+# ---------------------------------------------------------------------------
+# Command: area
+# ---------------------------------------------------------------------------
+
+def cmd_area(args):
+    """Get bounding box and area info for a named place."""
+    query = " ".join(args.place)
+    raw = nominatim_search(query, limit=1)
+
+    if not raw:
+        error_exit(f"Could not find place: {query}")
+
+    item = raw[0]
+    bb = item.get("boundingbox", [])
+
+    if len(bb) < 4:
+        error_exit(f"No bounding box data available for: {query}")
+
+    min_lat = float(bb[0])
+    max_lat = float(bb[1])
+    min_lon = float(bb[2])
+    max_lon = float(bb[3])
+
+    # Approximate area in km² using the bounding box
+    # Width in km at the average latitude
+    avg_lat = (min_lat + max_lat) / 2
+    height_km = haversine_m(min_lat, min_lon, max_lat, min_lon) / 1000
+    width_km  = haversine_m(avg_lat, min_lon, avg_lat, max_lon) / 1000
+    approx_area_km2 = round(height_km * width_km, 3)
+
+    print_json({
+        "query":        query,
+        "display_name": item.get("display_name", ""),
+        "lat":          float(item["lat"]),
+        "lon":          float(item["lon"]),
+        "type":         item.get("type", ""),
+        "category":     item.get("category", ""),
+        "bounding_box": {
+            "south": min_lat,
+            "north": max_lat,
+            "west":  min_lon,
+            "east":  max_lon,
+        },
+        "dimensions": {
+            "width_km":  round(width_km, 3),
+            "height_km": round(height_km, 3),
+        },
+        "approx_area_km2": approx_area_km2,
+        "osm_type":        item.get("osm_type", ""),
+        "osm_id":          item.get("osm_id", ""),
+        "data_source":     DATA_SOURCE,
+    })
+
+
+# ---------------------------------------------------------------------------
+# CLI setup
+# ---------------------------------------------------------------------------
+
+def build_parser():
+    parser = argparse.ArgumentParser(
+        prog="maps_client.py",
+        description=(
+            "CLI maps tool: geocoding, reverse geocoding, POI search, "
+            "routing, directions, timezone, and area lookup. "
+            "Powered by OpenStreetMap, OSRM, Overpass, and TimeAPI.io. "
+            "No API keys required."
+        ),
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=(
+            "Examples:\n"
+            "  maps_client.py search Times Square\n"
+            "  maps_client.py reverse 40.758 -73.985\n"
+            "  maps_client.py nearby 40.758 -73.985 restaurant --radius 800\n"
+            "  maps_client.py distance New York --to Los Angeles --mode driving\n"
+            "  maps_client.py directions Paris --to Berlin --mode driving\n"
+            "  maps_client.py timezone 48.8566 2.3522\n"
+            "  maps_client.py bbox 40.70 -74.02 40.78 -73.95 restaurant\n"
+            "  maps_client.py area Manhattan"
+        ),
+    )
+    sub = parser.add_subparsers(dest="command", required=True,
+                                 metavar="COMMAND")
+
+    # -- search --
+    p_search = sub.add_parser(
+        "search",
+        help="Geocode a place name to coordinates.",
+        description="Search for a place by name and return coordinates and details.",
+    )
+    p_search.add_argument(
+        "query", nargs="+",
+        help="Place name or address to search.",
+    )
+
+    # -- reverse --
+    p_reverse = sub.add_parser(
+        "reverse",
+        help="Reverse geocode coordinates to an address.",
+        description="Convert latitude/longitude coordinates to a human-readable address.",
+    )
+    p_reverse.add_argument("lat", help="Latitude (decimal degrees).")
+    p_reverse.add_argument("lon", help="Longitude (decimal degrees).")
+
+    # -- nearby --
+    p_nearby = sub.add_parser(
+        "nearby",
+        help="Find nearby places of a given category.",
+        description=(
+            "Find points of interest near a location using the Overpass API.\n"
+            "Provide either LAT/LON, or use --near \"<address>\" to auto-geocode.\n"
+            "Categories can be specified positionally OR repeated via --category\n"
+            "to merge multiple types in one query (e.g. --category bar --category cafe).\n"
+            f"Categories: {', '.join(VALID_CATEGORIES)}"
+        ),
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    p_nearby.add_argument(
+        "lat", nargs="?", default=None,
+        help="Center latitude (decimal degrees). Omit if using --near.",
+    )
+    p_nearby.add_argument(
+        "lon", nargs="?", default=None,
+        help="Center longitude (decimal degrees). Omit if using --near.",
+    )
+    p_nearby.add_argument(
+        "category", nargs="?", default=None,
+        help="POI category (use --help for full list). Omit if using --category flags.",
+    )
+    p_nearby.add_argument(
+        "--near", nargs="+", metavar="PLACE",
+        help="Address, city, or landmark to search around (geocoded via Nominatim).",
+    )
+    p_nearby.add_argument(
+        "--category", action="append", dest="category_list", default=[],
+        metavar="CAT",
+        help="POI category (repeatable — adds a type to the search).",
+    )
+    p_nearby.add_argument(
+        "--radius", "-r",
+        default=500, type=int, metavar="METRES",
+        help="Search radius in metres (default: 500).",
+    )
+    p_nearby.add_argument(
+        "--limit", "-n",
+        default=10, type=int, metavar="N",
+        help="Maximum number of results (default: 10).",
+    )
+
+    # -- distance --
+    p_dist = sub.add_parser(
+        "distance",
+        help="Calculate road distance and travel time.",
+        description=(
+            "Calculate road distance and estimated travel time between two places.\n"
+            "Example: maps_client.py distance New York --to Los Angeles"
+        ),
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    p_dist.add_argument(
+        "origin", nargs="+",
+        help="Origin address or place name.",
+    )
+    p_dist.add_argument(
+        "--to", nargs="+", required=True, metavar="DEST",
+        help="Destination address or place name (required).",
+    )
+    p_dist.add_argument(
+        "--mode", "-m",
+        default="driving",
+        choices=list(OSRM_PROFILES.keys()),
+        help="Travel mode (default: driving).",
+    )
+
+    # -- directions --
+    p_dir = sub.add_parser(
+        "directions",
+        help="Get turn-by-turn directions between two places.",
+        description=(
+            "Get step-by-step navigation directions between two places.\n"
+            "Example: maps_client.py directions Paris --to Berlin --mode driving"
+        ),
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    p_dir.add_argument(
+        "origin", nargs="+",
+        help="Origin address or place name.",
+    )
+    p_dir.add_argument(
+        "--to", nargs="+", required=True, metavar="DEST",
+        help="Destination address or place name (required).",
+    )
+    p_dir.add_argument(
+        "--mode", "-m",
+        default="driving",
+        choices=list(OSRM_PROFILES.keys()),
+        help="Travel mode (default: driving).",
+    )
+
+    # -- timezone --
+    p_tz = sub.add_parser(
+        "timezone",
+        help="Get timezone information for coordinates.",
+        description="Look up timezone and current local time for a lat/lon coordinate.",
+    )
+    p_tz.add_argument("lat", help="Latitude (decimal degrees).")
+    p_tz.add_argument("lon", help="Longitude (decimal degrees).")
+
+    # -- bbox --
+    p_bbox = sub.add_parser(
+        "bbox",
+        help="Find POIs within a bounding box.",
+        description=(
+            "Search for points of interest within a geographic bounding box.\n"
+            "Tip: use the 'area' command to find bounding boxes for named places.\n"
+            f"Categories: {', '.join(VALID_CATEGORIES)}"
+        ),
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    p_bbox.add_argument("lat1", help="First corner latitude.")
+    p_bbox.add_argument("lon1", help="First corner longitude.")
+    p_bbox.add_argument("lat2", help="Second corner latitude.")
+    p_bbox.add_argument("lon2", help="Second corner longitude.")
+    p_bbox.add_argument("category", help="POI category to search for.")
+    p_bbox.add_argument(
+        "--limit", "-n",
+        default=20, type=int, metavar="N",
+        help="Maximum number of results (default: 20).",
+    )
+
+    # -- area --
+    p_area = sub.add_parser(
+        "area",
+        help="Get bounding box and area info for a named place.",
+        description=(
+            "Look up a place by name and return its bounding box, dimensions, "
+            "and approximate area. Useful as input to the 'bbox' command."
+        ),
+    )
+    p_area.add_argument(
+        "place", nargs="+",
+        help="Place name to look up (e.g., 'Manhattan' or 'downtown Seattle').",
+    )
+
+    return parser
+
+
+def main():
+    parser = build_parser()
+    args   = parser.parse_args()
+
+    dispatch = {
+        "search":     cmd_search,
+        "reverse":    cmd_reverse,
+        "nearby":     cmd_nearby,
+        "distance":   cmd_distance,
+        "directions": cmd_directions,
+        "timezone":   cmd_timezone,
+        "bbox":       cmd_bbox,
+        "area":       cmd_area,
+    }
+
+    handler = dispatch.get(args.command)
+    if handler is None:
+        error_exit(f"Unknown command: {args.command}")
+
+    handler(args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/skills/social-media/xitter/SKILL.md b/skills/social-media/xitter/SKILL.md
deleted file mode 100644
index 802924dff39..00000000000
--- a/skills/social-media/xitter/SKILL.md
+++ /dev/null
@@ -1,202 +0,0 @@
----
-name: xitter
-description: Interact with X/Twitter via the x-cli terminal client using official X API credentials. Use for posting, reading timelines, searching tweets, liking, retweeting, bookmarks, mentions, and user lookups.
-version: 1.0.0
-author: Siddharth Balyan + Hermes Agent
-license: MIT
-platforms: [linux, macos]
-prerequisites:
-  commands: [uv]
-  env_vars: [X_API_KEY, X_API_SECRET, X_BEARER_TOKEN, X_ACCESS_TOKEN, X_ACCESS_TOKEN_SECRET]
-metadata:
-  hermes:
-    tags: [twitter, x, social-media, x-cli]
-    homepage: https://github.com/Infatoshi/x-cli
----
-
-# Xitter — X/Twitter via x-cli
-
-Use `x-cli` for official X/Twitter API interactions from the terminal.
-
-This skill is for:
-- posting tweets, replies, and quote tweets
-- searching tweets and reading timelines
-- looking up users, followers, and following
-- liking and retweeting
-- checking mentions and bookmarks
-
-This skill intentionally does not vendor a separate CLI implementation into Hermes. Install and use upstream `x-cli` instead.
-
-## Important Cost / Access Note
-
-X API access is not meaningfully free for most real usage. Expect to need paid or prepaid X developer access. If commands fail with permissions or quota errors, check your X developer plan first.
-
-## Install
-
-Install upstream `x-cli` with `uv`:
-
-```bash
-uv tool install git+https://github.com/Infatoshi/x-cli.git
-```
-
-Upgrade later with:
-
-```bash
-uv tool upgrade x-cli
-```
-
-Verify:
-
-```bash
-x-cli --help
-```
-
-## Credentials
-
-You need these five values from the X Developer Portal:
-- `X_API_KEY`
-- `X_API_SECRET`
-- `X_BEARER_TOKEN`
-- `X_ACCESS_TOKEN`
-- `X_ACCESS_TOKEN_SECRET`
-
-Get them from:
-- https://developer.x.com/en/portal/dashboard
-
-### Why does X need 5 secrets?
-
-Unfortunately, the official X API splits auth across both app-level and user-level credentials:
-
-- `X_API_KEY` + `X_API_SECRET` identify your app
-- `X_BEARER_TOKEN` is used for app-level read access
-- `X_ACCESS_TOKEN` + `X_ACCESS_TOKEN_SECRET` let the CLI act as your user account for writes and authenticated actions
-
-So yes — it is a lot of secrets for one integration, but this is the stable official API path and is still preferable to cookie/session scraping.
-
-Setup requirements in the portal:
-1. Create or open your app
-2. In user authentication settings, set permissions to `Read and write`
-3. Generate or regenerate the access token + access token secret after enabling write permissions
-4. Save all five values carefully — missing any one of them will usually produce confusing auth or permission errors
-
-Note: upstream `x-cli` expects the full credential set to be present, so even if you mostly care about read-only commands, it is simplest to configure all five.
-
-## Cost / Friction Reality Check
-
-If this setup feels heavier than it should be, that is because it is. X’s official developer flow is high-friction and often paid. This skill chooses the official API path because it is more stable and maintainable than browser-cookie/session approaches.
-
-If the user wants the least brittle long-term setup, use this skill. If they want a zero-setup or unofficial path, that is a different trade-off and not what this skill is for.
-
-
-## Where to Store Credentials
-
-`x-cli` looks for credentials in `~/.config/x-cli/.env`.
-
-If you already keep your X credentials in `~/.hermes/.env`, the cleanest setup is:
-
-```bash
-mkdir -p ~/.config/x-cli
-ln -sf ~/.hermes/.env ~/.config/x-cli/.env
-```
-
-Or create a dedicated file:
-
-```bash
-mkdir -p ~/.config/x-cli
-cat > ~/.config/x-cli/.env <<'EOF'
-X_API_KEY=your_consumer_key
-X_API_SECRET=your_secret_key
-X_BEARER_TOKEN=your_bearer_token
-X_ACCESS_TOKEN=your_access_token
-X_ACCESS_TOKEN_SECRET=your_access_token_secret
-EOF
-chmod 600 ~/.config/x-cli/.env
-```
-
-## Quick Verification
-
-```bash
-x-cli user get openai
-x-cli tweet search "from:NousResearch" --max 3
-x-cli me mentions --max 5
-```
-
-If reads work but writes fail, regenerate the access token after confirming `Read and write` permissions.
-
-## Common Commands
-
-### Tweets
-
-```bash
-x-cli tweet post "hello world"
-x-cli tweet get https://x.com/user/status/1234567890
-x-cli tweet delete 1234567890
-x-cli tweet reply 1234567890 "nice post"
-x-cli tweet quote 1234567890 "worth reading"
-x-cli tweet search "AI agents" --max 20
-x-cli tweet metrics 1234567890
-```
-
-### Users
-
-```bash
-x-cli user get openai
-x-cli user timeline openai --max 10
-x-cli user followers openai --max 50
-x-cli user following openai --max 50
-```
-
-### Self / Authenticated User
-
-```bash
-x-cli me mentions --max 20
-x-cli me bookmarks --max 20
-x-cli me bookmark 1234567890
-x-cli me unbookmark 1234567890
-```
-
-### Quick Actions
-
-```bash
-x-cli like 1234567890
-x-cli retweet 1234567890
-```
-
-## Output Modes
-
-Use structured output when the agent needs to inspect fields programmatically:
-
-```bash
-x-cli -j tweet search "AI agents" --max 5
-x-cli -p user get openai
-x-cli -md tweet get 1234567890
-x-cli -v -j tweet get 1234567890
-```
-
-Recommended defaults:
-- `-j` for machine-readable output
-- `-v` when you need timestamps, metrics, or metadata
-- plain/default mode for quick human inspection
-
-## Agent Workflow
-
-1. Confirm `x-cli` is installed
-2. Confirm credentials are present
-3. Start with a read command (`user get`, `tweet search`, `me mentions`)
-4. Use `-j` when extracting fields for later steps
-5. Only perform write actions after confirming the target tweet/user and the user's intent
-
-## Pitfalls
-
-- **Paid API access**: many failures are plan/permission problems, not code problems.
-- **403 oauth1-permissions**: regenerate the access token after enabling `Read and write`.
-- **Reply restrictions**: X restricts many programmatic replies. `tweet quote` is often more reliable than `tweet reply`.
-- **Rate limits**: expect per-endpoint limits and cooldown windows.
-- **Credential drift**: if you rotate tokens in `~/.hermes/.env`, make sure `~/.config/x-cli/.env` still points at the current file.
-
-## Notes
-
-- Prefer official API workflows over cookie/session scraping.
-- Use tweet URLs or IDs interchangeably — `x-cli` accepts both.
-- If bookmark behavior changes upstream, check the upstream README first:
-  https://github.com/Infatoshi/x-cli
diff --git a/skills/social-media/xurl/SKILL.md b/skills/social-media/xurl/SKILL.md
new file mode 100644
index 00000000000..2d7a017c9cd
--- /dev/null
+++ b/skills/social-media/xurl/SKILL.md
@@ -0,0 +1,386 @@
+---
+name: xurl
+description: Interact with X/Twitter via xurl, the official X API CLI. Use for posting, replying, quoting, searching, timelines, mentions, likes, reposts, bookmarks, follows, DMs, media upload, and raw v2 endpoint access.
+version: 1.0.0
+author: xdevplatform + openclaw + Hermes Agent
+license: MIT
+platforms: [linux, macos]
+prerequisites:
+  commands: [xurl]
+metadata:
+  hermes:
+    tags: [twitter, x, social-media, xurl, official-api]
+    homepage: https://github.com/xdevplatform/xurl
+    upstream_skill: https://github.com/openclaw/openclaw/blob/main/skills/xurl/SKILL.md
+---
+
+# xurl — X (Twitter) API via the Official CLI
+
+`xurl` is the X developer platform's official CLI for the X API. It supports shortcut commands for common actions AND raw curl-style access to any v2 endpoint. All commands return JSON to stdout.
+
+Use this skill for:
+- posting, replying, quoting, deleting posts
+- searching posts and reading timelines/mentions
+- liking, reposting, bookmarking
+- following, unfollowing, blocking, muting
+- direct messages
+- media uploads (images and video)
+- raw access to any X API v2 endpoint
+- multi-app / multi-account workflows
+
+This skill replaces the older `xitter` skill (which wrapped a third-party Python CLI). `xurl` is maintained by the X developer platform team, supports OAuth 2.0 PKCE with auto-refresh, and covers a substantially larger API surface.
+
+---
+
+## Secret Safety (MANDATORY)
+
+Critical rules when operating inside an agent/LLM session:
+
+- **Never** read, print, parse, summarize, upload, or send `~/.xurl` to LLM context.
+- **Never** ask the user to paste credentials/tokens into chat.
+- The user must fill `~/.xurl` with secrets manually on their own machine.
+- **Never** recommend or execute auth commands with inline secrets in agent sessions.
+- **Never** use `--verbose` / `-v` in agent sessions — it can expose auth headers/tokens.
+- To verify credentials exist, only use: `xurl auth status`.
+
+Forbidden flags in agent commands (they accept inline secrets):
+`--bearer-token`, `--consumer-key`, `--consumer-secret`, `--access-token`, `--token-secret`, `--client-id`, `--client-secret`
+
+App credential registration and credential rotation must be done by the user manually, outside the agent session. After credentials are registered, the user authenticates with `xurl auth oauth2` — also outside the agent session. Tokens persist to `~/.xurl` in YAML. Each app has isolated tokens. OAuth 2.0 tokens auto-refresh.
+
+---
+
+## Installation
+
+Pick ONE method. On Linux, the shell script and `go install` are the easiest options.
+
+```bash
+# Shell script (installs to ~/.local/bin, no sudo, works on Linux + macOS)
+curl -fsSL https://raw.githubusercontent.com/xdevplatform/xurl/main/install.sh | bash
+
+# Homebrew (macOS)
+brew install --cask xdevplatform/tap/xurl
+
+# npm
+npm install -g @xdevplatform/xurl
+
+# Go
+go install github.com/xdevplatform/xurl@latest
+```
+
+Verify:
+
+```bash
+xurl --help
+xurl auth status
+```
+
+If `xurl` is installed but `auth status` shows no apps or tokens, the user needs to complete auth manually — see the next section.
+
+---
+
+## One-Time User Setup (user runs these outside the agent)
+
+These steps must be performed by the user directly, NOT by the agent, because they involve pasting secrets. Direct the user to this block; do not execute it for them.
+
+1. Create or open an app at https://developer.x.com/en/portal/dashboard
+2. Set the redirect URI to `http://localhost:8080/callback`
+3. Copy the app's Client ID and Client Secret
+4. Register the app locally (user runs this):
+   ```bash
+   xurl auth apps add my-app --client-id YOUR_CLIENT_ID --client-secret YOUR_CLIENT_SECRET
+   ```
+5. Authenticate:
+   ```bash
+   xurl auth oauth2
+   ```
+   (This opens a browser for the OAuth 2.0 PKCE flow.)
+6. Verify:
+   ```bash
+   xurl auth status
+   xurl whoami
+   ```
+
+After this, the agent can use any command below without further setup. OAuth 2.0 tokens auto-refresh.
+
+---
+
+## Quick Reference
+
+| Action | Command |
+| --- | --- |
+| Post | `xurl post "Hello world!"` |
+| Reply | `xurl reply POST_ID "Nice post!"` |
+| Quote | `xurl quote POST_ID "My take"` |
+| Delete a post | `xurl delete POST_ID` |
+| Read a post | `xurl read POST_ID` |
+| Search posts | `xurl search "QUERY" -n 10` |
+| Who am I | `xurl whoami` |
+| Look up a user | `xurl user @handle` |
+| Home timeline | `xurl timeline -n 20` |
+| Mentions | `xurl mentions -n 10` |
+| Like / Unlike | `xurl like POST_ID` / `xurl unlike POST_ID` |
+| Repost / Undo | `xurl repost POST_ID` / `xurl unrepost POST_ID` |
+| Bookmark / Remove | `xurl bookmark POST_ID` / `xurl unbookmark POST_ID` |
+| List bookmarks / likes | `xurl bookmarks -n 10` / `xurl likes -n 10` |
+| Follow / Unfollow | `xurl follow @handle` / `xurl unfollow @handle` |
+| Following / Followers | `xurl following -n 20` / `xurl followers -n 20` |
+| Block / Unblock | `xurl block @handle` / `xurl unblock @handle` |
+| Mute / Unmute | `xurl mute @handle` / `xurl unmute @handle` |
+| Send DM | `xurl dm @handle "message"` |
+| List DMs | `xurl dms -n 10` |
+| Upload media | `xurl media upload path/to/file.mp4` |
+| Media status | `xurl media status MEDIA_ID` |
+| List apps | `xurl auth apps list` |
+| Remove app | `xurl auth apps remove NAME` |
+| Set default app | `xurl auth default APP_NAME [USERNAME]` |
+| Per-request app | `xurl --app NAME /2/users/me` |
+| Auth status | `xurl auth status` |
+
+Notes:
+- `POST_ID` accepts full URLs too (e.g. `https://x.com/user/status/1234567890`) — xurl extracts the ID.
+- Usernames work with or without a leading `@`.
+
+---
+
+## Command Details
+
+### Posting
+
+```bash
+xurl post "Hello world!"
+xurl post "Check this out" --media-id MEDIA_ID
+xurl post "Thread pics" --media-id 111 --media-id 222
+
+xurl reply 1234567890 "Great point!"
+xurl reply https://x.com/user/status/1234567890 "Agreed!"
+xurl reply 1234567890 "Look at this" --media-id MEDIA_ID
+
+xurl quote 1234567890 "Adding my thoughts"
+xurl delete 1234567890
+```
+
+### Reading & Search
+
+```bash
+xurl read 1234567890
+xurl read https://x.com/user/status/1234567890
+
+xurl search "golang"
+xurl search "from:elonmusk" -n 20
+xurl search "#buildinpublic lang:en" -n 15
+```
+
+### Users, Timeline, Mentions
+
+```bash
+xurl whoami
+xurl user elonmusk
+xurl user @XDevelopers
+
+xurl timeline -n 25
+xurl mentions -n 20
+```
+
+### Engagement
+
+```bash
+xurl like 1234567890
+xurl unlike 1234567890
+
+xurl repost 1234567890
+xurl unrepost 1234567890
+
+xurl bookmark 1234567890
+xurl unbookmark 1234567890
+
+xurl bookmarks -n 20
+xurl likes -n 20
+```
+
+### Social Graph
+
+```bash
+xurl follow @XDevelopers
+xurl unfollow @XDevelopers
+
+xurl following -n 50
+xurl followers -n 50
+
+# Another user's graph
+xurl following --of elonmusk -n 20
+xurl followers --of elonmusk -n 20
+
+xurl block @spammer
+xurl unblock @spammer
+xurl mute @annoying
+xurl unmute @annoying
+```
+
+### Direct Messages
+
+```bash
+xurl dm @someuser "Hey, saw your post!"
+xurl dms -n 25
+```
+
+### Media Upload
+
+```bash
+# Auto-detect type
+xurl media upload photo.jpg
+xurl media upload video.mp4
+
+# Explicit type/category
+xurl media upload --media-type image/jpeg --category tweet_image photo.jpg
+
+# Videos need server-side processing — check status (or poll)
+xurl media status MEDIA_ID
+xurl media status --wait MEDIA_ID
+
+# Full workflow
+xurl media upload meme.png                  # returns media id
+xurl post "lol" --media-id MEDIA_ID
+```
+
+---
+
+## Raw API Access
+
+The shortcuts cover common operations. For anything else, use raw curl-style mode against any X API v2 endpoint:
+
+```bash
+# GET
+xurl /2/users/me
+
+# POST with JSON body
+xurl -X POST /2/tweets -d '{"text":"Hello world!"}'
+
+# DELETE / PUT / PATCH
+xurl -X DELETE /2/tweets/1234567890
+
+# Custom headers
+xurl -H "Content-Type: application/json" /2/some/endpoint
+
+# Force streaming
+xurl -s /2/tweets/search/stream
+
+# Full URLs also work
+xurl https://api.x.com/2/users/me
+```
+
+---
+
+## Global Flags
+
+| Flag | Short | Description |
+| --- | --- | --- |
+| `--app` | | Use a specific registered app (overrides default) |
+| `--auth` | | Force auth type: `oauth1`, `oauth2`, or `app` |
+| `--username` | `-u` | Which OAuth2 account to use (if multiple exist) |
+| `--verbose` | `-v` | **Forbidden in agent sessions** — leaks auth headers |
+| `--trace` | `-t` | Add `X-B3-Flags: 1` trace header |
+
+---
+
+## Streaming
+
+Streaming endpoints are auto-detected. Known ones include:
+
+- `/2/tweets/search/stream`
+- `/2/tweets/sample/stream`
+- `/2/tweets/sample10/stream`
+
+Force streaming on any endpoint with `-s`.
+
+---
+
+## Output Format
+
+All commands return JSON to stdout. Structure mirrors X API v2:
+
+```json
+{ "data": { "id": "1234567890", "text": "Hello world!" } }
+```
+
+Errors are also JSON:
+
+```json
+{ "errors": [ { "message": "Not authorized", "code": 403 } ] }
+```
+
+---
+
+## Common Workflows
+
+### Post with an image
+```bash
+xurl media upload photo.jpg
+xurl post "Check out this photo!" --media-id MEDIA_ID
+```
+
+### Reply to a conversation
+```bash
+xurl read https://x.com/user/status/1234567890
+xurl reply 1234567890 "Here are my thoughts..."
+```
+
+### Search and engage
+```bash
+xurl search "topic of interest" -n 10
+xurl like POST_ID_FROM_RESULTS
+xurl reply POST_ID_FROM_RESULTS "Great point!"
+```
+
+### Check your activity
+```bash
+xurl whoami
+xurl mentions -n 20
+xurl timeline -n 20
+```
+
+### Multiple apps (credentials pre-configured manually)
+```bash
+xurl auth default prod alice               # prod app, alice user
+xurl --app staging /2/users/me             # one-off against staging
+```
+
+---
+
+## Error Handling
+
+- Non-zero exit code on any error.
+- API errors are still printed as JSON to stdout, so you can parse them.
+- Auth errors → have the user re-run `xurl auth oauth2` outside the agent session.
+- Commands that need the caller's user ID (like, repost, bookmark, follow, etc.) will auto-fetch it via `/2/users/me`. An auth failure there surfaces as an auth error.
+
+---
+
+## Agent Workflow
+
+1. Verify prerequisites: `xurl --help` and `xurl auth status`.
+2. If auth is missing, stop and direct the user to the "One-Time User Setup" section — do NOT attempt to register apps or pass secrets yourself.
+3. Start with a cheap read (`xurl whoami`, `xurl user @handle`, `xurl search ... -n 3`) to confirm reachability.
+4. Confirm the target post/user and the user's intent before any write action (post, reply, like, repost, DM, follow, block, delete).
+5. Use JSON output directly — every response is already structured.
+6. Never paste `~/.xurl` contents back into the conversation.
+
+---
+
+## Notes
+
+- **Rate limits:** X enforces per-endpoint rate limits. A 429 means wait and retry. Write endpoints (post, reply, like, repost) have tighter limits than reads.
+- **Scopes:** OAuth 2.0 tokens use broad scopes. A 403 on a specific action usually means the token is missing a scope — have the user re-run `xurl auth oauth2`.
+- **Token refresh:** OAuth 2.0 tokens auto-refresh. Nothing to do.
+- **Multiple apps:** Each app has isolated credentials/tokens. Switch with `xurl auth default` or `--app`.
+- **Multiple accounts per app:** Select with `-u / --username`, or set a default with `xurl auth default APP USER`.
+- **Token storage:** `~/.xurl` is YAML. Never read or send this file to LLM context.
+- **Cost:** X API access is typically paid for meaningful usage. Many failures are plan/permission problems, not code problems.
+
+---
+
+## Attribution
+
+- Upstream CLI: https://github.com/xdevplatform/xurl (X developer platform team, Chris Park et al.)
+- Upstream agent skill: https://github.com/openclaw/openclaw/blob/main/skills/xurl/SKILL.md
+- Hermes adaptation: reformatted for Hermes skill conventions; safety guardrails preserved verbatim.
diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py
index 1778855ddd7..aea8152a53e 100644
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -697,7 +697,12 @@ class TestIsConnectionError:
 
 
 class TestKimiForCodingTemperature:
-    """kimi-for-coding now requires temperature=0.6 exactly."""
+    """Moonshot kimi-for-coding models require fixed temperatures.
+
+    k2.5 / k2-turbo-preview / k2-0905-preview → 0.6 (non-thinking lock).
+    k2-thinking / k2-thinking-turbo → 1.0 (thinking lock).
+    kimi-k2-instruct* and every other model preserve the caller's temperature.
+    """
 
     def test_build_call_kwargs_forces_fixed_temperature(self):
         from agent.auxiliary_client import _build_call_kwargs
@@ -772,12 +777,55 @@ class TestKimiForCodingTemperature:
         assert kwargs["model"] == "kimi-for-coding"
         assert kwargs["temperature"] == 0.6
 
-    def test_non_kimi_model_still_preserves_temperature(self):
+    @pytest.mark.parametrize(
+        "model,expected",
+        [
+            ("kimi-k2.5", 0.6),
+            ("kimi-k2-turbo-preview", 0.6),
+            ("kimi-k2-0905-preview", 0.6),
+            ("kimi-k2-thinking", 1.0),
+            ("kimi-k2-thinking-turbo", 1.0),
+            ("moonshotai/kimi-k2.5", 0.6),
+            ("moonshotai/Kimi-K2-Thinking", 1.0),
+        ],
+    )
+    def test_kimi_k2_family_temperature_override(self, model, expected):
+        """Moonshot kimi-k2.* models only accept fixed temperatures.
+
+        Non-thinking models → 0.6, thinking-mode models → 1.0.
+        """
         from agent.auxiliary_client import _build_call_kwargs
 
         kwargs = _build_call_kwargs(
             provider="kimi-coding",
-            model="kimi-k2.5",
+            model=model,
+            messages=[{"role": "user", "content": "hello"}],
+            temperature=0.3,
+        )
+
+        assert kwargs["temperature"] == expected
+
+    @pytest.mark.parametrize(
+        "model",
+        [
+            "anthropic/claude-sonnet-4-6",
+            "gpt-5.4",
+            # kimi-k2-instruct is the non-coding K2 family — temperature is
+            # variable (recommended 0.6 but not enforced).  Must not clamp.
+            "kimi-k2-instruct",
+            "moonshotai/Kimi-K2-Instruct",
+            "moonshotai/Kimi-K2-Instruct-0905",
+            "kimi-k2-instruct-0905",
+            # Hypothetical future kimi name not in the whitelist.
+            "kimi-k2-experimental",
+        ],
+    )
+    def test_non_restricted_model_preserves_temperature(self, model):
+        from agent.auxiliary_client import _build_call_kwargs
+
+        kwargs = _build_call_kwargs(
+            provider="openrouter",
+            model=model,
             messages=[{"role": "user", "content": "hello"}],
             temperature=0.3,
         )
diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py
index 6164d812f6b..0c20dddcd7c 100644
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@@ -781,3 +781,127 @@ class TestTokenBudgetTailProtection:
         # Tool at index 2 is outside the protected tail (last 3 = indices 2,3,4)
         # so it might or might not be pruned depending on boundary
         assert isinstance(pruned, int)
+
+
+class TestTruncateToolCallArgsJson:
+    """Regression coverage for #11762.
+
+    Tool-call ``function.arguments`` used to be shortened by slicing the raw
+    string, which left invalid JSON behind. Strict providers (seen on
+    MiniMax) answered with non-retryable 400s and long sessions got stuck
+    re-sending the same payload. These tests pin the replacement helper:
+    its output must always parse and keep the original shape, only smaller.
+    """
+
+    def _helper(self):
+        """Import the function under test on demand and return it."""
+        from agent.context_compressor import _truncate_tool_call_args_json as fn
+        return fn
+
+    def test_shrunken_args_remain_valid_json(self):
+        """A long string value is truncated and the output still parses."""
+        import json as _json
+        target_path = "~/.hermes/skills/shopping/browser-setup-notes.md"
+        body = "# Shopping Browser Setup Notes\n\n" + "abc " * 400
+        raw = _json.dumps({"path": target_path, "content": body})
+        assert len(raw) > 500
+        result = self._helper()(raw)
+        decoded = _json.loads(result)  # must not raise
+        assert decoded["path"] == target_path
+        assert decoded["content"].endswith("...[truncated]")
+        assert len(result) < len(raw)
+
+    def test_non_json_arguments_pass_through(self):
+        """Arguments that are not JSON come back unchanged."""
+        garbage = "this is not json at all, " * 50
+        assert self._helper()(garbage) == garbage
+
+    def test_short_string_leaves_unchanged(self):
+        """Short string values survive the round trip untouched."""
+        import json as _json
+        small = {"command": "ls -la", "cwd": "/tmp"}
+        assert _json.loads(self._helper()(_json.dumps(small))) == small
+
+    def test_nested_structures_are_walked(self):
+        """Long strings inside nested lists and dicts are truncated too."""
+        import json as _json
+        truncate = self._helper()
+        turns = [
+            {"role": "user", "content": "x" * 500},
+            {"role": "assistant", "content": "ok"},
+        ]
+        raw = _json.dumps({"messages": turns, "meta": {"note": "y" * 500}})
+        decoded = _json.loads(truncate(raw))
+        messages = decoded["messages"]
+        assert messages[0]["content"].endswith("...[truncated]")
+        assert messages[1]["content"] == "ok"
+        assert decoded["meta"]["note"].endswith("...[truncated]")
+
+    def test_non_string_leaves_preserved(self):
+        """Non-string leaves keep their values; the long string is truncated."""
+        import json as _json
+        source = {
+            "retries": 3,
+            "enabled": True,
+            "timeout": None,
+            "items": [1, 2, 3],
+            "note": "z" * 500,
+        }
+        decoded = _json.loads(self._helper()(_json.dumps(source)))
+        assert decoded["retries"] == 3
+        assert decoded["enabled"] is True
+        assert decoded["timeout"] is None
+        assert decoded["items"] == [1, 2, 3]
+        assert decoded["note"].endswith("...[truncated]")
+
+    def test_scalar_json_string_gets_shrunk(self):
+        """A top-level JSON string is truncated as well."""
+        import json as _json
+        raw = _json.dumps("q" * 500)
+        decoded = _json.loads(self._helper()(raw))
+        assert isinstance(decoded, str)
+        assert decoded.endswith("...[truncated]")
+
+    def test_unicode_preserved(self):
+        """CJK characters show up literally in the shrunken output."""
+        import json as _json
+        raw = _json.dumps({"content": "非德满" + ("a" * 500)})
+        result = self._helper()(raw)
+        # Expect raw CJK, not \uXXXX escapes (output written with ensure_ascii=False)
+        assert "非德满" in result
+
+    def test_pass3_emits_valid_json_for_downstream_provider(self):
+        """End-to-end: the Pass-3 shrink must hand downstream providers parseable
+        arguments, never the unterminated-string payload behind the 400 loop."""
+        import json as _json
+        settings = dict(
+            model="test/model",
+            threshold_percent=0.85,
+            protect_first_n=1,
+            protect_last_n=1,
+            quiet_mode=True,
+        )
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            compressor = ContextCompressor(**settings)
+        target_path = "~/.hermes/skills/shopping/browser-setup-notes.md"
+        body = "# Shopping Browser Setup Notes\n\n## Overview\n" + "x " * 400
+        call_args = _json.dumps({"path": target_path, "content": body})
+        assert len(call_args) > 500  # triggers the Pass-3 shrink
+        write_call = {
+            "id": "call_1",
+            "type": "function",
+            "function": {"name": "write_file", "arguments": call_args},
+        }
+        history = [
+            {"role": "user", "content": "please write two files"},
+            {"role": "assistant", "content": None, "tool_calls": [write_call]},
+            {"role": "tool", "tool_call_id": "call_1", "content": '{"bytes_written": 727}'},
+            {"role": "user", "content": "ok"},
+            {"role": "assistant", "content": "done"},
+        ]
+        pruned, _ = compressor._prune_old_tool_results(history, protect_tail_count=2)
+        shrunk_args = pruned[1]["tool_calls"][0]["function"]["arguments"]
+        # Must parse — otherwise downstream provider returns 400
+        decoded = _json.loads(shrunk_args)
+        assert decoded["path"] == target_path
+        assert decoded["content"].endswith("...[truncated]")
diff --git a/tests/agent/test_memory_provider.py b/tests/agent/test_memory_provider.py
index 9301960b717..5cd0d8ab413 100644
--- a/tests/agent/test_memory_provider.py
+++ b/tests/agent/test_memory_provider.py
@@ -971,8 +971,6 @@ class TestHonchoCadenceTracking:
         class FakeManager:
             def prefetch_context(self, key, query=None):
                 pass
-            def prefetch_dialectic(self, key, query):
-                pass
 
         p._manager = FakeManager()
 
diff --git a/tests/agent/test_memory_user_id.py b/tests/agent/test_memory_user_id.py
index c1b82208d0e..d33753bd2e1 100644
--- a/tests/agent/test_memory_user_id.py
+++ b/tests/agent/test_memory_user_id.py
@@ -208,34 +208,81 @@ class TestMem0UserIdScoping:
 
 
 class TestHonchoUserIdScoping:
-    """Verify Honcho plugin uses gateway user_id for peer_name when provided."""
+    """Verify Honcho plugin keeps runtime user scoping separate from config peer_name."""
 
-    def test_gateway_user_id_overrides_peer_name(self):
-        """When user_id is in kwargs and no explicit peer_name, user_id should be used."""
+    def test_gateway_user_id_is_passed_as_runtime_peer(self):
+        """Gateway user_id should scope Honcho sessions without mutating config peer_name."""
         from plugins.memory.honcho import HonchoMemoryProvider
 
         provider = HonchoMemoryProvider()
 
-        # Create a mock config with NO explicit peer_name
         mock_cfg = MagicMock()
         mock_cfg.enabled = True
         mock_cfg.api_key = "test-key"
         mock_cfg.base_url = None
-        mock_cfg.peer_name = ""  # No explicit peer_name — user_id should fill it
-        mock_cfg.recall_mode = "tools"  # Use tools mode to defer session init
+        mock_cfg.peer_name = "static-user"
+        mock_cfg.recall_mode = "context"
+        mock_cfg.context_tokens = None
+        mock_cfg.raw = {}
+        mock_cfg.dialectic_depth = 1
+        mock_cfg.dialectic_depth_levels = None
+        mock_cfg.init_on_session_start = False
+        mock_cfg.ai_peer = "hermes"
+        mock_cfg.resolve_session_name.return_value = "test-sess"
+        mock_cfg.session_strategy = "shared"
 
         with patch(
             "plugins.memory.honcho.client.HonchoClientConfig.from_global_config",
             return_value=mock_cfg,
-        ):
+        ), patch(
+            "plugins.memory.honcho.client.get_honcho_client",
+            return_value=MagicMock(),
+        ), patch(
+            "plugins.memory.honcho.session.HonchoSessionManager",
+        ) as mock_manager_cls:
+            mock_manager = MagicMock()
+            mock_manager.get_or_create.return_value = MagicMock(messages=[])
+            mock_manager_cls.return_value = mock_manager
             provider.initialize(
                 session_id="test-sess",
                 user_id="discord_user_789",
                 platform="discord",
             )
 
-        # The config's peer_name should have been overridden with the user_id
-        assert mock_cfg.peer_name == "discord_user_789"
+        assert mock_cfg.peer_name == "static-user"
+        assert mock_manager_cls.call_args.kwargs["runtime_user_peer_name"] == "discord_user_789"
+
+    def test_session_manager_prefers_runtime_user_id_over_config_peer_name(self):
+        """The session's user peer id comes from runtime_user_peer_name, not config peer_name."""
+        from plugins.memory.honcho.session import HonchoSessionManager
+
+        runtime_user = "discord_user_789"
+        cfg = MagicMock(
+            peer_name="static-user",
+            ai_peer="hermes",
+            write_frequency="sync",
+            dialectic_reasoning_level="low",
+            dialectic_dynamic=True,
+            dialectic_max_chars=600,
+            observation_mode="directional",
+            user_observe_me=True,
+            user_observe_others=True,
+            ai_observe_me=True,
+            ai_observe_others=True,
+        )
+        manager = HonchoSessionManager(
+            honcho=MagicMock(), config=cfg, runtime_user_peer_name=runtime_user
+        )
+
+        # Patch the manager's peer and Honcho-session lookups with mocks.
+        peer_stub = patch.object(manager, "_get_or_create_peer", return_value=MagicMock())
+        session_stub = patch.object(
+            manager, "_get_or_create_honcho_session", return_value=(MagicMock(), [])
+        )
+        with peer_stub, session_stub:
+            session = manager.get_or_create("discord:channel-1")
+
+        assert session.user_peer_id == runtime_user
 
     def test_no_user_id_preserves_config_peer_name(self):
         """Without user_id, the config peer_name should be preserved."""
diff --git a/tests/agent/test_subagent_progress.py b/tests/agent/test_subagent_progress.py
index 99375d6bd6a..88b2e379026 100644
--- a/tests/agent/test_subagent_progress.py
+++ b/tests/agent/test_subagent_progress.py
@@ -79,7 +79,7 @@ class TestBuildChildProgressCallback:
         parent._delegate_spinner = None
         parent.tool_progress_callback = None
         
-        cb = _build_child_progress_callback(0, parent)
+        cb = _build_child_progress_callback(0, "test goal", parent)
         assert cb is None
 
     def test_cli_spinner_tool_event(self):
@@ -93,7 +93,7 @@ class TestBuildChildProgressCallback:
         parent._delegate_spinner = spinner
         parent.tool_progress_callback = None
         
-        cb = _build_child_progress_callback(0, parent)
+        cb = _build_child_progress_callback(0, "test goal", parent)
         assert cb is not None
         
         cb("tool.started", "web_search", "quantum computing", {})
@@ -113,7 +113,7 @@ class TestBuildChildProgressCallback:
         parent._delegate_spinner = spinner
         parent.tool_progress_callback = None
         
-        cb = _build_child_progress_callback(0, parent)
+        cb = _build_child_progress_callback(0, "test goal", parent)
         cb("_thinking", "I'll search for papers first")
         
         output = buf.getvalue()
@@ -121,54 +121,64 @@ class TestBuildChildProgressCallback:
         assert "search for papers" in output
 
     def test_gateway_batched_progress(self):
-        """Gateway path should batch tool calls and flush at BATCH_SIZE."""
+        """Gateway path: each tool.started relays a subagent.tool event, and a
+        subagent.progress summary fires once BATCH_SIZE tools accumulate."""
         parent = MagicMock()
         parent._delegate_spinner = None
         parent_cb = MagicMock()
         parent.tool_progress_callback = parent_cb
-        
-        cb = _build_child_progress_callback(0, parent)
-        
-        # Send 4 tool calls — shouldn't flush yet (BATCH_SIZE = 5)
+
+        cb = _build_child_progress_callback(0, "test goal", parent)
+
+        # Each tool.started relays a subagent.tool event immediately (per-tool relay).
         for i in range(4):
             cb("tool.started", f"tool_{i}", f"arg_{i}", {})
-        parent_cb.assert_not_called()
-        
-        # 5th call should trigger flush
-        cb("tool.started", "tool_4", "arg_4", {})
-        parent_cb.assert_called_once()
-        call_args = parent_cb.call_args
-        assert "tool_0" in call_args[0][1]
-        assert "tool_4" in call_args[0][1]
+        # 4 per-tool relays so far, no batch summary yet (BATCH_SIZE=5)
+        events = [c.args[0] for c in parent_cb.call_args_list]
+        assert events == ["subagent.tool"] * 4
 
-    def test_thinking_not_relayed_to_gateway(self):
-        """Thinking events should NOT be sent to gateway (too noisy)."""
+        # 5th call triggers another per-tool relay PLUS the batch-size summary
+        cb("tool.started", "tool_4", "arg_4", {})
+        events = [c.args[0] for c in parent_cb.call_args_list]
+        assert events == ["subagent.tool"] * 5 + ["subagent.progress"]
+        summary_call = parent_cb.call_args_list[-1]
+        summary_text = summary_call.kwargs.get("preview") or summary_call.args[2]
+        assert "tool_0" in summary_text
+        assert "tool_4" in summary_text
+
+    def test_thinking_relayed_to_gateway(self):
+        """Thinking events are relayed as subagent.thinking events."""
         parent = MagicMock()
         parent._delegate_spinner = None
         parent_cb = MagicMock()
         parent.tool_progress_callback = parent_cb
-        
-        cb = _build_child_progress_callback(0, parent)
+
+        cb = _build_child_progress_callback(0, "test goal", parent)
         cb("_thinking", "some reasoning text")
-        
-        parent_cb.assert_not_called()
+
+        parent_cb.assert_called_once()
+        assert parent_cb.call_args.args[0] == "subagent.thinking"
+        assert parent_cb.call_args.args[2] == "some reasoning text"
 
     def test_parallel_callbacks_independent(self):
-        """Each child's callback should have independent batch state."""
+        """Each child's callback batches tool names independently."""
         parent = MagicMock()
         parent._delegate_spinner = None
         parent_cb = MagicMock()
         parent.tool_progress_callback = parent_cb
-        
-        cb0 = _build_child_progress_callback(0, parent)
-        cb1 = _build_child_progress_callback(1, parent)
-        
-        # Send 3 calls to each — neither should flush (batch size = 5)
+
+        cb0 = _build_child_progress_callback(0, "goal a", parent)
+        cb1 = _build_child_progress_callback(1, "goal b", parent)
+
+        # 3 tool.started per child = 6 per-tool relays; neither should hit
+        # the batch-size summary (batch size = 5, counted per-child).
         for i in range(3):
-            cb0(f"tool_{i}")
-            cb1(f"other_{i}")
-        
-        parent_cb.assert_not_called()
+            cb0("tool.started", f"tool_{i}", f"a_{i}", {})
+            cb1("tool.started", f"other_{i}", f"b_{i}", {})
+
+        events = [c.args[0] for c in parent_cb.call_args_list]
+        assert events.count("subagent.tool") == 6
+        assert "subagent.progress" not in events
 
     def test_task_index_prefix_in_batch_mode(self):
         """Batch mode (task_count > 1) should show 1-indexed prefix for all tasks."""
@@ -182,7 +192,7 @@ class TestBuildChildProgressCallback:
         parent.tool_progress_callback = None
         
         # task_index=0 in a batch of 3 → prefix "[1]"
-        cb0 = _build_child_progress_callback(0, parent, task_count=3)
+        cb0 = _build_child_progress_callback(0, "test goal", parent, task_count=3)
         cb0("web_search", "test")
         output = buf.getvalue()
         assert "[1]" in output
@@ -190,7 +200,7 @@ class TestBuildChildProgressCallback:
         # task_index=2 in a batch of 3 → prefix "[3]"
         buf.truncate(0)
         buf.seek(0)
-        cb2 = _build_child_progress_callback(2, parent, task_count=3)
+        cb2 = _build_child_progress_callback(2, "test goal", parent, task_count=3)
         cb2("web_search", "test")
         output = buf.getvalue()
         assert "[3]" in output
@@ -206,7 +216,7 @@ class TestBuildChildProgressCallback:
         parent._delegate_spinner = spinner
         parent.tool_progress_callback = None
         
-        cb = _build_child_progress_callback(0, parent, task_count=1)
+        cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
         cb("tool.started", "web_search", "test", {})
         
         output = buf.getvalue()
@@ -321,26 +331,31 @@ class TestBatchFlush:
     """Tests for gateway batch flush on subagent completion."""
 
     def test_flush_sends_remaining_batch(self):
-        """_flush should send remaining tool names to gateway."""
+        """_flush should send a final subagent.progress summary of any unsent
+        tool names in the batch (fewer than BATCH_SIZE)."""
         parent = MagicMock()
         parent._delegate_spinner = None
         parent_cb = MagicMock()
         parent.tool_progress_callback = parent_cb
 
-        cb = _build_child_progress_callback(0, parent)
+        cb = _build_child_progress_callback(0, "test goal", parent)
 
-        # Send 3 tools (below batch size of 5)
+        # Send 3 tools (below batch size of 5) — each relays subagent.tool
         cb("tool.started", "web_search", "query1", {})
         cb("tool.started", "read_file", "file.txt", {})
         cb("tool.started", "write_file", "out.txt", {})
-        parent_cb.assert_not_called()
+        events = [c.args[0] for c in parent_cb.call_args_list]
+        assert events == ["subagent.tool"] * 3  # per-tool relays so far
+        assert "subagent.progress" not in events  # no batch-size summary yet
 
-        # Flush should send the remaining 3
+        # Flush should send the remaining 3 as a summary
         cb._flush()
-        parent_cb.assert_called_once()
-        summary = parent_cb.call_args[0][1]
-        assert "web_search" in summary
-        assert "write_file" in summary
+        events = [c.args[0] for c in parent_cb.call_args_list]
+        assert events[-1] == "subagent.progress"
+        summary_call = parent_cb.call_args_list[-1]
+        summary_text = summary_call.kwargs.get("preview") or summary_call.args[2]
+        assert "web_search" in summary_text
+        assert "write_file" in summary_text
 
     def test_flush_noop_when_batch_empty(self):
         """_flush should not send anything when batch is empty."""
@@ -349,7 +364,7 @@ class TestBatchFlush:
         parent_cb = MagicMock()
         parent.tool_progress_callback = parent_cb
 
-        cb = _build_child_progress_callback(0, parent)
+        cb = _build_child_progress_callback(0, "test goal", parent)
         cb._flush()
         parent_cb.assert_not_called()
 
@@ -364,7 +379,7 @@ class TestBatchFlush:
         parent._delegate_spinner = spinner
         parent.tool_progress_callback = None
 
-        cb = _build_child_progress_callback(0, parent)
+        cb = _build_child_progress_callback(0, "test goal", parent)
         cb("tool.started", "web_search", "test", {})
         cb._flush()  # Should not crash
 
diff --git a/tests/cli/test_cli_status_bar.py b/tests/cli/test_cli_status_bar.py
index eabcd0f9624..4a65c6e4673 100644
--- a/tests/cli/test_cli_status_bar.py
+++ b/tests/cli/test_cli_status_bar.py
@@ -237,6 +237,13 @@ class TestCLIStatusBar:
         cli_obj._spinner_text = ""
         assert cli_obj._spinner_widget_height(width=90) == 0
 
+    def test_spinner_height_uses_display_width_for_wide_characters(self):
+        cli_obj = _make_cli()
+        cli_obj._spinner_text = "你" * 40
+        cli_obj._tool_start_time = 0
+
+        assert cli_obj._spinner_widget_height(width=64) == 2
+
     def test_voice_status_bar_compacts_on_narrow_terminals(self):
         cli_obj = _make_cli()
         cli_obj._voice_mode = True
diff --git a/tests/cli/test_gquota_command.py b/tests/cli/test_gquota_command.py
new file mode 100644
index 00000000000..0740e001262
--- /dev/null
+++ b/tests/cli/test_gquota_command.py
@@ -0,0 +1,21 @@
+from unittest.mock import MagicMock, patch
+
+
+def test_gquota_uses_chat_console_when_tui_is_live():
+    from agent.google_oauth import GoogleOAuthError
+    from cli import HermesCLI
+
+    cli = HermesCLI.__new__(HermesCLI)
+    cli.console = MagicMock()
+    cli._app = object()
+
+    live_console = MagicMock()
+
+    with patch("cli.ChatConsole", return_value=live_console), \
+         patch("agent.google_oauth.get_valid_access_token", side_effect=GoogleOAuthError("No Google OAuth credentials found")), \
+         patch("agent.google_oauth.load_credentials", return_value=None), \
+         patch("agent.google_code_assist.retrieve_user_quota"):
+        cli._handle_gquota_command("/gquota")
+
+    assert live_console.print.call_count == 2
+    cli.console.print.assert_not_called()
diff --git a/tests/cli/test_quick_commands.py b/tests/cli/test_quick_commands.py
index 7a89d4ca28a..1c94cb1b025 100644
--- a/tests/cli/test_quick_commands.py
+++ b/tests/cli/test_quick_commands.py
@@ -33,6 +33,20 @@ class TestCLIQuickCommands:
         printed = self._printed_plain(cli.console.print.call_args[0][0])
         assert printed == "daily-note"
 
+    def test_exec_command_uses_chat_console_when_tui_is_live(self):
+        cli = self._make_cli({"dn": {"type": "exec", "command": "echo daily-note"}})
+        cli._app = object()
+        live_console = MagicMock()
+
+        with patch("cli.ChatConsole", return_value=live_console):
+            result = cli.process_command("/dn")
+
+        assert result is True
+        live_console.print.assert_called_once()
+        printed = self._printed_plain(live_console.print.call_args[0][0])
+        assert printed == "daily-note"
+        cli.console.print.assert_not_called()
+
     def test_exec_command_stderr_shown_on_no_stdout(self):
         cli = self._make_cli({"err": {"type": "exec", "command": "echo error >&2"}})
         result = cli.process_command("/err")
diff --git a/tests/cli/test_reasoning_command.py b/tests/cli/test_reasoning_command.py
index 554cb6f96bc..228d2904b16 100644
--- a/tests/cli/test_reasoning_command.py
+++ b/tests/cli/test_reasoning_command.py
@@ -473,6 +473,7 @@ class TestInlineThinkBlockExtraction(unittest.TestCase):
         agent.verbose_logging = False
         agent.reasoning_callback = None
         agent.stream_delta_callback = None  # non-streaming by default
+        agent._stream_callback = None  # non-streaming by default
         return agent
 
     def test_single_think_block_extracted(self):
@@ -619,6 +620,7 @@ class TestReasoningDeltasFiredFlag(unittest.TestCase):
         agent = AIAgent.__new__(AIAgent)
         agent.reasoning_callback = None
         agent.stream_delta_callback = None
+        agent._stream_callback = None
         agent.verbose_logging = False
         return agent
 
diff --git a/tests/cli/test_resume_display.py b/tests/cli/test_resume_display.py
index d183e48b2bc..bb931bb1fea 100644
--- a/tests/cli/test_resume_display.py
+++ b/tests/cli/test_resume_display.py
@@ -344,6 +344,127 @@ class TestDisplayResumedHistory:
         assert "Just thinking" not in output
         assert "Hi there!" in output
 
+    def test_think_tags_stripped(self):
+        """<think>...</think> blocks should be stripped from display (#11316)."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "Solve this"},
+            {
+                "role": "assistant",
+                "content": "<think>\nI need to reason carefully here.\n</think>\n\nThe answer is 7.",
+            },
+        ]
+        output = self._capture_display(cli)
+
+        assert "<think>" not in output
+        assert "</think>" not in output
+        assert "I need to reason carefully here" not in output
+        assert "The answer is 7" in output
+
+    def test_thinking_tags_stripped(self):
+        """<thinking>...</thinking> blocks should be stripped from display."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "What is 2+2?"},
+            {
+                "role": "assistant",
+                "content": "<thinking>\nLet me compute: 2 + 2 = 4\n</thinking>\n\nThe answer is 4.",
+            },
+        ]
+        output = self._capture_display(cli)
+
+        assert "<thinking>" not in output
+        assert "Let me compute" not in output
+        assert "The answer is 4" in output
+
+    def test_reasoning_tags_stripped(self):
+        """<reasoning>...</reasoning> blocks should be stripped from display."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "Explain gravity"},
+            {
+                "role": "assistant",
+                "content": (
+                    "<reasoning>\nGravity is a fundamental force...\n</reasoning>\n\n"
+                    "Gravity pulls objects together."
+                ),
+            },
+        ]
+        output = self._capture_display(cli)
+
+        assert "<reasoning>" not in output
+        assert "fundamental force" not in output
+        assert "Gravity pulls objects together" in output
+
+    def test_thought_tags_stripped(self):
+        """<thought>...</thought> blocks (Gemma 4) should be stripped."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "Say hello"},
+            {
+                "role": "assistant",
+                "content": "<thought>\nInternal thought here.\n</thought>\n\nHello!",
+            },
+        ]
+        output = self._capture_display(cli)
+
+        assert "<thought>" not in output
+        assert "Internal thought here" not in output
+        assert "Hello!" in output
+
+    def test_unclosed_think_tag_stripped(self):
+        """Unclosed <think> (truncated generation) should not leak reasoning."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "Truncated response"},
+            {
+                "role": "assistant",
+                "content": "Some text before.\n<think>\nUnfinished reasoning...",
+            },
+        ]
+        output = self._capture_display(cli)
+
+        assert "<think>" not in output
+        assert "Unfinished reasoning" not in output
+        assert "Some text before" in output
+
+    def test_multiple_reasoning_blocks_all_stripped(self):
+        """Multiple interleaved reasoning blocks are all stripped."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "Complex question"},
+            {
+                "role": "assistant",
+                "content": (
+                    "<think>\nFirst thought.\n</think>\n"
+                    "Partial text.\n"
+                    "<reasoning>\nSecond thought.\n</reasoning>\n"
+                    "Final answer."
+                ),
+            },
+        ]
+        output = self._capture_display(cli)
+
+        assert "First thought" not in output
+        assert "Second thought" not in output
+        assert "Partial text" in output
+        assert "Final answer" in output
+
+    def test_orphan_closing_think_tag_stripped(self):
+        """A stray </think> with no matching open should not render to user."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "Broken output"},
+            {
+                "role": "assistant",
+                "content": "some leftover reasoning</think>Visible answer.",
+            },
+        ]
+        output = self._capture_display(cli)
+
+        assert "</think>" not in output
+        assert "Visible answer" in output
+
     def test_assistant_with_text_and_tool_calls(self):
         """When an assistant message has both text content AND tool_calls."""
         cli = _make_cli()
diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py
index 2717584e464..c083a4a80e2 100644
--- a/tests/cron/test_scheduler.py
+++ b/tests/cron/test_scheduler.py
@@ -1024,7 +1024,7 @@ class TestRunJobSkillBacked:
             "id": "multi-skill-job",
             "name": "multi skill test",
             "prompt": "Combine the results.",
-            "skills": ["blogwatcher", "find-nearby"],
+            "skills": ["blogwatcher", "maps"],
         }
 
         fake_db = MagicMock()
@@ -1057,12 +1057,12 @@ class TestRunJobSkillBacked:
         assert error is None
         assert final_response == "ok"
         assert skill_view_mock.call_count == 2
-        assert [call.args[0] for call in skill_view_mock.call_args_list] == ["blogwatcher", "find-nearby"]
+        assert [call.args[0] for call in skill_view_mock.call_args_list] == ["blogwatcher", "maps"]
 
         prompt_arg = mock_agent.run_conversation.call_args.args[0]
-        assert prompt_arg.index("blogwatcher") < prompt_arg.index("find-nearby")
+        assert prompt_arg.index("blogwatcher") < prompt_arg.index("maps")
         assert "Instructions for blogwatcher." in prompt_arg
-        assert "Instructions for find-nearby." in prompt_arg
+        assert "Instructions for maps." in prompt_arg
         assert "Combine the results." in prompt_arg
 
 
@@ -1175,6 +1175,180 @@ class TestBuildJobPromptSilentHint:
         assert system_pos < prompt_pos
 
 
+class TestParseWakeGate:
+    """Unit tests for _parse_wake_gate — pure function, no side effects."""
+
+    def test_empty_output_wakes(self):
+        from cron.scheduler import _parse_wake_gate
+        assert _parse_wake_gate("") is True
+        assert _parse_wake_gate(None) is True
+
+    def test_whitespace_only_wakes(self):
+        from cron.scheduler import _parse_wake_gate
+        assert _parse_wake_gate("   \n\n  \t\n") is True
+
+    def test_non_json_last_line_wakes(self):
+        from cron.scheduler import _parse_wake_gate
+        assert _parse_wake_gate("hello world") is True
+        assert _parse_wake_gate("line 1\nline 2\nplain text") is True
+
+    def test_json_non_dict_wakes(self):
+        """Bare arrays, numbers, strings must not be interpreted as a gate."""
+        from cron.scheduler import _parse_wake_gate
+        assert _parse_wake_gate("[1, 2, 3]") is True
+        assert _parse_wake_gate("42") is True
+        assert _parse_wake_gate('"wakeAgent"') is True
+
+    def test_wake_gate_false_skips(self):
+        from cron.scheduler import _parse_wake_gate
+        assert _parse_wake_gate('{"wakeAgent": false}') is False
+
+    def test_wake_gate_true_wakes(self):
+        from cron.scheduler import _parse_wake_gate
+        assert _parse_wake_gate('{"wakeAgent": true}') is True
+
+    def test_wake_gate_missing_wakes(self):
+        """A JSON dict without a wakeAgent key defaults to waking."""
+        from cron.scheduler import _parse_wake_gate
+        assert _parse_wake_gate('{"data": {"foo": "bar"}}') is True
+
+    def test_non_boolean_false_still_wakes(self):
+        """Only strict ``False`` skips — truthy/falsy shortcuts are too risky."""
+        from cron.scheduler import _parse_wake_gate
+        assert _parse_wake_gate('{"wakeAgent": 0}') is True
+        assert _parse_wake_gate('{"wakeAgent": null}') is True
+        assert _parse_wake_gate('{"wakeAgent": ""}') is True
+
+    def test_only_last_non_empty_line_parsed(self):
+        from cron.scheduler import _parse_wake_gate
+        multi = 'some log output\nmore output\n{"wakeAgent": false}'
+        assert _parse_wake_gate(multi) is False
+
+    def test_trailing_blank_lines_ignored(self):
+        from cron.scheduler import _parse_wake_gate
+        multi = '{"wakeAgent": false}\n\n\n'
+        assert _parse_wake_gate(multi) is False
+
+    def test_non_last_json_line_does_not_gate(self):
+        """A JSON gate on an earlier line with plain text after it does NOT trigger."""
+        from cron.scheduler import _parse_wake_gate
+        multi = '{"wakeAgent": false}\nactually this is the real output'
+        assert _parse_wake_gate(multi) is True
+
+
+class TestRunJobWakeGate:
+    """Integration tests for run_job wake-gate short-circuit."""
+
+    def _make_job(self, name="wake-gate-test", script="check.py"):
+        """Minimal valid cron job dict for run_job."""
+        return {
+            "id": f"job_{name}",
+            "name": name,
+            "prompt": "Do a thing",
+            "schedule": "*/5 * * * *",
+            "script": script,
+        }
+
+    def test_wake_false_skips_agent_and_returns_silent(self, caplog):
+        """When _run_job_script output ends with {wakeAgent: false}, the agent
+        is not invoked and run_job returns the SILENT marker so delivery is
+        suppressed."""
+        from cron.scheduler import SILENT_MARKER
+        import cron.scheduler as scheduler
+
+        with patch.object(scheduler, "_run_job_script",
+                          return_value=(True, '{"wakeAgent": false}')), \
+             patch("run_agent.AIAgent") as agent_cls:
+            success, doc, final, err = scheduler.run_job(self._make_job())
+
+        assert success is True
+        assert err is None
+        assert final == SILENT_MARKER
+        assert "Script gate returned `wakeAgent=false`" in doc
+        agent_cls.assert_not_called()
+
+    def test_wake_true_runs_agent_with_injected_output(self):
+        """When the script returns {wakeAgent: true, data: ...}, the agent is
+        invoked and the data line still shows up in the prompt."""
+        import cron.scheduler as scheduler
+
+        script_output = '{"wakeAgent": true, "data": {"new": 3}}'
+        agent = MagicMock()
+        agent.run_conversation = MagicMock(return_value={
+            "final_response": "ok", "messages": []
+        })
+        with patch.object(scheduler, "_run_job_script",
+                          return_value=(True, script_output)), \
+             patch("run_agent.AIAgent", return_value=agent) as agent_cls:
+            success, doc, final, err = scheduler.run_job(self._make_job())
+
+        agent_cls.assert_called_once()
+        # The script output should be visible in the prompt passed to
+        # run_conversation.
+        call_kwargs = agent.run_conversation.call_args
+        prompt_arg = call_kwargs.args[0] if call_kwargs.args else call_kwargs.kwargs.get("user_message", "")
+        assert script_output in prompt_arg
+        assert success is True
+        assert err is None
+
+    def test_script_runs_only_once_on_wake(self):
+        """Wake-true path must not re-run the script inside _build_job_prompt
+        (script would execute twice otherwise, wasting work and risking
+        double-side-effects)."""
+        import cron.scheduler as scheduler
+
+        call_count = 0
+        def _script_stub(path):
+            nonlocal call_count
+            call_count += 1
+            return (True, "regular output")
+
+        agent = MagicMock()
+        agent.run_conversation = MagicMock(return_value={
+            "final_response": "ok", "messages": []
+        })
+        with patch.object(scheduler, "_run_job_script", side_effect=_script_stub), \
+             patch("run_agent.AIAgent", return_value=agent):
+            scheduler.run_job(self._make_job())
+
+        assert call_count == 1, f"script ran {call_count}x, expected exactly 1"
+
+    def test_script_failure_does_not_trigger_gate(self):
+        """If _run_job_script returns success=False, the gate is NOT evaluated
+        and the agent still runs (the failure is reported as context)."""
+        import cron.scheduler as scheduler
+
+        # Malicious or broken script whose stderr happens to contain the
+        # gate JSON — we must NOT honor it because ran_ok is False.
+        agent = MagicMock()
+        agent.run_conversation = MagicMock(return_value={
+            "final_response": "ok", "messages": []
+        })
+        with patch.object(scheduler, "_run_job_script",
+                          return_value=(False, '{"wakeAgent": false}')), \
+             patch("run_agent.AIAgent", return_value=agent) as agent_cls:
+            success, doc, final, err = scheduler.run_job(self._make_job())
+
+        agent_cls.assert_called_once()  # Agent DID wake despite the gate-like text
+
+    def test_no_script_path_runs_agent_normally(self):
+        """Regression: jobs without a script still work."""
+        import cron.scheduler as scheduler
+
+        agent = MagicMock()
+        agent.run_conversation = MagicMock(return_value={
+            "final_response": "ok", "messages": []
+        })
+        job = self._make_job(script=None)
+        job.pop("script", None)
+        with patch.object(scheduler, "_run_job_script") as script_fn, \
+             patch("run_agent.AIAgent", return_value=agent) as agent_cls:
+            scheduler.run_job(job)
+
+        script_fn.assert_not_called()
+        agent_cls.assert_called_once()
+
+
 class TestBuildJobPromptMissingSkill:
     """Verify that a missing skill logs a warning and does not crash the job."""
 
diff --git a/tests/gateway/test_cancel_background_drain.py b/tests/gateway/test_cancel_background_drain.py
new file mode 100644
index 00000000000..c95fdc062eb
--- /dev/null
+++ b/tests/gateway/test_cancel_background_drain.py
@@ -0,0 +1,148 @@
+"""Regression test: cancel_background_tasks must drain late-arrival tasks.
+
+During gateway shutdown, a message arriving while
+cancel_background_tasks is mid-await can spawn a fresh
+_process_message_background task via handle_message, which is added
+to self._background_tasks.  Without the re-drain loop, the subsequent
+_background_tasks.clear() drops the reference; the task runs
+untracked against a disconnecting adapter.
+"""
+
+import asyncio
+from unittest.mock import AsyncMock
+
+import pytest
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType
+from gateway.session import SessionSource, build_session_key
+
+
+class _StubAdapter(BasePlatformAdapter):
+    async def connect(self):
+        pass
+
+    async def disconnect(self):
+        pass
+
+    async def send(self, chat_id, text, **kwargs):
+        return None
+
+    async def get_chat_info(self, chat_id):
+        return {}
+
+
+def _make_adapter():
+    adapter = _StubAdapter(PlatformConfig(enabled=True, token="t"), Platform.TELEGRAM)
+    adapter._send_with_retry = AsyncMock(return_value=None)
+    return adapter
+
+
+def _event(text, cid="42"):
+    return MessageEvent(
+        text=text,
+        message_type=MessageType.TEXT,
+        source=SessionSource(platform=Platform.TELEGRAM, chat_id=cid, chat_type="dm"),
+    )
+
+
+@pytest.mark.asyncio
+async def test_cancel_background_tasks_drains_late_arrivals():
+    """A message that arrives during the gather window must be picked
+    up by the re-drain loop, not leaked as an untracked task."""
+    adapter = _make_adapter()
+    sk = build_session_key(
+        SessionSource(platform=Platform.TELEGRAM, chat_id="42", chat_type="dm")
+    )
+
+    m1_started = asyncio.Event()
+    m1_cleanup_running = asyncio.Event()
+    m2_started = asyncio.Event()
+    m2_cancelled = asyncio.Event()
+
+    async def handler(event):
+        if event.text == "M1":
+            m1_started.set()
+            try:
+                await asyncio.sleep(10)
+            except asyncio.CancelledError:
+                m1_cleanup_running.set()
+                # Widen the gather window with a shielded cleanup
+                # delay so M2 can get injected during it.
+                await asyncio.shield(asyncio.sleep(0.2))
+                raise
+        else:  # M2 — the late arrival
+            m2_started.set()
+            try:
+                await asyncio.sleep(10)
+            except asyncio.CancelledError:
+                m2_cancelled.set()
+                raise
+
+    adapter._message_handler = handler
+
+    # Spawn M1.
+    await adapter.handle_message(_event("M1"))
+    await asyncio.wait_for(m1_started.wait(), timeout=1.0)
+
+    # Kick off shutdown.  This will cancel M1 and await its cleanup.
+    cancel_task = asyncio.create_task(adapter.cancel_background_tasks())
+
+    # Wait until M1's cleanup is running (inside the shielded sleep).
+    # This is the race window: cancel_task is awaiting gather and M1 is
+    # shielded in cleanup, so M1's own finally has not yet cleared the
+    # _active_sessions entry (it is popped manually below).
+    await asyncio.wait_for(m1_cleanup_running.wait(), timeout=1.0)
+
+    # Clear the active-session entry (M1's finally hasn't fully run yet,
+    # but in production the platform dispatcher would deliver a new
+    # message that takes the no-active-session spawn path).  For this
+    # repro, make it deterministic.
+    adapter._active_sessions.pop(sk, None)
+
+    # Inject late arrival — spawns a fresh _process_message_background
+    # task and adds it to _background_tasks while cancel_task is still
+    # in gather.
+    await adapter.handle_message(_event("M2"))
+    await asyncio.wait_for(m2_started.wait(), timeout=1.0)
+
+    # Let cancel_task finish.  Round 1's gather completes when M1's
+    # shielded cleanup finishes.  Round 2 should pick up M2.
+    await asyncio.wait_for(cancel_task, timeout=5.0)
+
+    # Assert M2 was drained, not leaked.
+    assert m2_cancelled.is_set(), (
+        "Late-arrival M2 was NOT cancelled by cancel_background_tasks — "
+        "the re-drain loop is missing and the task leaked"
+    )
+    assert adapter._background_tasks == set()
+
+
+@pytest.mark.asyncio
+async def test_cancel_background_tasks_handles_no_tasks():
+    """Regression guard: no tasks, no hang, no error."""
+    adapter = _make_adapter()
+    await adapter.cancel_background_tasks()
+    assert adapter._background_tasks == set()
+
+
+@pytest.mark.asyncio
+async def test_cancel_background_tasks_bounded_rounds():
+    """Regression guard: the drain loop is bounded — it does not spin
+    forever; baseline only: one cleanly-cancelling task, no late arrivals."""
+    adapter = _make_adapter()
+
+    # Single well-behaved task that cancels cleanly — baseline check
+    # that the loop terminates in one round.
+    async def quick():
+        try:
+            await asyncio.sleep(10)
+        except asyncio.CancelledError:
+            raise
+
+    task = asyncio.create_task(quick())
+    adapter._background_tasks.add(task)
+
+    await adapter.cancel_background_tasks()
+    assert task.done()
+    assert adapter._background_tasks == set()
diff --git a/tests/gateway/test_command_bypass_active_session.py b/tests/gateway/test_command_bypass_active_session.py
index 10ff062126a..ea910d30ba8 100644
--- a/tests/gateway/test_command_bypass_active_session.py
+++ b/tests/gateway/test_command_bypass_active_session.py
@@ -200,6 +200,25 @@ class TestCommandBypassActiveSession:
             "/background response was not sent back to the user"
         )
 
+    @pytest.mark.asyncio
+    async def test_steer_bypasses_guard(self):
+        """/steer must bypass the Level-1 active-session guard so it reaches
+        the gateway runner's /steer handler and injects into the running
+        agent instead of being queued as user text for the next turn.
+        """
+        adapter = _make_adapter()
+        sk = _session_key()
+        adapter._active_sessions[sk] = asyncio.Event()  # mark the session as active before the command arrives
+
+        await adapter.handle_message(_make_event("/steer also check auth.log"))
+
+        assert sk not in adapter._pending_messages, (  # must not have been queued for a later turn
+            "/steer was queued as a pending message instead of being dispatched"
+        )
+        assert any("handled:steer" in r for r in adapter.sent_responses), (  # a steer reply was recorded
+            "/steer response was not sent back to the user"
+        )
+
     @pytest.mark.asyncio
     async def test_help_bypasses_guard(self):
         """/help must bypass so it is not silently dropped as pending slash text."""
@@ -249,6 +268,82 @@ class TestCommandBypassActiveSession:
         )
 
 
+# ---------------------------------------------------------------------------
+# Tests: non-bypass-set commands (no dedicated Level-2 handler) also bypass
+# instead of interrupting + being discarded.  Regression for the Discord
+# ghost-slash-command bug where /model, /reasoning, /voice, /insights, /title,
+# /resume, /retry, /undo, /compress, /usage, /provider, /reload-mcp,
+# /sethome, /reset silently interrupted the running agent.
+# ---------------------------------------------------------------------------
+
+
+class TestAllResolvableCommandsBypassGuard:
+    """Every recognized slash command must bypass the Level-1 active-session
+    guard. Without this, commands the user fires mid-run interrupt the agent
+    AND get silently discarded by the slash-command safety net (zero-char
+    response)."""
+
+    @pytest.mark.parametrize(
+        "command_text,canonical",  # NOTE(review): canonical is not referenced in the test body
+        [
+            ("/model claude-sonnet-4", "model"),
+            ("/model", "model"),
+            ("/reasoning high", "reasoning"),
+            ("/personality default", "personality"),
+            ("/voice on", "voice"),
+            ("/insights 7", "insights"),
+            ("/title my session", "title"),
+            ("/resume yesterday", "resume"),
+            ("/retry", "retry"),
+            ("/undo", "undo"),
+            ("/compress", "compress"),
+            ("/usage", "usage"),
+            ("/provider", "provider"),
+            ("/reload-mcp", "reload-mcp"),
+            ("/sethome", "sethome"),
+        ],
+    )
+    @pytest.mark.asyncio
+    async def test_command_bypasses_guard(self, command_text, canonical):
+        """Any resolvable slash command bypasses instead of being queued."""
+        adapter = _make_adapter()
+        sk = _session_key()
+        adapter._active_sessions[sk] = asyncio.Event()  # mark the session as active
+
+        await adapter.handle_message(_make_event(command_text))
+
+        assert sk not in adapter._pending_messages, (
+            f"{command_text} was queued as pending — it should bypass the guard"
+        )
+        assert len(adapter.sent_responses) > 0, (
+            f"{command_text} produced no response — it should be dispatched, "
+            "not silently discarded"
+        )
+
+    def test_should_bypass_returns_true_for_every_registered_command(self):
+        """Spot-check (a fixed list, not the full registry): the commands previously-broken on Discord all bypass."""
+        from hermes_cli.commands import should_bypass_active_session
+
+        for cmd in (
+            "model", "reasoning", "personality", "voice", "insights", "title",
+            "resume", "retry", "undo", "compress", "usage", "provider",
+            "reload-mcp", "sethome", "reset",
+        ):
+            assert should_bypass_active_session(cmd) is True, (
+                f"/{cmd} must bypass the active-session guard"
+            )
+
+    def test_should_bypass_returns_false_for_unknown(self):
+        """Unknown words don't bypass — they get queued as user text."""
+        from hermes_cli.commands import should_bypass_active_session
+
+        assert should_bypass_active_session("foobar") is False
+        assert should_bypass_active_session(None) is False
+        assert should_bypass_active_session("") is False
+        # A file path with its leading slash stripped: '/path/to/file.py' -> 'path/to/file.py'
+        assert should_bypass_active_session("path/to/file.py") is False
+
+
 # ---------------------------------------------------------------------------
 # Tests: non-bypass messages still get queued
 # ---------------------------------------------------------------------------
diff --git a/tests/gateway/test_discord_race_polish.py b/tests/gateway/test_discord_race_polish.py
new file mode 100644
index 00000000000..a0f900aea60
--- /dev/null
+++ b/tests/gateway/test_discord_race_polish.py
@@ -0,0 +1,122 @@
+"""Regression tests for the Discord adapter race-polish fix.
+
+Two races are addressed:
+1. on_message allowlist check racing on_ready's _resolve_allowed_usernames
+   resolution window.  Username-based entries in DISCORD_ALLOWED_USERS
+   appear in the set as raw strings for several seconds after
+   connect/reconnect; author.id is always numeric, so legitimate users
+   are silently rejected until resolution finishes.
+2. join_voice_channel check-and-connect: concurrent /voice channel
+   invocations both see _voice_clients.get(guild_id) is None, both call
+   channel.connect(), second raises ClientException ('Already connected').
+"""
+
+import asyncio
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from gateway.config import Platform, PlatformConfig
+
+
+def _make_adapter():
+    """Bare DiscordAdapter for testing — object.__new__ pattern per AGENTS.md."""
+    from gateway.platforms.discord import DiscordAdapter
+
+    adapter = object.__new__(DiscordAdapter)  # skips __init__; instance state is set by hand below
+    adapter._platform = Platform.DISCORD
+    adapter.config = PlatformConfig(enabled=True, token="t")
+    adapter._ready_event = asyncio.Event()  # left unset here
+    adapter._allowed_user_ids = set()
+    adapter._allowed_role_ids = set()
+    adapter._voice_clients = {}  # the join test checks the guild id (42) as a key here
+    adapter._voice_locks = {}
+    adapter._voice_receivers = {}
+    adapter._voice_listen_tasks = {}
+    adapter._voice_timeout_tasks = {}
+    adapter._voice_text_channels = {}
+    adapter._voice_sources = {}
+    adapter._client = MagicMock()  # mock in place of the real client object
+    return adapter
+
+
+class TestJoinVoiceSerialization:
+    @pytest.mark.asyncio
+    async def test_concurrent_joins_do_not_double_connect(self):
+        """Two concurrent join_voice_channel calls on the same guild
+        must serialize through the per-guild lock — only ONE
+        channel.connect() actually fires; the second sees the
+        _voice_clients entry the first just installed."""
+        adapter = _make_adapter()
+
+        connect_count = [0]  # one-element list so the nested coroutine can mutate the counter
+        connect_event = asyncio.Event()  # held unset to keep connect() pending
+
+        class FakeVC:
+            def __init__(self, channel):
+                self.channel = channel
+
+            def is_connected(self):
+                return True
+
+            async def move_to(self, _channel):
+                return None
+
+            async def disconnect(self):
+                return None
+
+        async def slow_connect(self):  # "self" is the channel passed in by the lambda below
+            connect_count[0] += 1
+            # Widen the race window
+            await connect_event.wait()
+            return FakeVC(self)
+
+        channel = MagicMock()
+        channel.id = 111
+        channel.guild.id = 42
+        channel.connect = lambda: slow_connect(channel)
+
+        # Stub VoiceReceiver (no real audio); ensure_future drops its coroutine for a no-op task
+        from gateway.platforms import discord as discord_mod
+        with patch.object(discord_mod, "VoiceReceiver", MagicMock(return_value=MagicMock(start=lambda: None))):
+            with patch.object(discord_mod.asyncio, "ensure_future", lambda _c: asyncio.create_task(asyncio.sleep(0))):
+                # Fire two joins concurrently
+                t1 = asyncio.create_task(adapter.join_voice_channel(channel))
+                t2 = asyncio.create_task(adapter.join_voice_channel(channel))
+                # Let them run until they're blocked on our event
+                await asyncio.sleep(0.05)
+                # Release connect so both can finish
+                connect_event.set()
+                r1, r2 = await asyncio.gather(t1, t2)
+
+        assert connect_count[0] == 1, (
+            f"Expected exactly 1 channel.connect() call, got {connect_count[0]} — "
+            "per-guild voice lock is not serializing join_voice_channel"
+        )
+        assert r1 is True and r2 is True
+        assert 42 in adapter._voice_clients  # keyed by channel.guild.id set above
+
+
+class TestOnMessageWaitsForReadyEvent:
+    @pytest.mark.asyncio
+    async def test_on_message_blocks_until_ready_event_set(self):
+        """A message arriving before on_ready finishes
+        _resolve_allowed_usernames must wait, not proceed with a
+        half-resolved allowlist."""
+        # Source-inspection check, not a behavioral one: we read the
+        # source of DiscordAdapter.connect (where on_message is presumably
+        # defined — confirm) and assert it contains the expected wait
+        # pattern.  A full end-to-end test would need the discord.py
+        # client machinery, which is not practical here.
+        import inspect
+        from gateway.platforms import discord as discord_mod
+
+        src = inspect.getsource(discord_mod.DiscordAdapter.connect)
+        assert "_ready_event.is_set()" in src, (
+            "on_message must gate on _ready_event so username-based "
+            "allowlist entries are resolved before the allowlist check"
+        )
+        assert "await asyncio.wait_for(" in src and "_ready_event.wait()" in src, (
+            "Expected asyncio.wait_for(_ready_event.wait(), timeout=...) "
+            "pattern in on_message"
+        )
diff --git a/tests/gateway/test_dm_topics.py b/tests/gateway/test_dm_topics.py
index b9a94c3438b..69e9629b23d 100644
--- a/tests/gateway/test_dm_topics.py
+++ b/tests/gateway/test_dm_topics.py
@@ -645,3 +645,54 @@ def test_group_topic_chat_id_int_string_coercion():
 
     assert event.auto_skill == "hermes-agent-dev"
     assert event.source.chat_topic == "Dev"
+
+
+# ── _build_message_event: from_user=None fallback in DMs ──
+
+
+def test_build_message_event_dm_from_user_none_falls_back_to_chat_id():
+    """When from_user is None in a DM, user_id should fall back to chat.id."""
+    from gateway.platforms.base import MessageType
+
+    adapter = _make_adapter()
+    msg = _make_mock_message(chat_id=12345, user_id=42, user_name="Alice")
+    # Simulate from_user being None (edge case on fresh restart / forwarded msg)
+    msg.from_user = None
+
+    event = adapter._build_message_event(msg, MessageType.TEXT)
+
+    # Falls back to chat.id as a string — assumes _make_mock_message defaults to a DM chat
+    assert event.source.user_id == "12345"
+    assert event.source.user_name == "Alice"  # falls back to chat.full_name
+
+
+def test_build_message_event_group_from_user_none_stays_none():
+    """When from_user is None in a group, user_id should remain None."""
+    from gateway.platforms.base import MessageType
+
+    adapter = _make_adapter()
+    msg = _make_mock_message(
+        chat_id=-1001234567890, chat_type=_ChatType.SUPERGROUP,
+        user_id=42, user_name="Alice"
+    )
+    msg.from_user = None  # drop the sender after building the supergroup message
+
+    event = adapter._build_message_event(msg, MessageType.TEXT)
+
+    # Groups should NOT fall back — anonymous senders stay None
+    assert event.source.user_id is None
+    assert event.source.user_name is None
+
+
+def test_build_message_event_dm_from_user_present_uses_user():
+    """When from_user is present in a DM, it should be used (no fallback)."""
+    from gateway.platforms.base import MessageType
+
+    adapter = _make_adapter()
+    msg = _make_mock_message(chat_id=12345, user_id=99999, user_name="Bob")  # user_id != chat_id, so the two are distinguishable
+
+    event = adapter._build_message_event(msg, MessageType.TEXT)
+
+    # Normal case — from_user is used directly
+    assert event.source.user_id == "99999"
+    assert event.source.user_name == "Bob"
diff --git a/tests/gateway/test_feishu.py b/tests/gateway/test_feishu.py
index 661e37ec1a2..14ed9e1715d 100644
--- a/tests/gateway/test_feishu.py
+++ b/tests/gateway/test_feishu.py
@@ -2370,6 +2370,134 @@ class TestAdapterBehavior(unittest.TestCase):
         elements = payload["zh_cn"]["content"][0]
         self.assertEqual(elements, [{"tag": "md", "text": "可以用 **粗体** 和 *斜体*。"}])
 
+    @patch.dict(os.environ, {}, clear=True)
+    def test_send_splits_fenced_code_blocks_into_separate_post_rows(self):
+        from gateway.config import PlatformConfig
+        from gateway.platforms.feishu import FeishuAdapter
+
+        adapter = FeishuAdapter(PlatformConfig())
+        captured = {}  # filled by _MessageAPI.create with the outgoing request
+
+        class _MessageAPI:
+            def create(self, request):
+                captured["request"] = request
+                return SimpleNamespace(
+                    success=lambda: True,
+                    data=SimpleNamespace(message_id="om_codeblock"),
+                )
+
+        adapter._client = SimpleNamespace(  # fake client exposing only im.v1.message
+            im=SimpleNamespace(
+                v1=SimpleNamespace(
+                    message=_MessageAPI(),
+                )
+            )
+        )
+
+        async def _direct(func, *args, **kwargs):  # patched in for asyncio.to_thread: calls func inline
+            return func(*args, **kwargs)
+
+        content = (  # prose, then a fenced json block, then trailing prose
+            "确认已入库 ✓\n"
+            "文件路径：`/root/.hermes/profiles/agent_cto/cron/jobs.json`\n"
+            "**解码后的内容：**\n"
+            "```json\n"
+            '{"cron": "list"}\n'
+            "```\n"
+            "后续说明仍应保留。"
+        )
+
+        with patch("gateway.platforms.feishu.asyncio.to_thread", side_effect=_direct):
+            result = asyncio.run(
+                adapter.send(
+                    chat_id="oc_chat",
+                    content=content,
+                )
+            )
+
+        self.assertTrue(result.success)
+        self.assertEqual(captured["request"].request_body.msg_type, "post")
+        payload = json.loads(captured["request"].request_body.content)
+        rows = payload["zh_cn"]["content"]
+        self.assertEqual(  # expect three rows: leading prose, the fenced block, trailing prose
+            rows,
+            [
+                [
+                    {
+                        "tag": "md",
+                        "text": "确认已入库 ✓\n文件路径：`/root/.hermes/profiles/agent_cto/cron/jobs.json`\n**解码后的内容：**",
+                    }
+                ],
+                [{"tag": "md", "text": "```json\n{\"cron\": \"list\"}\n```"}],
+                [{"tag": "md", "text": "后续说明仍应保留。"}],
+            ],
+        )
+
+    @patch.dict(os.environ, {}, clear=True)
+    def test_build_post_payload_keeps_fence_like_code_lines_inside_code_block(self):
+        from gateway.config import PlatformConfig
+        from gateway.platforms.feishu import FeishuAdapter
+
+        adapter = FeishuAdapter(PlatformConfig())
+        payload = json.loads(
+            adapter._build_post_payload(
+                "before\n```python\n```oops\n```\nafter"  # the "```oops" line must not close the fence
+            )
+        )
+
+        self.assertEqual(
+            payload["zh_cn"]["content"],
+            [
+                [{"tag": "md", "text": "before"}],
+                [{"tag": "md", "text": "```python\n```oops\n```"}],
+                [{"tag": "md", "text": "after"}],
+            ],
+        )
+
+    @patch.dict(os.environ, {}, clear=True)
+    def test_build_post_payload_preserves_trailing_spaces_in_code_block(self):
+        from gateway.config import PlatformConfig
+        from gateway.platforms.feishu import FeishuAdapter
+
+        adapter = FeishuAdapter(PlatformConfig())
+        payload = json.loads(
+            adapter._build_post_payload(
+                "before\n```python\nline with two spaces  \n```\nafter"  # two trailing spaces inside the fence
+            )
+        )
+
+        self.assertEqual(
+            payload["zh_cn"]["content"],
+            [
+                [{"tag": "md", "text": "before"}],
+                [{"tag": "md", "text": "```python\nline with two spaces  \n```"}],
+                [{"tag": "md", "text": "after"}],
+            ],
+        )
+
+    @patch.dict(os.environ, {}, clear=True)
+    def test_build_post_payload_splits_multiple_fenced_code_blocks(self):
+        from gateway.config import PlatformConfig
+        from gateway.platforms.feishu import FeishuAdapter
+
+        adapter = FeishuAdapter(PlatformConfig())
+        payload = json.loads(
+            adapter._build_post_payload(
+                "before\n```python\nprint(1)\n```\nmiddle\n```json\n{}\n```\nafter"  # two fenced blocks separated by prose
+            )
+        )
+
+        self.assertEqual(  # five rows: prose / code / prose / code / prose
+            payload["zh_cn"]["content"],
+            [
+                [{"tag": "md", "text": "before"}],
+                [{"tag": "md", "text": "```python\nprint(1)\n```"}],
+                [{"tag": "md", "text": "middle"}],
+                [{"tag": "md", "text": "```json\n{}\n```"}],
+                [{"tag": "md", "text": "after"}],
+            ],
+        )
+
     @patch.dict(os.environ, {}, clear=True)
     def test_send_falls_back_to_text_when_post_payload_is_rejected(self):
         from gateway.config import PlatformConfig
diff --git a/tests/gateway/test_pending_drain_race.py b/tests/gateway/test_pending_drain_race.py
new file mode 100644
index 00000000000..810d52e9e2a
--- /dev/null
+++ b/tests/gateway/test_pending_drain_race.py
@@ -0,0 +1,212 @@
+"""Regression tests: pending-drain + finally-cleanup races must not spawn
+duplicate agents OR silently drop messages that arrived during cleanup.
+
+Two related races in gateway/platforms/base.py:_process_message_background:
+
+1. Pending-drain path (previous line 1931):
+   ``del self._active_sessions[session_key]`` opened a window where a
+   concurrent inbound message could pass the Level-1 guard, spawn its
+   own _process_message_background, and run simultaneously with the
+   recursive drain.  Two agents on one session_key = duplicate responses.
+
+2. Finally-cleanup path (previous line 1990-1991):
+   Between the awaits in finally (typing_task, stop_typing) and the
+   ``del self._active_sessions[session_key]``, a new message could
+   land in _pending_messages.  The del ran anyway, and the message was
+   silently dropped — user never got a reply.
+
+Fix: keep the _active_sessions entry live across the turn chain and
+clear the Event instead of deleting; in finally, drain any
+late-arrival pending message by spawning a task instead of
+dropping it.
+"""
+
+import asyncio
+from unittest.mock import AsyncMock
+
+import pytest
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import (
+    BasePlatformAdapter,
+    MessageEvent,
+    MessageType,
+)
+from gateway.session import SessionSource, build_session_key
+
+
+class _StubAdapter(BasePlatformAdapter):  # adapter whose overridden methods are all no-op stubs
+    async def connect(self):
+        pass
+
+    async def disconnect(self):
+        pass
+
+    async def send(self, chat_id, text, **kwargs):  # accepts and ignores everything
+        return None
+
+    async def get_chat_info(self, chat_id):  # always an empty dict
+        return {}
+
+
+def _make_adapter():  # _StubAdapter on Platform.TELEGRAM with _send_with_retry mocked out
+    adapter = _StubAdapter(PlatformConfig(enabled=True, token="t"), Platform.TELEGRAM)
+    adapter._send_with_retry = AsyncMock(return_value=None)  # awaitable mock returning None
+    return adapter
+
+
+def _make_event(text="hi", chat_id="42"):  # TEXT MessageEvent from a Telegram "dm" source
+    return MessageEvent(
+        text=text,
+        message_type=MessageType.TEXT,
+        source=SessionSource(platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm"),
+    )
+
+
+def _sk(chat_id="42"):  # session key built from the same SessionSource fields _make_event uses
+    return build_session_key(
+        SessionSource(platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm")
+    )
+
+
+@pytest.mark.asyncio
+async def test_pending_drain_keeps_active_session_guard_live():
+    """Fix for R5: during pending-drain cleanup, _active_sessions must stay
+    populated so concurrent inbound messages can't spawn a duplicate
+    _process_message_background.  We only CLEAR the Event, never delete."""
+    adapter = _make_adapter()
+    sk = _sk()
+
+    # Register a slow handler so the agent is "mid-processing" when the
+    # pending message arrives.
+    first_started = asyncio.Event()
+    release_first = asyncio.Event()
+
+    async def handler(event):
+        first_started.set()
+        await release_first.wait()
+        return "done"
+
+    adapter._message_handler = handler
+
+    # Spawn M1 through handle_message.
+    await adapter.handle_message(_make_event(text="M1"))
+
+    # Wait until M1 is actively running inside the handler.
+    await asyncio.wait_for(first_started.wait(), timeout=1.0)
+
+    # Assert: session is active.
+    assert sk in adapter._active_sessions
+    active_event = adapter._active_sessions[sk]  # kept for the identity check after the drain starts
+
+    # Simulate pending message (M2) queued while M1 runs.
+    adapter._pending_messages[sk] = _make_event(text="M2")
+
+    # Release M1 — pending-drain block now runs.  During its cleanup
+    # awaits, _active_sessions[sk] must remain populated (same object
+    # reference) so any M3 arriving in that window hits the busy-handler.
+    release_first.set()
+
+    # Give the drain a moment to execute its .clear() + await typing_task
+    # without letting it fully finish the recursive call.
+    await asyncio.sleep(0)
+    await asyncio.sleep(0)
+
+    # Across the drain transition, the Event object must be the SAME
+    # reference (not replaced, not deleted).  If del happened, the key
+    # would be missing briefly; if a new Event was installed, the
+    # identity would differ.
+    assert sk in adapter._active_sessions, (
+        "_active_sessions[session_key] was deleted during pending-drain — "
+        "opens a window for duplicate-agent spawn"
+    )
+    assert adapter._active_sessions[sk] is active_event, (
+        "_active_sessions[session_key] was replaced during pending-drain — "
+        "the old Event may have waiters that now won't be signaled"
+    )
+
+    # Finish drain, then cancel whatever background tasks are still tracked.
+    await asyncio.sleep(0.1)
+    await adapter.cancel_background_tasks()
+
+
+@pytest.mark.asyncio
+async def test_finally_cleanup_drains_late_arrival_pending():
+    """Fix for R6: if a message lands in _pending_messages during the
+    finally-block cleanup awaits, the finally must spawn a drain task
+    instead of deleting _active_sessions and dropping the message."""
+    adapter = _make_adapter()
+    sk = _sk()
+
+    processed = []  # event texts in the order the handler saw them
+
+    async def handler(event):
+        processed.append(event.text)
+        return "ok"
+
+    adapter._message_handler = handler
+
+    # Instrument stop_typing to inject a late-arrival pending message
+    # during the finally-block await window.  This exactly simulates the
+    # R6 race: the message arrives after the response has been sent but
+    # before _active_sessions is deleted.
+    original_stop = adapter.stop_typing if hasattr(adapter, "stop_typing") else None  # None if the attribute is absent
+
+    injected = {"done": False}  # ensures LATE is injected only once
+
+    async def stop_typing_injects_pending(*args, **kwargs):
+        # Yield so the injection happens mid-await.
+        await asyncio.sleep(0)
+        if not injected["done"]:
+            adapter._pending_messages[sk] = _make_event(text="LATE")
+            injected["done"] = True
+        if original_stop:
+            return await original_stop(*args, **kwargs)
+        return None
+
+    adapter.stop_typing = stop_typing_injects_pending
+
+    # Send M1.
+    await adapter.handle_message(_make_event(text="M1"))
+
+    # Drain: wait for M1 to finish and the late-drain task to process LATE.
+    for _ in range(50):  # up to ~0.5s
+        if "LATE" in processed:
+            break
+        await asyncio.sleep(0.01)
+
+    await adapter.cancel_background_tasks()
+
+    assert "M1" in processed, "M1 was not processed"
+    assert "LATE" in processed, (
+        "Late-arrival pending message was silently dropped — finally "
+        "cleanup should have spawned a drain task"
+    )
+
+
+@pytest.mark.asyncio
+async def test_no_pending_cleans_up_normally():
+    """Regression guard: when no pending message exists, the finally
+    block must still delete _active_sessions as before (no leak)."""
+    adapter = _make_adapter()
+    sk = _sk()
+
+    async def handler(event):  # returns immediately, so the turn ends right away
+        return "ok"
+
+    adapter._message_handler = handler
+
+    await adapter.handle_message(_make_event(text="solo"))
+
+    # Wait for background task to finish.
+    for _ in range(50):  # poll up to ~0.5s (50 sleeps of 0.01s)
+        if sk not in adapter._active_sessions:
+            break
+        await asyncio.sleep(0.01)
+
+    assert sk not in adapter._active_sessions, (
+        "_active_sessions was not cleaned up after a normal turn with no pending"
+    )
+    assert sk not in adapter._pending_messages
+
+    await adapter.cancel_background_tasks()
diff --git a/tests/gateway/test_pending_event_none.py b/tests/gateway/test_pending_event_none.py
index b2e1356fa14..e717c88296e 100644
--- a/tests/gateway/test_pending_event_none.py
+++ b/tests/gateway/test_pending_event_none.py
@@ -1,13 +1,18 @@
-"""Tests for the pending_event None guard in recursive _run_agent calls.
+"""Tests for pending follow-up extraction in recursive _run_agent calls.
 
 When pending_event is None (Path B: pending comes from interrupt_message),
 accessing pending_event.channel_prompt previously raised AttributeError.
 This verifies the fix: channel_prompt is captured inside the
 `if pending_event is not None:` block and falls back to None otherwise.
+
+Also verifies that internal control interrupt reasons like "Stop requested"
+do not get recycled into the pending-user-message follow-up path.
 """
 
 from types import SimpleNamespace
 
+from gateway.run import _is_control_interrupt_message
+
 
 def _extract_channel_prompt(pending_event):
     """Reproduce the fixed logic from gateway/run.py.
@@ -21,6 +26,15 @@ def _extract_channel_prompt(pending_event):
     return next_channel_prompt
 
 
+def _extract_pending_text(interrupted, pending_event, interrupt_message):
+    """Mirror gateway/run.py: return the interrupt text to re-queue, else None."""
+    if not interrupted or pending_event is not None or not interrupt_message:
+        return None
+    if _is_control_interrupt_message(interrupt_message):
+        return None
+    return interrupt_message
+
+
 class TestPendingEventNoneChannelPrompt:
     """Guard against AttributeError when pending_event is None."""
 
@@ -40,3 +54,19 @@ class TestPendingEventNoneChannelPrompt:
         event = SimpleNamespace()
         result = _extract_channel_prompt(event)
         assert result is None
+
+
+class TestControlInterruptMessages:
+    """Internal control reasons are dropped; real user text is re-queued."""
+
+    def test_stop_requested_is_not_treated_as_pending_user_message(self):
+        # "Stop requested" is expected to be filtered, so nothing is re-queued.
+        assert _extract_pending_text(True, None, "Stop requested") is None
+
+    def test_session_reset_requested_is_not_treated_as_pending_user_message(self):
+        # The session-reset reason is expected to be filtered the same way.
+        assert _extract_pending_text(True, None, "Session reset requested") is None
+
+    def test_real_user_interrupt_message_still_requeues(self):
+        user_text = "actually use postgres instead"
+        assert _extract_pending_text(True, None, user_text) == user_text
diff --git a/tests/gateway/test_proxy_mode.py b/tests/gateway/test_proxy_mode.py
index f3024cb09f1..11180639e8d 100644
--- a/tests/gateway/test_proxy_mode.py
+++ b/tests/gateway/test_proxy_mode.py
@@ -19,6 +19,7 @@ def _make_runner(proxy_url=None):
     runner.config = MagicMock()
     runner.config.streaming = StreamingConfig()
     runner._running_agents = {}
+    runner._session_run_generation = {}
     runner._session_model_overrides = {}
     runner._agent_cache = {}
     runner._agent_cache_lock = None
@@ -160,10 +161,12 @@ class TestRunAgentProxyDispatch:
             source=source,
             session_id="test-session-123",
             session_key="test-key",
+            run_generation=7,
         )
 
         assert result["final_response"] == "Hello from remote!"
         runner._run_agent_via_proxy.assert_called_once()
+        assert runner._run_agent_via_proxy.call_args.kwargs["run_generation"] == 7
 
     @pytest.mark.asyncio
     async def test_run_agent_skips_proxy_when_not_configured(self, monkeypatch):
@@ -370,6 +373,40 @@ class TestRunAgentViaProxy:
         assert "session_id" in result
         assert result["session_id"] == "sess-123"
 
+    @pytest.mark.asyncio
+    async def test_proxy_stale_generation_returns_empty_result(self, monkeypatch):
+        monkeypatch.setenv("GATEWAY_PROXY_URL", "http://host:8642")
+        monkeypatch.delenv("GATEWAY_PROXY_KEY", raising=False)
+        runner = _make_runner()
+        source = _make_source()
+        runner._session_run_generation["test-key"] = 2  # differs from run_generation=1 passed below: the "stale" case
+
+        resp = _FakeSSEResponse(
+            status=200,
+            sse_chunks=[
+                'data: {"choices":[{"delta":{"content":"stale"}}]}\n\n',  # content that must not surface in the result
+                "data: [DONE]\n\n",
+            ],
+        )
+        session = _FakeSession(resp)
+
+        with patch("gateway.run._load_gateway_config", return_value={}):
+            with _patch_aiohttp(session):
+                with patch("aiohttp.ClientTimeout"):
+                    result = await runner._run_agent_via_proxy(
+                        message="hi",
+                        context_prompt="",
+                        history=[],
+                        source=source,
+                        session_id="sess-123",
+                        session_key="test-key",
+                        run_generation=1,
+                    )
+
+        assert result["final_response"] == ""  # the streamed "stale" text is discarded
+        assert result["messages"] == []
+        assert result["api_calls"] == 0
+
     @pytest.mark.asyncio
     async def test_no_auth_header_without_key(self, monkeypatch):
         monkeypatch.setenv("GATEWAY_PROXY_URL", "http://host:8642")
diff --git a/tests/gateway/test_restart_redelivery_dedup.py b/tests/gateway/test_restart_redelivery_dedup.py
new file mode 100644
index 00000000000..aa4e4330caf
--- /dev/null
+++ b/tests/gateway/test_restart_redelivery_dedup.py
@@ -0,0 +1,247 @@
+"""Tests for /restart idempotency guard against Telegram update re-delivery.
+
+When python-telegram-bot's (PTB) graceful-shutdown ACK call (the final
+`get_updates` on exit) fails with a network error, Telegram re-delivers the
+`/restart` message to the new gateway process.  Without a dedup guard, the new
+gateway would process it again and immediately restart — a self-perpetuating loop.
+"""
+import asyncio
+import json
+import time
+from unittest.mock import MagicMock
+
+import pytest
+
+import gateway.run as gateway_run
+from gateway.platforms.base import MessageEvent, MessageType
+from tests.gateway.restart_test_helpers import make_restart_runner, make_restart_source
+
+
+def _make_restart_event(update_id: int | None = 100) -> MessageEvent:
+    return MessageEvent(
+        text="/restart",
+        message_type=MessageType.TEXT,
+        source=make_restart_source(),
+        message_id="m1",
+        platform_update_id=update_id,
+    )
+
+
+@pytest.mark.asyncio
+async def test_restart_handler_writes_dedup_marker_with_update_id(tmp_path, monkeypatch):
+    """First /restart writes .restart_last_processed.json with the triggering update_id."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.delenv("INVOCATION_ID", raising=False)
+
+    runner, _adapter = make_restart_runner()
+    runner.request_restart = MagicMock(return_value=True)
+
+    event = _make_restart_event(update_id=12345)
+    result = await runner._handle_restart_command(event)
+
+    assert "Restarting gateway" in result
+    marker_path = tmp_path / ".restart_last_processed.json"
+    assert marker_path.exists()
+    data = json.loads(marker_path.read_text())
+    assert data["platform"] == "telegram"
+    assert data["update_id"] == 12345
+    assert isinstance(data["requested_at"], (int, float))
+
+
+@pytest.mark.asyncio
+async def test_redelivered_restart_with_same_update_id_is_ignored(tmp_path, monkeypatch):
+    """A /restart with update_id <= recorded marker is silently ignored as a redelivery."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.delenv("INVOCATION_ID", raising=False)
+
+    # Previous gateway recorded update_id=12345 a few seconds ago
+    marker = tmp_path / ".restart_last_processed.json"
+    marker.write_text(json.dumps({
+        "platform": "telegram",
+        "update_id": 12345,
+        "requested_at": time.time() - 5,
+    }))
+
+    runner, _adapter = make_restart_runner()
+    runner.request_restart = MagicMock()
+
+    event = _make_restart_event(update_id=12345)  # same update_id → redelivery
+    result = await runner._handle_restart_command(event)
+
+    assert result == ""  # silently ignored
+    runner.request_restart.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_redelivered_restart_with_older_update_id_is_ignored(tmp_path, monkeypatch):
+    """update_id strictly LESS than the recorded one is also a redelivery."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.delenv("INVOCATION_ID", raising=False)
+
+    marker = tmp_path / ".restart_last_processed.json"
+    marker.write_text(json.dumps({
+        "platform": "telegram",
+        "update_id": 12345,
+        "requested_at": time.time() - 5,
+    }))
+
+    runner, _adapter = make_restart_runner()
+    runner.request_restart = MagicMock()
+
+    event = _make_restart_event(update_id=12344)  # older update — shouldn't happen,
+                                                  # but if Telegram does re-deliver
+                                                  # something older, treat as stale
+    result = await runner._handle_restart_command(event)
+
+    assert result == ""
+    runner.request_restart.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_fresh_restart_with_higher_update_id_is_processed(tmp_path, monkeypatch):
+    """A NEW /restart from the user (higher update_id) bypasses the dedup guard."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.delenv("INVOCATION_ID", raising=False)
+
+    # Previous restart recorded update_id=12345
+    marker = tmp_path / ".restart_last_processed.json"
+    marker.write_text(json.dumps({
+        "platform": "telegram",
+        "update_id": 12345,
+        "requested_at": time.time() - 5,
+    }))
+
+    runner, _adapter = make_restart_runner()
+    runner.request_restart = MagicMock(return_value=True)
+
+    event = _make_restart_event(update_id=12346)  # strictly higher → fresh
+    result = await runner._handle_restart_command(event)
+
+    assert "Restarting gateway" in result
+    runner.request_restart.assert_called_once()
+
+    # Marker is overwritten with the new update_id
+    data = json.loads(marker.read_text())
+    assert data["update_id"] == 12346
+
+
+@pytest.mark.asyncio
+async def test_stale_marker_older_than_5min_does_not_block(tmp_path, monkeypatch):
+    """A marker older than the 5-minute window is ignored — even the same update_id restarts."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.delenv("INVOCATION_ID", raising=False)
+
+    marker = tmp_path / ".restart_last_processed.json"
+    marker.write_text(json.dumps({
+        "platform": "telegram",
+        "update_id": 12345,
+        "requested_at": time.time() - 600,  # 10 minutes ago
+    }))
+
+    runner, _adapter = make_restart_runner()
+    runner.request_restart = MagicMock(return_value=True)
+
+    # Same update_id as the stale marker, but the marker is too old to trust
+    event = _make_restart_event(update_id=12345)
+    result = await runner._handle_restart_command(event)
+
+    assert "Restarting gateway" in result
+    runner.request_restart.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_no_marker_file_allows_restart(tmp_path, monkeypatch):
+    """Clean gateway start (no prior marker) processes /restart normally."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.delenv("INVOCATION_ID", raising=False)
+
+    runner, _adapter = make_restart_runner()
+    runner.request_restart = MagicMock(return_value=True)
+
+    event = _make_restart_event(update_id=100)
+    result = await runner._handle_restart_command(event)
+
+    assert "Restarting gateway" in result
+    runner.request_restart.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_corrupt_marker_file_is_treated_as_absent(tmp_path, monkeypatch):
+    """Malformed JSON in the marker file doesn't crash — /restart proceeds."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.delenv("INVOCATION_ID", raising=False)
+
+    marker = tmp_path / ".restart_last_processed.json"
+    marker.write_text("not-json{")
+
+    runner, _adapter = make_restart_runner()
+    runner.request_restart = MagicMock(return_value=True)
+
+    event = _make_restart_event(update_id=100)
+    result = await runner._handle_restart_command(event)
+
+    assert "Restarting gateway" in result
+    runner.request_restart.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_event_without_update_id_bypasses_dedup(tmp_path, monkeypatch):
+    """Events with no platform_update_id (non-Telegram, CLI fallback) aren't gated."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.delenv("INVOCATION_ID", raising=False)
+
+    marker = tmp_path / ".restart_last_processed.json"
+    marker.write_text(json.dumps({
+        "platform": "telegram",
+        "update_id": 999999,
+        "requested_at": time.time(),
+    }))
+
+    runner, _adapter = make_restart_runner()
+    runner.request_restart = MagicMock(return_value=True)
+
+    # No update_id — the dedup check should NOT kick in
+    event = _make_restart_event(update_id=None)
+    result = await runner._handle_restart_command(event)
+
+    assert "Restarting gateway" in result
+    runner.request_restart.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_different_platform_bypasses_dedup(tmp_path, monkeypatch):
+    """Marker from Telegram doesn't block a /restart from another platform."""
+    from gateway.config import Platform
+    from gateway.session import SessionSource
+
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.delenv("INVOCATION_ID", raising=False)
+
+    marker = tmp_path / ".restart_last_processed.json"
+    marker.write_text(json.dumps({
+        "platform": "telegram",
+        "update_id": 12345,
+        "requested_at": time.time(),
+    }))
+
+    runner, _adapter = make_restart_runner()
+    runner.request_restart = MagicMock(return_value=True)
+
+    # /restart from Discord — not a redelivery candidate
+    discord_source = SessionSource(
+        platform=Platform.DISCORD,
+        chat_id="discord-chan",
+        chat_type="dm",
+        user_id="u1",
+    )
+    event = MessageEvent(
+        text="/restart",
+        message_type=MessageType.TEXT,
+        source=discord_source,
+        message_id="m1",
+        platform_update_id=12345,
+    )
+    result = await runner._handle_restart_command(event)
+
+    assert "Restarting gateway" in result
+    runner.request_restart.assert_called_once()
diff --git a/tests/gateway/test_restart_resume_pending.py b/tests/gateway/test_restart_resume_pending.py
new file mode 100644
index 00000000000..c11b2740db3
--- /dev/null
+++ b/tests/gateway/test_restart_resume_pending.py
@@ -0,0 +1,688 @@
+"""Tests for the resume_pending session continuity path.
+
+Covers the behaviour introduced to fix the ``Gateway shutting down ...
+task will be interrupted`` follow-up bug (spec: PR #11852, builds on
+PRs #9850, #9934, #7536):
+
+1. When a gateway restart drain times out and agents are force-interrupted,
+   the affected sessions are flagged ``resume_pending=True`` — not
+   ``suspended`` — so the next user message on the same session_key
+   auto-resumes from the existing transcript instead of getting routed
+   through ``suspend_recently_active()`` and converted into a fresh
+   session.
+
+2. ``suspended=True`` (from ``/stop`` or stuck-loop escalation) still
+   wins over ``resume_pending`` — the forced-wipe path is preserved.
+
+3. The restart-resume system note injected into the next user message is
+   a superset of the existing tool-tail auto-continue note (from
+   PR #9934), using session-entry metadata rather than just transcript
+   shape so it fires even when the interrupted transcript does NOT end
+   with a ``tool`` role.
+
+4. The existing ``.restart_failure_counts`` stuck-loop counter from
+   PR #7536 remains the single source of escalation — no parallel
+   counter is added on ``SessionEntry``.
+"""
+
+import asyncio
+from datetime import datetime, timedelta
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.session import SessionEntry, SessionSource, SessionStore
+from tests.gateway.restart_test_helpers import (
+    make_restart_runner,
+    make_restart_source,
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_source(platform=Platform.TELEGRAM, chat_id="123", user_id="u1"):
+    return SessionSource(platform=platform, chat_id=chat_id, user_id=user_id)
+
+
+def _make_store(tmp_path):
+    return SessionStore(sessions_dir=tmp_path, config=GatewayConfig())
+
+
+def _simulate_note_injection(
+    agent_history: list,
+    user_message: str,
+    resume_entry: SessionEntry | None,
+) -> str:
+    """Mirror the note-injection logic in gateway/run.py _run_agent().
+
+    Matches the production code in the ``run_sync`` closure so we can
+    test the decision tree without a full gateway runner.
+    """
+    message = user_message
+    is_resume_pending = bool(
+        resume_entry is not None and getattr(resume_entry, "resume_pending", False)
+    )
+
+    if is_resume_pending:
+        reason = getattr(resume_entry, "resume_reason", None) or "restart_timeout"
+        reason_phrase = (
+            "a gateway restart"
+            if reason == "restart_timeout"
+            else "a gateway shutdown"
+            if reason == "shutdown_timeout"
+            else "a gateway interruption"
+        )
+        message = (
+            f"[System note: Your previous turn in this session was interrupted "
+            f"by {reason_phrase}. The conversation history below is intact. "
+            f"If it contains unfinished tool result(s), process them first and "
+            f"summarize what was accomplished, then address the user's new "
+            f"message below.]\n\n"
+            + message
+        )
+    elif agent_history and agent_history[-1].get("role") == "tool":
+        message = (
+            "[System note: Your previous turn was interrupted before you could "
+            "process the last tool result(s). The conversation history contains "
+            "tool outputs you haven't responded to yet. Please finish processing "
+            "those results and summarize what was accomplished, then address the "
+            "user's new message below.]\n\n"
+            + message
+        )
+    return message
+
+
+# ---------------------------------------------------------------------------
+# SessionEntry field + serialization
+# ---------------------------------------------------------------------------
+
+
+class TestSessionEntryResumeFields:
+    def test_defaults(self):
+        now = datetime.now()
+        entry = SessionEntry(
+            session_key="agent:main:telegram:dm:1",
+            session_id="sid",
+            created_at=now,
+            updated_at=now,
+        )
+        assert entry.resume_pending is False
+        assert entry.resume_reason is None
+        assert entry.last_resume_marked_at is None
+
+    def test_roundtrip_with_resume_fields(self):
+        now = datetime(2026, 4, 18, 12, 0, 0)
+        entry = SessionEntry(
+            session_key="agent:main:telegram:dm:1",
+            session_id="sid",
+            created_at=now,
+            updated_at=now,
+            resume_pending=True,
+            resume_reason="restart_timeout",
+            last_resume_marked_at=now,
+        )
+        restored = SessionEntry.from_dict(entry.to_dict())
+        assert restored.resume_pending is True
+        assert restored.resume_reason == "restart_timeout"
+        assert restored.last_resume_marked_at == now
+
+    def test_from_dict_legacy_without_resume_fields(self):
+        """Old sessions.json without the new fields deserialize cleanly."""
+        now = datetime.now()
+        legacy = {
+            "session_key": "agent:main:telegram:dm:1",
+            "session_id": "sid",
+            "created_at": now.isoformat(),
+            "updated_at": now.isoformat(),
+            "chat_type": "dm",
+        }
+        restored = SessionEntry.from_dict(legacy)
+        assert restored.resume_pending is False
+        assert restored.resume_reason is None
+        assert restored.last_resume_marked_at is None
+
+    def test_malformed_timestamp_is_tolerated(self):
+        now = datetime.now()
+        data = {
+            "session_key": "k",
+            "session_id": "sid",
+            "created_at": now.isoformat(),
+            "updated_at": now.isoformat(),
+            "resume_pending": True,
+            "resume_reason": "restart_timeout",
+            "last_resume_marked_at": "not-a-timestamp",
+        }
+        restored = SessionEntry.from_dict(data)
+        # resume_pending still honoured, only the broken timestamp drops
+        assert restored.resume_pending is True
+        assert restored.resume_reason == "restart_timeout"
+        assert restored.last_resume_marked_at is None
+
+
+# ---------------------------------------------------------------------------
+# SessionStore.mark_resume_pending / clear_resume_pending
+# ---------------------------------------------------------------------------
+
+
+class TestMarkResumePending:
+    def test_marks_existing_session(self, tmp_path):
+        store = _make_store(tmp_path)
+        source = _make_source()
+        entry = store.get_or_create_session(source)
+
+        assert store.mark_resume_pending(entry.session_key) is True
+        refreshed = store._entries[entry.session_key]
+        assert refreshed.resume_pending is True
+        assert refreshed.resume_reason == "restart_timeout"
+        assert refreshed.last_resume_marked_at is not None
+
+    def test_custom_reason_persists(self, tmp_path):
+        store = _make_store(tmp_path)
+        source = _make_source()
+        entry = store.get_or_create_session(source)
+
+        store.mark_resume_pending(entry.session_key, reason="shutdown_timeout")
+        assert store._entries[entry.session_key].resume_reason == "shutdown_timeout"
+
+    def test_returns_false_for_unknown_key(self, tmp_path):
+        store = _make_store(tmp_path)
+        assert store.mark_resume_pending("no-such-key") is False
+
+    def test_does_not_override_suspended(self, tmp_path):
+        """suspended wins — mark_resume_pending is a no-op on a suspended entry."""
+        store = _make_store(tmp_path)
+        source = _make_source()
+        entry = store.get_or_create_session(source)
+        store.suspend_session(entry.session_key)
+
+        assert store.mark_resume_pending(entry.session_key) is False
+        e = store._entries[entry.session_key]
+        assert e.suspended is True
+        assert e.resume_pending is False
+
+    def test_survives_roundtrip_through_json(self, tmp_path):
+        store = _make_store(tmp_path)
+        source = _make_source()
+        entry = store.get_or_create_session(source)
+        store.mark_resume_pending(entry.session_key, reason="restart_timeout")
+
+        # Reload from disk
+        store2 = _make_store(tmp_path)
+        store2._ensure_loaded()
+        reloaded = store2._entries[entry.session_key]
+        assert reloaded.resume_pending is True
+        assert reloaded.resume_reason == "restart_timeout"
+
+
+class TestClearResumePending:
+    def test_clears_flag(self, tmp_path):
+        store = _make_store(tmp_path)
+        source = _make_source()
+        entry = store.get_or_create_session(source)
+        store.mark_resume_pending(entry.session_key)
+
+        assert store.clear_resume_pending(entry.session_key) is True
+        e = store._entries[entry.session_key]
+        assert e.resume_pending is False
+        assert e.resume_reason is None
+        assert e.last_resume_marked_at is None
+
+    def test_returns_false_when_not_pending(self, tmp_path):
+        store = _make_store(tmp_path)
+        source = _make_source()
+        entry = store.get_or_create_session(source)
+        # Not marked
+        assert store.clear_resume_pending(entry.session_key) is False
+
+    def test_returns_false_for_unknown_key(self, tmp_path):
+        store = _make_store(tmp_path)
+        assert store.clear_resume_pending("no-such-key") is False
+
+
+# ---------------------------------------------------------------------------
+# SessionStore.get_or_create_session resume_pending behaviour
+# ---------------------------------------------------------------------------
+
+
+class TestGetOrCreateResumePending:
+    def test_resume_pending_preserves_session_id(self, tmp_path):
+        """This is THE core behavioural fix — resume_pending ≠ new session."""
+        store = _make_store(tmp_path)
+        source = _make_source()
+        first = store.get_or_create_session(source)
+        original_sid = first.session_id
+        store.mark_resume_pending(first.session_key)
+
+        second = store.get_or_create_session(source)
+        assert second.session_id == original_sid
+        assert second.was_auto_reset is False
+        assert second.auto_reset_reason is None
+        # Flag is NOT cleared on read — only on successful turn completion.
+        assert second.resume_pending is True
+
+    def test_suspended_still_creates_new_session(self, tmp_path):
+        """Regression guard — suspended must still force a clean slate."""
+        store = _make_store(tmp_path)
+        source = _make_source()
+        first = store.get_or_create_session(source)
+        original_sid = first.session_id
+        store.suspend_session(first.session_key)
+
+        second = store.get_or_create_session(source)
+        assert second.session_id != original_sid
+        assert second.was_auto_reset is True
+        assert second.auto_reset_reason == "suspended"
+
+    def test_suspended_overrides_resume_pending(self, tmp_path):
+        """Terminal escalation: a session that somehow has BOTH flags must
+        behave like ``suspended`` — forced wipe + auto_reset_reason."""
+        store = _make_store(tmp_path)
+        source = _make_source()
+        first = store.get_or_create_session(source)
+        original_sid = first.session_id
+
+        # Force the pathological state directly (normally mark_resume_pending
+        # refuses to run when suspended=True, but a stuck-loop escalation
+        # can set suspended=True AFTER resume_pending is set).
+        with store._lock:
+            e = store._entries[first.session_key]
+            e.resume_pending = True
+            e.resume_reason = "restart_timeout"
+            e.suspended = True
+            store._save()
+
+        second = store.get_or_create_session(source)
+        assert second.session_id != original_sid
+        assert second.was_auto_reset is True
+        assert second.auto_reset_reason == "suspended"
+
+
+# ---------------------------------------------------------------------------
+# SessionStore.suspend_recently_active skip behaviour
+# ---------------------------------------------------------------------------
+
+
+class TestSuspendRecentlyActiveSkipsResumePending:
+    def test_resume_pending_entries_not_suspended(self, tmp_path):
+        store = _make_store(tmp_path)
+        source = _make_source()
+        entry = store.get_or_create_session(source)
+        store.mark_resume_pending(entry.session_key)
+
+        count = store.suspend_recently_active()
+        assert count == 0
+        e = store._entries[entry.session_key]
+        assert e.suspended is False
+        assert e.resume_pending is True
+
+    def test_non_resume_pending_still_suspended(self, tmp_path):
+        """Non-resume sessions still get the old crash-recovery suspension."""
+        store = _make_store(tmp_path)
+        source_a = _make_source(chat_id="a")
+        source_b = _make_source(chat_id="b")
+        entry_a = store.get_or_create_session(source_a)
+        entry_b = store.get_or_create_session(source_b)
+        store.mark_resume_pending(entry_a.session_key)
+
+        count = store.suspend_recently_active()
+        assert count == 1
+        assert store._entries[entry_a.session_key].suspended is False
+        assert store._entries[entry_b.session_key].suspended is True
+
+
+# ---------------------------------------------------------------------------
+# Restart-resume system-note injection
+# ---------------------------------------------------------------------------
+
+
+class TestResumePendingSystemNote:
+    def _pending_entry(self, reason="restart_timeout") -> SessionEntry:
+        now = datetime.now()
+        return SessionEntry(
+            session_key="agent:main:telegram:dm:1",
+            session_id="sid",
+            created_at=now,
+            updated_at=now,
+            resume_pending=True,
+            resume_reason=reason,
+            last_resume_marked_at=now,
+        )
+
+    def test_resume_pending_restart_note_mentions_restart(self):
+        entry = self._pending_entry(reason="restart_timeout")
+        result = _simulate_note_injection(
+            agent_history=[{"role": "assistant", "content": "in progress"}],
+            user_message="what happened?",
+            resume_entry=entry,
+        )
+        assert "[System note:" in result
+        assert "gateway restart" in result
+        assert "what happened?" in result
+
+    def test_resume_pending_shutdown_note_mentions_shutdown(self):
+        entry = self._pending_entry(reason="shutdown_timeout")
+        result = _simulate_note_injection(
+            agent_history=[{"role": "assistant", "content": "in progress"}],
+            user_message="ping",
+            resume_entry=entry,
+        )
+        assert "gateway shutdown" in result
+
+    def test_resume_pending_fires_without_tool_tail(self):
+        """Key improvement over PR #9934: the restart-resume note fires
+        even when the transcript's last role is NOT ``tool``."""
+        entry = self._pending_entry()
+        history = [
+            {"role": "user", "content": "run a long thing"},
+            {"role": "assistant", "content": "ok, starting..."},
+        ]
+        result = _simulate_note_injection(history, "ping", resume_entry=entry)
+        assert "[System note:" in result
+        assert "gateway restart" in result
+
+    def test_resume_pending_subsumes_tool_tail_note(self):
+        """When BOTH conditions are true, the restart-resume note wins —
+        no duplicate notes."""
+        entry = self._pending_entry()
+        history = [
+            {"role": "assistant", "content": None, "tool_calls": [
+                {"id": "c1", "function": {"name": "x", "arguments": "{}"}},
+            ]},
+            {"role": "tool", "tool_call_id": "c1", "content": "result"},
+        ]
+        result = _simulate_note_injection(history, "ping", resume_entry=entry)
+        assert result.count("[System note:") == 1
+        assert "gateway restart" in result
+        # Old tool-tail wording absent
+        assert "haven't responded to yet" not in result
+
+    def test_no_resume_pending_preserves_tool_tail_note(self):
+        """Regression: the old PR #9934 tool-tail behaviour is unchanged."""
+        history = [
+            {"role": "assistant", "content": None, "tool_calls": [
+                {"id": "c1", "function": {"name": "x", "arguments": "{}"}},
+            ]},
+            {"role": "tool", "tool_call_id": "c1", "content": "result"},
+        ]
+        result = _simulate_note_injection(history, "ping", resume_entry=None)
+        assert "[System note:" in result
+        assert "tool result" in result
+
+    def test_no_note_when_nothing_to_resume(self):
+        history = [
+            {"role": "user", "content": "hello"},
+            {"role": "assistant", "content": "hi"},
+        ]
+        result = _simulate_note_injection(history, "ping", resume_entry=None)
+        assert result == "ping"
+
+
+# ---------------------------------------------------------------------------
+# Drain-timeout path marks sessions resume_pending
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_drain_timeout_marks_resume_pending():
+    """End-to-end: a drain timeout during gateway stop should flag every
+    active session as resume_pending BEFORE the interrupt fires, so the
+    next startup's suspend_recently_active() does not destroy them."""
+    runner, adapter = make_restart_runner()
+    adapter.disconnect = AsyncMock()
+    runner._restart_drain_timeout = 0.05
+
+    running_agent = MagicMock()
+    session_key_one = "agent:main:telegram:dm:A"
+    session_key_two = "agent:main:telegram:dm:B"
+    runner._running_agents = {
+        session_key_one: running_agent,
+        session_key_two: MagicMock(),
+    }
+
+    # Plug a mock session_store that records marks.
+    session_store = MagicMock()
+    session_store.mark_resume_pending = MagicMock(return_value=True)
+    runner.session_store = session_store
+
+    with patch("gateway.status.remove_pid_file"), patch(
+        "gateway.status.write_runtime_status"
+    ):
+        await runner.stop()
+
+    # Both active sessions were marked with the shutdown_timeout reason.
+    calls = session_store.mark_resume_pending.call_args_list
+    marked = {args[0][0] for args in calls}
+    assert marked == {session_key_one, session_key_two}
+    for args in calls:
+        assert args[0][1] == "shutdown_timeout"
+
+
+@pytest.mark.asyncio
+async def test_drain_timeout_uses_restart_reason_when_restarting():
+    runner, adapter = make_restart_runner()
+    adapter.disconnect = AsyncMock()
+    runner._restart_drain_timeout = 0.05
+    runner._restart_requested = True
+
+    running_agent = MagicMock()
+    runner._running_agents = {"agent:main:telegram:dm:A": running_agent}
+
+    session_store = MagicMock()
+    session_store.mark_resume_pending = MagicMock(return_value=True)
+    runner.session_store = session_store
+
+    with patch("gateway.status.remove_pid_file"), patch(
+        "gateway.status.write_runtime_status"
+    ):
+        await runner.stop(restart=True, detached_restart=False, service_restart=True)
+
+    calls = session_store.mark_resume_pending.call_args_list
+    assert calls, "expected at least one mark_resume_pending call"
+    for args in calls:
+        assert args[0][1] == "restart_timeout"
+
+
+@pytest.mark.asyncio
+async def test_clean_drain_does_not_mark_resume_pending():
+    """If the drain completes within timeout (no force-interrupt), no
+    sessions should be flagged — the normal shutdown path is unchanged."""
+    runner, adapter = make_restart_runner()
+    adapter.disconnect = AsyncMock()
+
+    running_agent = MagicMock()
+    runner._running_agents = {"agent:main:telegram:dm:A": running_agent}
+
+    # Finish the agent before the (generous) drain deadline
+    async def finish_agent():
+        await asyncio.sleep(0.05)
+        runner._running_agents.clear()
+
+    asyncio.create_task(finish_agent())
+
+    session_store = MagicMock()
+    session_store.mark_resume_pending = MagicMock(return_value=True)
+    runner.session_store = session_store
+
+    with patch("gateway.status.remove_pid_file"), patch(
+        "gateway.status.write_runtime_status"
+    ):
+        await runner.stop()
+
+    session_store.mark_resume_pending.assert_not_called()
+    running_agent.interrupt.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_drain_timeout_only_marks_still_running_sessions():
+    """A session that finished gracefully during the drain window must
+    NOT be marked ``resume_pending`` — it completed cleanly and its
+    next turn should be a normal fresh turn, not one prefixed with the
+    restart-interruption system note.
+
+    Regression guard: the marking must consult the live ``self._running_agents``
+    at timeout, not the ``active_agents`` snapshot taken at drain start.
+    """
+    runner, adapter = make_restart_runner()
+    adapter.disconnect = AsyncMock()
+    # Long enough for the finisher to exit, short enough to still time out
+    # with the stuck session still present.
+    runner._restart_drain_timeout = 0.3
+
+    session_key_finisher = "agent:main:telegram:dm:A"
+    session_key_stuck = "agent:main:telegram:dm:B"
+    runner._running_agents = {
+        session_key_finisher: MagicMock(),
+        session_key_stuck: MagicMock(),
+    }
+
+    async def finish_one():
+        await asyncio.sleep(0.05)
+        runner._running_agents.pop(session_key_finisher, None)
+
+    asyncio.create_task(finish_one())
+
+    session_store = MagicMock()
+    session_store.mark_resume_pending = MagicMock(return_value=True)
+    runner.session_store = session_store
+
+    with patch("gateway.status.remove_pid_file"), patch(
+        "gateway.status.write_runtime_status"
+    ):
+        await runner.stop()
+
+    calls = session_store.mark_resume_pending.call_args_list
+    marked = {args[0][0] for args in calls}
+    # Only the session still running at timeout is marked; the finisher is not.
+    assert marked == {session_key_stuck}
+
+
+@pytest.mark.asyncio
+async def test_drain_timeout_skips_pending_sentinel_sessions():
+    """Sentinel placeholders must not be flagged ``resume_pending``.
+
+    A pending sentinel stands in for a session whose AIAgent has not been
+    built yet; ``_interrupt_running_agents()`` skips those, and the
+    resume marking has to match: no agent ran, so no turn was cut short.
+    """
+    from gateway.run import _AGENT_PENDING_SENTINEL
+
+    real_key = "agent:main:telegram:dm:A"
+    sentinel_key = "agent:main:telegram:dm:B"
+
+    runner, adapter = make_restart_runner()
+    adapter.disconnect = AsyncMock()
+    runner._restart_drain_timeout = 0.05  # tiny drain window for this test
+    runner._running_agents = {
+        real_key: MagicMock(),
+        sentinel_key: _AGENT_PENDING_SENTINEL,
+    }
+
+    store = MagicMock()
+    store.mark_resume_pending = MagicMock(return_value=True)
+    runner.session_store = store
+
+    with patch("gateway.status.remove_pid_file"), patch(
+        "gateway.status.write_runtime_status"
+    ):
+        await runner.stop()
+
+    marked = {c[0][0] for c in store.mark_resume_pending.call_args_list}
+    assert marked == {real_key}
+
+
+# ---------------------------------------------------------------------------
+# Shutdown banner wording
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_restart_banner_uses_try_to_resume_wording():
+    """The pre-drain notification must hedge its resume promise, since
+    session continuity is best-effort (the stuck-loop counter can still
+    escalate a session to suspended)."""
+    runner, adapter = make_restart_runner()
+    runner._running_agents["agent:main:telegram:dm:999"] = MagicMock()
+    runner._restart_requested = True
+
+    await runner._notify_active_sessions_of_shutdown()
+
+    # Exactly one banner goes out, and it carries both key phrases.
+    (banner,) = adapter.sent
+    for phrase in ("restarting", "try to resume"):
+        assert phrase in banner
+
+
+# ---------------------------------------------------------------------------
+# Stuck-loop escalation integration
+# ---------------------------------------------------------------------------
+
+
+class TestStuckLoopEscalation:
+    """Terminal escalation still flows through the pre-existing
+    .restart_failure_counts counter (PR #7536); SessionEntry gained no
+    parallel counter.  Once the configured threshold is reached, startup
+    sets suspended=True, which takes precedence over resume_pending."""
+
+    def test_escalation_via_stuck_loop_counter_overrides_resume_pending(
+        self, tmp_path, monkeypatch
+    ):
+        """A session that is restart-interrupted over and over reaches the
+        stuck-loop threshold; the following startup must hand it a fresh
+        session even though resume_pending is set."""
+        import json
+
+        from gateway.run import GatewayRunner
+
+        store = _make_store(tmp_path)
+        src = _make_source()
+        first = store.get_or_create_session(src)
+        store.mark_resume_pending(first.session_key, reason="restart_timeout")
+
+        # Seed the counter at 3 consecutive interrupted restarts, the value
+        # at which _suspend_stuck_loop_sessions is expected to suspend.
+        failure_counts = {first.session_key: 3}
+        (tmp_path / ".restart_failure_counts").write_text(json.dumps(failure_counts))
+
+        monkeypatch.setattr("gateway.run._hermes_home", tmp_path)
+        runner = object.__new__(GatewayRunner)
+        runner.session_store = store
+
+        assert GatewayRunner._suspend_stuck_loop_sessions(runner) == 1
+        stored = store._entries[first.session_key]
+        assert stored.suspended is True
+        # resume_pending stays set on the entry; suspended wins inside
+        # get_or_create_session, so the next message gets a new session id.
+        replacement = store.get_or_create_session(src)
+        assert replacement.session_id != first.session_id
+        assert replacement.auto_reset_reason == "suspended"
+
+    def test_successful_turn_flow_clears_both_counter_and_resume_pending(
+        self, tmp_path, monkeypatch
+    ):
+        """Post-turn cleanup in the gateway has to wipe both signals, so a
+        later restart-interrupt begins counting from scratch."""
+        import json
+
+        from gateway.run import GatewayRunner
+
+        store = _make_store(tmp_path)
+        entry = store.get_or_create_session(_make_source())
+        key = entry.session_key
+        store.mark_resume_pending(key, reason="restart_timeout")
+
+        counts_path = tmp_path / ".restart_failure_counts"
+        counts_path.write_text(json.dumps({key: 2}))
+
+        monkeypatch.setattr("gateway.run._hermes_home", tmp_path)
+        runner = object.__new__(GatewayRunner)
+        runner.session_store = store
+
+        GatewayRunner._clear_restart_failure_count(runner, key)
+        store.clear_resume_pending(key)
+
+        assert not counts_path.exists()
+        assert store._entries[key].resume_pending is False
diff --git a/tests/gateway/test_run_progress_topics.py b/tests/gateway/test_run_progress_topics.py
index 4878f2faec8..59e9fa0408d 100644
--- a/tests/gateway/test_run_progress_topics.py
+++ b/tests/gateway/test_run_progress_topics.py
@@ -51,6 +51,9 @@ class ProgressCaptureAdapter(BasePlatformAdapter):
     async def send_typing(self, chat_id, metadata=None) -> None:
         self.typing.append({"chat_id": chat_id, "metadata": metadata})
 
+    async def stop_typing(self, chat_id) -> None:  # records a stop marker in self.typing
+        self.typing.append(dict(chat_id=chat_id, metadata={"stopped": True}))
+
     async def get_chat_info(self, chat_id: str):
         return {"id": chat_id}
 
@@ -90,6 +93,40 @@ class LongPreviewAgent:
         }
 
 
+class DelayedProgressAgent:
+    """Fake agent: two tool-progress events with pauses, then a canned result."""
+    def __init__(self, **kwargs):
+        self.tools = []
+        self.tool_progress_callback = kwargs.get("tool_progress_callback")
+
+    def run_conversation(self, message, conversation_history=None, task_id=None):
+        for preview, pause in (("first command", 0.45), ("second command", 0.1)):
+            self.tool_progress_callback("tool.started", "terminal", preview, {})
+            time.sleep(pause)
+        return dict(
+            final_response="done",
+            messages=[],
+            api_calls=1,
+        )
+
+
+class DelayedInterimAgent:
+    """Fake agent: two interim assistant messages with pauses, then a canned result."""
+    def __init__(self, **kwargs):
+        self.tools = []
+        self.interim_assistant_callback = kwargs.get("interim_assistant_callback")
+
+    def run_conversation(self, message, conversation_history=None, task_id=None):
+        for text, pause in (("first interim", 0.45), ("second interim", 0.1)):
+            self.interim_assistant_callback(text)
+            time.sleep(pause)
+        return dict(
+            final_response="done",
+            messages=[],
+            api_calls=1,
+        )
+
+
 def _make_runner(adapter):
     gateway_run = importlib.import_module("gateway.run")
     GatewayRunner = gateway_run.GatewayRunner
@@ -104,6 +141,7 @@ def _make_runner(adapter):
     runner._fallback_model = None
     runner._session_db = None
     runner._running_agents = {}
+    runner._session_run_generation = {}
     runner.hooks = SimpleNamespace(loaded_hooks=False)
     runner.config = SimpleNamespace(
         thread_sessions_per_user=False,
@@ -744,6 +782,154 @@ async def test_base_processing_releases_post_delivery_callback_after_main_send()
     assert released == [True]
 
 
+@pytest.mark.asyncio
+async def test_run_agent_drops_tool_progress_after_generation_invalidation(monkeypatch, tmp_path):
+    """Tool progress emitted after the run generation is invalidated must be dropped."""
+    import yaml
+    (tmp_path / "config.yaml").write_text(
+        yaml.dump({"display": {"tool_progress": "all"}}),
+        encoding="utf-8",
+    )
+
+    fake_dotenv = types.ModuleType("dotenv")
+    fake_dotenv.load_dotenv = lambda *args, **kwargs: None
+    monkeypatch.setitem(sys.modules, "dotenv", fake_dotenv)
+
+    fake_run_agent = types.ModuleType("run_agent")
+    fake_run_agent.AIAgent = DelayedProgressAgent
+    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
+    import tools.terminal_tool  # noqa: F401 - register terminal tool metadata
+
+    adapter = ProgressCaptureAdapter(platform=Platform.DISCORD)
+    runner = _make_runner(adapter)
+    gateway_run = importlib.import_module("gateway.run")
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"})
+
+    source = SessionSource(
+        platform=Platform.DISCORD,
+        chat_id="dm-1",
+        chat_type="dm",
+        thread_id=None,
+    )
+    session_key = "agent:main:discord:dm:dm-1"
+    runner._session_run_generation[session_key] = 1  # matches run_generation passed below
+
+    original_send = adapter.send
+    invalidated = {"done": False}
+
+    async def send_and_invalidate(chat_id, content, reply_to=None, metadata=None):
+        result = await original_send(chat_id, content, reply_to=reply_to, metadata=metadata)
+        if "first command" in content and not invalidated["done"]:
+            invalidated["done"] = True  # invalidate only once, after the first progress send
+            runner._invalidate_session_run_generation(session_key, reason="test_stop")
+        return result
+
+    adapter.send = send_and_invalidate
+
+    result = await runner._run_agent(
+        message="hello",
+        context_prompt="",
+        history=[],
+        source=source,
+        session_id="sess-progress-stop",
+        session_key=session_key,
+        run_generation=1,
+    )
+
+    progress_calls = [*adapter.sent, *adapter.edits]  # single join keeps a separator between lists
+    all_progress_text = " ".join(call["content"] for call in progress_calls)
+    assert result["final_response"] == "done"
+    assert 'first command' in all_progress_text
+    assert 'second command' not in all_progress_text
+
+
+@pytest.mark.asyncio
+async def test_run_agent_drops_interim_commentary_after_generation_invalidation(monkeypatch, tmp_path):
+    """Interim commentary emitted after the run generation is invalidated is not sent."""
+    import yaml
+    (tmp_path / "config.yaml").write_text(
+        yaml.dump({"display": {"tool_progress": "off", "interim_assistant_messages": True}}),
+        encoding="utf-8",
+    )
+
+    fake_dotenv = types.ModuleType("dotenv")
+    fake_dotenv.load_dotenv = lambda *args, **kwargs: None
+    monkeypatch.setitem(sys.modules, "dotenv", fake_dotenv)
+
+    fake_run_agent = types.ModuleType("run_agent")
+    fake_run_agent.AIAgent = DelayedInterimAgent
+    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
+
+    adapter = ProgressCaptureAdapter(platform=Platform.DISCORD)
+    runner = _make_runner(adapter)
+    gateway_run = importlib.import_module("gateway.run")
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"})
+
+    source = SessionSource(
+        platform=Platform.DISCORD,
+        chat_id="dm-2",
+        chat_type="dm",
+        thread_id=None,
+    )
+    session_key = "agent:main:discord:dm:dm-2"
+    runner._session_run_generation[session_key] = 1  # matches run_generation passed below
+
+    original_send = adapter.send
+    invalidated = {"done": False}
+
+    async def send_and_invalidate(chat_id, content, reply_to=None, metadata=None):
+        result = await original_send(chat_id, content, reply_to=reply_to, metadata=metadata)
+        if content == "first interim" and not invalidated["done"]:
+            invalidated["done"] = True  # invalidate only once, after the first interim send
+            runner._invalidate_session_run_generation(session_key, reason="test_stop")
+        return result
+
+    adapter.send = send_and_invalidate
+
+    result = await runner._run_agent(
+        message="hello",
+        context_prompt="",
+        history=[],
+        source=source,
+        session_id="sess-commentary-stop",
+        session_key=session_key,
+        run_generation=1,
+    )
+
+    sent_texts = [call["content"] for call in adapter.sent]
+    assert result["final_response"] == "done"
+    assert "first interim" in sent_texts
+    assert "second interim" not in sent_texts
+
+
+@pytest.mark.asyncio
+async def test_keep_typing_stops_immediately_when_interrupt_event_is_set():
+    """Setting the stop event ends ``_keep_typing`` well inside its 30s interval.
+
+    ``adapter.typing`` must then hold exactly one regular typing call and
+    exactly one entry whose metadata is the ``{"stopped": True}`` marker.
+    """
+    stop_marker = {"stopped": True}
+    halt = asyncio.Event()
+    adapter = ProgressCaptureAdapter(platform=Platform.DISCORD)
+
+    typing_loop = adapter._keep_typing("dm-typing-stop", interval=30.0, stop_event=halt)
+    task = asyncio.create_task(typing_loop)
+    await asyncio.sleep(0.05)
+    halt.set()
+    await asyncio.wait_for(task, timeout=0.5)
+
+    # Partition recorded calls into stop markers and ordinary typing pings.
+    stopped, regular = [], []
+    for call in adapter.typing:
+        bucket = stopped if call.get("metadata") == stop_marker else regular
+        bucket.append(call)
+    assert len(regular) == 1
+    assert len(stopped) == 1
+
+
 @pytest.mark.asyncio
 async def test_verbose_mode_does_not_truncate_args_by_default(monkeypatch, tmp_path):
     """Verbose mode with default tool_preview_length (0) should NOT truncate args.
diff --git a/tests/gateway/test_runner_startup_failures.py b/tests/gateway/test_runner_startup_failures.py
index 977d66fb3be..96d5d4627b0 100644
--- a/tests/gateway/test_runner_startup_failures.py
+++ b/tests/gateway/test_runner_startup_failures.py
@@ -319,3 +319,23 @@ async def test_start_gateway_replace_clears_marker_on_permission_denied(
     assert ok is False
     # Marker must NOT be left behind
     assert not (tmp_path / ".gateway-takeover.json").exists()
+
+
+def test_runner_warns_when_docker_gateway_lacks_explicit_output_mount(monkeypatch, tmp_path, caplog):
+    """Constructing the runner with a docker terminal env whose only volume
+    is the localtime bind mount must log a host-visible-output warning."""
+    env = {
+        "HERMES_HOME": str(tmp_path),
+        "TERMINAL_ENV": "docker",
+        "TERMINAL_DOCKER_VOLUMES": '["/etc/localtime:/etc/localtime:ro"]',
+    }
+    for name, value in env.items():
+        monkeypatch.setenv(name, value)
+    telegram = PlatformConfig(enabled=True, token="***")
+    config = GatewayConfig(platforms={Platform.TELEGRAM: telegram}, sessions_dir=tmp_path / "sessions")
+
+    with caplog.at_level("WARNING"):
+        GatewayRunner(config)
+
+    messages = [record.message for record in caplog.records]
+    assert any("host-visible output mount" in text for text in messages)
diff --git a/tests/gateway/test_safe_adapter_disconnect.py b/tests/gateway/test_safe_adapter_disconnect.py
new file mode 100644
index 00000000000..ec11f2663ad
--- /dev/null
+++ b/tests/gateway/test_safe_adapter_disconnect.py
@@ -0,0 +1,59 @@
+"""Regression tests: failed-connect path must call adapter.disconnect().
+
+When adapter.connect() returns False or raises, the adapter may have
+allocated resources (aiohttp.ClientSession, poll tasks, child
+subprocesses) before giving up. Without a defensive disconnect() call
+these leak and surface as "Unclosed client session" warnings at
+process exit (seen on the 2026-04-18 18:08:16 gateway restart).
+
+The fix: gateway/run.py wraps each adapter connect() with a safety-net
+call to _safe_adapter_disconnect() in the failure branches.
+"""
+
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from gateway.config import Platform
+from gateway.run import GatewayRunner
+
+
+@pytest.fixture
+def bare_runner():
+    """Return a GatewayRunner allocated via ``object.__new__`` (no ``__init__`` run)."""
+    return object.__new__(GatewayRunner)
+
+
+@pytest.mark.asyncio
+async def test_safe_disconnect_calls_adapter_disconnect(bare_runner):
+    """``_safe_adapter_disconnect`` awaits the adapter's ``disconnect()`` once."""
+    disconnect = AsyncMock(return_value=None)
+    adapter = MagicMock(disconnect=disconnect)
+
+    await bare_runner._safe_adapter_disconnect(adapter, Platform.TELEGRAM)
+
+    disconnect.assert_awaited_once()
+
+
+@pytest.mark.asyncio
+async def test_safe_disconnect_swallows_exceptions(bare_runner):
+    """A failing adapter.disconnect() must stay contained: the caller is
+    already handling an error and must not see a second one."""
+    failing_disconnect = AsyncMock(side_effect=RuntimeError("partial init"))
+    adapter = MagicMock(disconnect=failing_disconnect)
+
+    # Reaching the assertion below proves the helper did not re-raise.
+    await bare_runner._safe_adapter_disconnect(adapter, Platform.TELEGRAM)
+
+    failing_disconnect.assert_awaited_once()
+
+
+@pytest.mark.asyncio
+async def test_safe_disconnect_handles_none_platform(bare_runner):
+    """A ``None`` platform must not break the helper's failure handling."""
+    failing_disconnect = AsyncMock(side_effect=ValueError("nope"))
+    adapter = MagicMock(disconnect=failing_disconnect)
+
+    await bare_runner._safe_adapter_disconnect(adapter, None)
+
+    failing_disconnect.assert_awaited_once()
diff --git a/tests/gateway/test_session_race_guard.py b/tests/gateway/test_session_race_guard.py
index 8c26abec590..fe1ef011a37 100644
--- a/tests/gateway/test_session_race_guard.py
+++ b/tests/gateway/test_session_race_guard.py
@@ -24,10 +24,18 @@ class _FakeAdapter:
 
     def __init__(self):
         self._pending_messages = {}
+        self._active_sessions = {}
+        self.interrupted_sessions = []
 
     async def send(self, chat_id, text, **kwargs):
         pass
 
+    async def interrupt_session_activity(self, session_key, chat_id):
+        """Record the interrupt and set the session's active event, if any."""
+        self.interrupted_sessions.append((session_key, chat_id))
+        if (active := self._active_sessions.get(session_key)) is not None:
+            active.set()
+
 
 def _make_runner():
     runner = object.__new__(GatewayRunner)
@@ -37,6 +45,7 @@ def _make_runner():
     runner.adapters = {Platform.TELEGRAM: _FakeAdapter()}
     runner._running_agents = {}
     runner._running_agents_ts = {}
+    runner._session_run_generation = {}
     runner._pending_messages = {}
     runner._pending_approvals = {}
     runner._voice_mode = {}
@@ -81,7 +90,7 @@ async def test_sentinel_placed_before_agent_setup():
     # Patch _handle_message_with_agent to capture state at entry
     sentinel_was_set = False
 
-    async def mock_inner(self_inner, ev, src, qk):
+    async def mock_inner(self_inner, ev, src, qk, generation):
         nonlocal sentinel_was_set
         sentinel_was_set = runner._running_agents.get(qk) is _AGENT_PENDING_SENTINEL
         return "ok"
@@ -105,7 +114,7 @@ async def test_sentinel_cleaned_up_after_handler_returns():
     event = _make_event()
     session_key = build_session_key(event.source)
 
-    async def mock_inner(self_inner, ev, src, qk):
+    async def mock_inner(self_inner, ev, src, qk, generation):
         return "ok"
 
     with patch.object(GatewayRunner, "_handle_message_with_agent", mock_inner):
@@ -127,7 +136,7 @@ async def test_sentinel_cleaned_up_on_exception():
     event = _make_event()
     session_key = build_session_key(event.source)
 
-    async def mock_inner(self_inner, ev, src, qk):
+    async def mock_inner(self_inner, ev, src, qk, generation):
         raise RuntimeError("boom")
 
     with patch.object(GatewayRunner, "_handle_message_with_agent", mock_inner):
@@ -154,7 +163,7 @@ async def test_second_message_during_sentinel_queued_not_duplicate():
 
     barrier = asyncio.Event()
 
-    async def slow_inner(self_inner, ev, src, qk):
+    async def slow_inner(self_inner, ev, src, qk, generation):
         # Simulate slow setup — wait until test tells us to proceed
         await barrier.wait()
         return "ok"
@@ -333,7 +342,7 @@ async def test_stop_during_sentinel_force_cleans_session():
 
     barrier = asyncio.Event()
 
-    async def slow_inner(self_inner, ev, src, qk):
+    async def slow_inner(self_inner, ev, src, qk, generation):
         await barrier.wait()
         return "ok"
 
@@ -381,6 +390,7 @@ async def test_stop_hard_kills_running_agent():
     fake_agent = MagicMock()
     fake_agent.get_activity_summary.return_value = {"seconds_since_activity": 0}
     runner._running_agents[session_key] = fake_agent
+    runner.adapters[Platform.TELEGRAM]._active_sessions[session_key] = asyncio.Event()
 
     # Send /stop
     stop_event = _make_event(text="/stop")
@@ -393,6 +403,10 @@ async def test_stop_hard_kills_running_agent():
     assert session_key not in runner._running_agents, (
         "/stop must remove the agent from _running_agents so the session is unlocked"
     )
+    assert runner.adapters[Platform.TELEGRAM].interrupted_sessions == [
+        (session_key, "12345")
+    ]
+    assert runner.adapters[Platform.TELEGRAM]._active_sessions[session_key].is_set()
 
     # Must return a confirmation
     assert result is not None
diff --git a/tests/gateway/test_signal.py b/tests/gateway/test_signal.py
index 26f1e4f3bb3..eee3a0db8aa 100644
--- a/tests/gateway/test_signal.py
+++ b/tests/gateway/test_signal.py
@@ -740,3 +740,140 @@ class TestSignalStopTyping:
         await adapter.stop_typing("+155****4567")
 
         adapter._stop_typing_indicator.assert_awaited_once_with("+155****4567")
+
+
+# ---------------------------------------------------------------------------
+# Typing-indicator backoff on repeated failures (Signal RPC spam fix)
+# ---------------------------------------------------------------------------
+
+class TestSignalTypingBackoff:
+    """When base.py's _keep_typing refresh loop calls send_typing every ~2s
+    and the recipient is unreachable (NETWORK_FAILURE), the adapter must:
+
+    - log WARNING only for the first failure (subsequent failures use DEBUG
+      via log_failures=False on the _rpc call)
+    - after 3 consecutive failures, skip the RPC entirely during an
+      exponential cooldown window instead of hammering signal-cli every 2s
+    - reset counters on a successful sendTyping
+    - reset counters when _stop_typing_indicator() is called for the chat
+    """
+
+    @pytest.mark.asyncio
+    async def test_first_failure_logs_at_warning_subsequent_at_debug(
+        self, monkeypatch
+    ):
+        adapter = _make_signal_adapter(monkeypatch)
+        calls = []
+
+        async def _fake_rpc(method, params, rpc_id=None, *, log_failures=True):
+            calls.append({"log_failures": log_failures})
+            return None  # simulate NETWORK_FAILURE
+
+        adapter._rpc = _fake_rpc
+
+        await adapter.send_typing("+155****4567")
+        await adapter.send_typing("+155****4567")
+
+        assert len(calls) == 2
+        assert calls[0]["log_failures"] is True   # first failure — warn
+        assert calls[1]["log_failures"] is False  # subsequent — debug
+
+    @pytest.mark.asyncio
+    async def test_three_consecutive_failures_trigger_cooldown(
+        self, monkeypatch
+    ):
+        adapter = _make_signal_adapter(monkeypatch)
+        call_count = {"n": 0}
+
+        async def _fake_rpc(method, params, rpc_id=None, *, log_failures=True):
+            call_count["n"] += 1
+            return None
+
+        adapter._rpc = _fake_rpc
+
+        # Three failures engage the cooldown.
+        await adapter.send_typing("+155****4567")
+        await adapter.send_typing("+155****4567")
+        await adapter.send_typing("+155****4567")
+        assert call_count["n"] == 3
+        assert "+155****4567" in adapter._typing_skip_until
+
+        # Fourth, fifth, ... calls during the cooldown window are short-
+        # circuited — the RPC is not issued at all.
+        await adapter.send_typing("+155****4567")
+        await adapter.send_typing("+155****4567")
+        assert call_count["n"] == 3
+
+    @pytest.mark.asyncio
+    async def test_cooldown_is_per_chat_not_global(self, monkeypatch):
+        adapter = _make_signal_adapter(monkeypatch)
+        call_log = []
+
+        async def _fake_rpc(method, params, rpc_id=None, *, log_failures=True):
+            call_log.append(params.get("recipient") or params.get("groupId"))
+            return None
+
+        adapter._rpc = _fake_rpc
+        # Drive chat A into cooldown.
+        for _ in range(3):
+            await adapter.send_typing("+155****4567")
+        assert "+155****4567" in adapter._typing_skip_until
+
+        # Chat B is unaffected — still makes RPCs.
+        await adapter.send_typing("+155****9999")
+        await adapter.send_typing("+155****9999")
+        assert len(call_log) == 5  # 3 RPCs for chat A + 2 for chat B
+        assert "+155****9999" not in adapter._typing_skip_until
+        # Chat A cooldown untouched
+        assert "+155****4567" in adapter._typing_skip_until
+
+    @pytest.mark.asyncio
+    async def test_success_resets_failure_counter_and_cooldown(
+        self, monkeypatch
+    ):
+        adapter = _make_signal_adapter(monkeypatch)
+        result_queue = [None, None, {"timestamp": 12345}]
+        call_log = []
+
+        async def _fake_rpc(method, params, rpc_id=None, *, log_failures=True):
+            call_log.append(log_failures)
+            return result_queue.pop(0)
+
+        adapter._rpc = _fake_rpc
+
+        await adapter.send_typing("+155****4567")   # fail 1 — warn
+        await adapter.send_typing("+155****4567")   # fail 2 — debug
+        await adapter.send_typing("+155****4567")   # success — reset
+
+        assert adapter._typing_failures.get("+155****4567", 0) == 0
+        assert "+155****4567" not in adapter._typing_skip_until
+
+        # Next failure after recovery logs at WARNING again (fresh counter).
+        async def _fail(method, params, rpc_id=None, *, log_failures=True):
+            call_log.append(log_failures)
+            return None
+
+        adapter._rpc = _fail
+        await adapter.send_typing("+155****4567")
+        assert call_log[-1] is True   # first failure in a fresh cycle
+
+    @pytest.mark.asyncio
+    async def test_stop_typing_indicator_clears_backoff_state(
+        self, monkeypatch
+    ):
+        adapter = _make_signal_adapter(monkeypatch)
+
+        async def _fail(method, params, rpc_id=None, *, log_failures=True):
+            return None
+
+        adapter._rpc = _fail
+
+        for _ in range(3):
+            await adapter.send_typing("+155****4567")
+        assert adapter._typing_failures.get("+155****4567") == 3
+        assert "+155****4567" in adapter._typing_skip_until
+
+        await adapter._stop_typing_indicator("+155****4567")
+
+        assert "+155****4567" not in adapter._typing_failures
+        assert "+155****4567" not in adapter._typing_skip_until
diff --git a/tests/gateway/test_status_command.py b/tests/gateway/test_status_command.py
index c4a64f30ab1..50e1c52cc29 100644
--- a/tests/gateway/test_status_command.py
+++ b/tests/gateway/test_status_command.py
@@ -50,6 +50,7 @@ def _make_runner(session_entry: SessionEntry):
     runner.session_store.rewrite_transcript = MagicMock()
     runner.session_store.update_session = MagicMock()
     runner._running_agents = {}
+    runner._session_run_generation = {}
     runner._pending_messages = {}
     runner._pending_approvals = {}
     runner._session_db = MagicMock()
@@ -223,6 +224,121 @@ async def test_handle_message_persists_agent_token_counts(monkeypatch):
     )
 
 
+@pytest.mark.asyncio
+async def test_handle_message_discards_stale_result_after_session_invalidation(monkeypatch):
+    """A run invalidated mid-flight yields no reply, no store writes, no callback."""
+    import gateway.run as gateway_run
+    session_entry = SessionEntry(
+        session_key=build_session_key(_make_source()),
+        session_id="sess-1",
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        platform=Platform.TELEGRAM,
+        chat_type="dm",
+    )
+    runner = _make_runner(session_entry)
+    runner.session_store.load_transcript.return_value = [{"role": "user", "content": "earlier"}]
+    session_key = session_entry.session_key
+    runner.adapters[Platform.TELEGRAM]._post_delivery_callbacks = {session_key: object()}
+
+    async def _stale_result(**kwargs):
+        # Invalidate the generation while the (mocked) agent run is still in progress.
+        runner._invalidate_session_run_generation(kwargs["session_key"], reason="test_stale_result")
+        return {
+            "final_response": "late reply",
+            "messages": [],
+            "tools": [],
+            "history_offset": 0,
+            "last_prompt_tokens": 80,
+            "input_tokens": 120,
+            "output_tokens": 45,
+            "model": "openai/test-model",
+        }
+    runner._run_agent = AsyncMock(side_effect=_stale_result)
+
+    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"})
+    monkeypatch.setattr(
+        "agent.model_metadata.get_model_context_length",
+        lambda *_args, **_kwargs: 100000,
+    )
+
+    result = await runner._handle_message(_make_event("hello"))
+
+    assert result is None
+    runner.session_store.append_to_transcript.assert_not_called()
+    runner.session_store.update_session.assert_not_called()
+    assert session_key not in runner.adapters[Platform.TELEGRAM]._post_delivery_callbacks
+
+
+@pytest.mark.asyncio
+async def test_handle_message_stale_result_keeps_newer_generation_callback(monkeypatch):
+    """A stale run must leave a newer generation's post-delivery callback in place."""
+    import gateway.run as gateway_run
+    class _Adapter:
+        def __init__(self):
+            self._post_delivery_callbacks = {}
+
+        async def send(self, *args, **kwargs):
+            return None
+
+        def pop_post_delivery_callback(self, session_key, *, generation=None):
+            entry = self._post_delivery_callbacks.get(session_key)
+            if entry is None:
+                return None
+            if isinstance(entry, tuple):  # (generation, callback) form
+                entry_generation, callback = entry
+                if generation is not None and entry_generation != generation:
+                    return None  # slot belongs to another generation: leave it
+                self._post_delivery_callbacks.pop(session_key, None)
+                return callback
+            if generation is not None:  # bare entry has no generation to match
+                return None
+            return self._post_delivery_callbacks.pop(session_key, None)
+
+    session_entry = SessionEntry(
+        session_key=build_session_key(_make_source()),
+        session_id="sess-1",
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        platform=Platform.TELEGRAM,
+        chat_type="dm",
+    )
+    runner = _make_runner(session_entry)
+    runner.session_store.load_transcript.return_value = [{"role": "user", "content": "earlier"}]
+    session_key = session_entry.session_key
+    adapter = _Adapter()
+    runner.adapters[Platform.TELEGRAM] = adapter
+
+    async def _stale_result(**kwargs):
+        # Simulate a newer run claiming the callback slot before the stale run unwinds.
+        runner._session_run_generation[session_key] = 2
+        adapter._post_delivery_callbacks[session_key] = (2, lambda: None)
+        return {
+            "final_response": "late reply",
+            "messages": [],
+            "tools": [],
+            "history_offset": 0,
+            "last_prompt_tokens": 80,
+            "input_tokens": 120,
+            "output_tokens": 45,
+            "model": "openai/test-model",
+        }
+
+    runner._run_agent = AsyncMock(side_effect=_stale_result)
+
+    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"})
+    monkeypatch.setattr(
+        "agent.model_metadata.get_model_context_length",
+        lambda *_args, **_kwargs: 100000,
+    )
+
+    result = await runner._handle_message(_make_event("hello"))
+
+    assert result is None
+    assert session_key in adapter._post_delivery_callbacks
+    assert adapter._post_delivery_callbacks[session_key][0] == 2  # generation-2 entry survived
+
+
 
 @pytest.mark.asyncio
 async def test_status_command_bypasses_active_session_guard():
diff --git a/tests/gateway/test_steer_command.py b/tests/gateway/test_steer_command.py
new file mode 100644
index 00000000000..b756ff09622
--- /dev/null
+++ b/tests/gateway/test_steer_command.py
@@ -0,0 +1,191 @@
+"""Tests for the gateway /steer command handler.
+
+/steer injects a user message into the agent's next tool result without
+interrupting. The gateway runner must:
+
+  1. When an agent IS running → call ``agent.steer(text)``, do NOT set
+     ``_interrupt_requested``, do NOT touch ``_pending_messages``.
+  2. When the agent is the PENDING sentinel → fall back to /queue
+     semantics (store in ``adapter._pending_messages``).
+  3. When no agent is active → strip the slash prefix and let the normal
+     prompt pipeline handle it as a regular user message.
+"""
+from __future__ import annotations
+
+from datetime import datetime
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.platforms.base import MessageEvent
+from gateway.session import SessionEntry, SessionSource, build_session_key
+
+
+def _make_source() -> SessionSource:
+    return SessionSource(
+        platform=Platform.TELEGRAM,
+        user_id="u1",
+        chat_id="c1",
+        user_name="tester",
+        chat_type="dm",
+    )
+
+
+def _make_event(text: str) -> MessageEvent:
+    return MessageEvent(
+        text=text,
+        source=_make_source(),
+        message_id="m1",
+    )
+
+
+def _make_runner(session_entry: SessionEntry):  # test helper: returns (runner stub, Telegram adapter mock)
+    from gateway.run import GatewayRunner
+
+    runner = object.__new__(GatewayRunner)  # allocate without running GatewayRunner.__init__
+    runner.config = GatewayConfig(
+        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
+    )
+    adapter = MagicMock()
+    adapter.send = AsyncMock()  # awaitable stub
+    adapter._pending_messages = {}  # real dict so tests can assert on its contents
+    runner.adapters = {Platform.TELEGRAM: adapter}
+    runner._voice_mode = {}
+    runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False)
+    runner.session_store = MagicMock()
+    runner.session_store.get_or_create_session.return_value = session_entry  # always hand back the caller's entry
+    runner.session_store.load_transcript.return_value = []
+    runner.session_store.has_any_sessions.return_value = True
+    runner._running_agents = {}  # tests populate this, keyed by session key
+    runner._running_agents_ts = {}
+    runner._pending_messages = {}
+    runner._pending_approvals = {}
+    runner._session_db = MagicMock()
+    runner._session_db.get_session_title.return_value = None
+    runner._reasoning_config = None
+    runner._provider_routing = {}
+    runner._fallback_model = None
+    runner._show_reasoning = False
+    runner._is_user_authorized = lambda _source: True  # every sender is treated as authorized
+    runner._set_session_env = lambda _context: None  # no-op stub
+    runner._should_send_voice_reply = lambda *_args, **_kwargs: False  # never take the voice-reply path
+    runner._send_voice_reply = AsyncMock()
+    runner._capture_gateway_honcho_if_configured = lambda *args, **kwargs: None  # no-op stub
+    runner._emit_gateway_run_progress = AsyncMock()
+    return runner, adapter  # adapter is returned too so tests can inspect adapter._pending_messages
+
+
+def _session_entry() -> SessionEntry:
+    return SessionEntry(
+        session_key=build_session_key(_make_source()),
+        session_id="sess-1",
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        platform=Platform.TELEGRAM,
+        chat_type="dm",
+        total_tokens=0,
+    )
+
+
+@pytest.mark.asyncio
+async def test_steer_calls_agent_steer_and_does_not_interrupt():
+    """When an agent is running, /steer must call agent.steer(text) and
+    leave interrupt state untouched."""
+    runner, adapter = _make_runner(_session_entry())
+    sk = build_session_key(_make_source())
+
+    running_agent = MagicMock()
+    running_agent.steer.return_value = True
+    runner._running_agents[sk] = running_agent
+
+    result = await runner._handle_message(_make_event("/steer also check auth.log"))
+
+    # The handler replied with a confirmation
+    assert result is not None
+    assert "steer" in result.lower() or "queued" in result.lower()
+    # The agent's steer() was called with the payload (prefix stripped)
+    running_agent.steer.assert_called_once_with("also check auth.log")
+    # Critically: interrupt was NOT called
+    running_agent.interrupt.assert_not_called()
+    # And no user-text queueing happened — the steer doesn't go into
+    # _pending_messages (that would be turn-boundary /queue semantics).
+    assert runner._pending_messages == {}
+    assert adapter._pending_messages == {}
+
+
+@pytest.mark.asyncio
+async def test_steer_without_payload_returns_usage():  # bare "/steer" → usage hint, running agent untouched
+    runner, _adapter = _make_runner(_session_entry())
+    sk = build_session_key(_make_source())
+    running_agent = MagicMock()
+    runner._running_agents[sk] = running_agent
+
+    result = await runner._handle_message(_make_event("/steer"))
+
+    assert result is not None
+    assert "usage" in result.lower()  # case-insensitive, matching the sibling tests' .lower() checks
+    running_agent.steer.assert_not_called()
+    running_agent.interrupt.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_steer_with_pending_sentinel_falls_back_to_queue():
+    """When the agent hasn't finished booting (sentinel), /steer should
+    queue as a turn-boundary follow-up instead of crashing."""
+    from gateway.run import _AGENT_PENDING_SENTINEL
+
+    runner, adapter = _make_runner(_session_entry())
+    sk = build_session_key(_make_source())
+    runner._running_agents[sk] = _AGENT_PENDING_SENTINEL
+
+    result = await runner._handle_message(_make_event("/steer wait up"))
+
+    assert result is not None
+    assert "queued" in result.lower() or "starting" in result.lower()
+    # The fallback put the text into the adapter's pending queue.
+    assert sk in adapter._pending_messages
+    assert adapter._pending_messages[sk].text == "wait up"
+
+
+@pytest.mark.asyncio
+async def test_steer_agent_without_steer_method_falls_back():
+    """If the running agent somehow lacks the steer() method (older build,
+    test stub), the handler must not explode — fall back to /queue."""
+    runner, adapter = _make_runner(_session_entry())
+    sk = build_session_key(_make_source())
+
+    # A bare object that does NOT have steer() — use a spec'd Mock so
+    # hasattr(agent, "steer") returns False.
+    running_agent = MagicMock(spec=[])
+    runner._running_agents[sk] = running_agent
+
+    result = await runner._handle_message(_make_event("/steer fallback"))
+
+    assert result is not None
+    # Must mention queueing since steer wasn't available
+    assert "queued" in result.lower()
+    assert sk in adapter._pending_messages
+    assert adapter._pending_messages[sk].text == "fallback"
+
+
+@pytest.mark.asyncio
+async def test_steer_rejected_payload_returns_rejection_message():
+    """If agent.steer() returns False (e.g. empty after strip — though
+    the gateway already guards this), surface a rejection message."""
+    runner, _adapter = _make_runner(_session_entry())
+    sk = build_session_key(_make_source())
+
+    running_agent = MagicMock()
+    running_agent.steer.return_value = False
+    runner._running_agents[sk] = running_agent
+
+    result = await runner._handle_message(_make_event("/steer hello"))
+
+    assert result is not None
+    assert "rejected" in result.lower() or "empty" in result.lower()
+
+
+if __name__ == "__main__":  # pragma: no cover
+    pytest.main([__file__, "-v"])
diff --git a/tests/gateway/test_stream_consumer.py b/tests/gateway/test_stream_consumer.py
index 99ac4dc188c..0a0e0631db7 100644
--- a/tests/gateway/test_stream_consumer.py
+++ b/tests/gateway/test_stream_consumer.py
@@ -502,11 +502,13 @@ class TestSegmentBreakOnToolBoundary:
 
     @pytest.mark.asyncio
     async def test_segment_break_clears_failed_edit_fallback_state(self):
-        """A tool boundary after edit failure must not duplicate the next segment."""
+        """A tool boundary after edit failure must flush the undelivered tail
+        without duplicating the prefix the user already saw (#8124)."""
         adapter = MagicMock()
         send_results = [
             SimpleNamespace(success=True, message_id="msg_1"),
             SimpleNamespace(success=True, message_id="msg_2"),
+            SimpleNamespace(success=True, message_id="msg_3"),
         ]
         adapter.send = AsyncMock(side_effect=send_results)
         adapter.edit_message = AsyncMock(return_value=SimpleNamespace(success=False, error="flood_control:6"))
@@ -526,7 +528,60 @@ class TestSegmentBreakOnToolBoundary:
         await task
 
         sent_texts = [call[1]["content"] for call in adapter.send.call_args_list]
-        assert sent_texts == ["Hello ▉", "Next segment"]
+        # The undelivered "world" tail must reach the user, and the next
+        # segment must not duplicate "Hello" that was already visible.
+        assert sent_texts == ["Hello ▉", "world", "Next segment"]
+
+    @pytest.mark.asyncio
+    async def test_segment_break_after_mid_stream_edit_failure_preserves_tail(self):
+        """Regression for #8124: when an earlier edit succeeded but later edits
+        fail (persistent flood control) and a tool boundary arrives before the
+        fallback threshold is reached, the pre-boundary tail must still be
+        delivered — not silently dropped by the segment reset."""
+        adapter = MagicMock()
+        # msg_1 for the initial partial, msg_2 for the flushed tail,
+        # msg_3 for the post-boundary segment.
+        send_results = [
+            SimpleNamespace(success=True, message_id="msg_1"),
+            SimpleNamespace(success=True, message_id="msg_2"),
+            SimpleNamespace(success=True, message_id="msg_3"),
+        ]
+        adapter.send = AsyncMock(side_effect=send_results)
+
+        # Only the first edit succeeds; everything after fails with flood control
+        # — simulating Telegram's "edit once then get rate-limited" pattern.
+        edit_results = [
+            SimpleNamespace(success=True),   # "Hello world ▉"  — succeeds
+            SimpleNamespace(success=False, error="flood_control:6.0"),  # "Hello world more ▉" — flood triggered
+            SimpleNamespace(success=False, error="flood_control:6.0"),  # finalize edit at segment break
+            SimpleNamespace(success=False, error="flood_control:6.0"),  # cursor-strip attempt
+        ]
+        adapter.edit_message = AsyncMock(side_effect=edit_results + [edit_results[-1]] * 10)
+        adapter.MAX_MESSAGE_LENGTH = 4096
+
+        config = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5, cursor=" ▉")
+        consumer = GatewayStreamConsumer(adapter, "chat_123", config)
+
+        consumer.on_delta("Hello")
+        task = asyncio.create_task(consumer.run())
+        await asyncio.sleep(0.08)
+        consumer.on_delta(" world")
+        await asyncio.sleep(0.08)
+        consumer.on_delta(" more")
+        await asyncio.sleep(0.08)
+        consumer.on_delta(None)  # tool boundary
+        consumer.on_delta("Here is the tool result.")
+        consumer.finish()
+        await task
+
+        sent_texts = [call[1]["content"] for call in adapter.send.call_args_list]
+        # "more" must have been delivered, not dropped.
+        all_text = " ".join(sent_texts)
+        assert "more" in all_text, (
+            f"Pre-boundary tail 'more' was silently dropped: sends={sent_texts}"
+        )
+        # Post-boundary text must also reach the user.
+        assert "Here is the tool result." in all_text
 
     @pytest.mark.asyncio
     async def test_no_message_id_enters_fallback_mode(self):
@@ -1161,3 +1216,87 @@ class TestBufferOnlyMode:
         # text, the consumer may send then edit, or just send once at got_done.
         # The key assertion: this doesn't break.
         assert adapter.send.call_count >= 1
+
+
+# ── Cursor stripping on fallback (#7183) ────────────────────────────────────
+
+
+class TestCursorStrippingOnFallback:
+    """Regression: cursor must be stripped when fallback continuation is empty (#7183).
+
+    When _send_fallback_final is called with nothing new to deliver (the visible
+    partial already matches final_text), the last edit may still show the cursor
+    character because fallback mode was entered after a failed edit.  Before the
+    fix this would leave the message permanently frozen with a visible ▉.
+    """
+
+    @pytest.mark.asyncio
+    async def test_cursor_stripped_when_continuation_empty(self):
+        """_send_fallback_final must attempt a final edit to strip the cursor."""
+        adapter = MagicMock()
+        adapter.MAX_MESSAGE_LENGTH = 4096
+        adapter.edit_message = AsyncMock(
+            return_value=SimpleNamespace(success=True, message_id="msg-1")
+        )
+
+        consumer = GatewayStreamConsumer(
+            adapter, "chat-1",
+            config=StreamConsumerConfig(cursor=" ▉"),
+        )
+        consumer._message_id = "msg-1"
+        consumer._last_sent_text = "Hello world ▉"
+        consumer._fallback_final_send = False
+
+        await consumer._send_fallback_final("Hello world")
+
+        adapter.edit_message.assert_called_once()
+        call_args = adapter.edit_message.call_args
+        assert call_args.kwargs["content"] == "Hello world"
+        assert consumer._already_sent is True
+        # _last_sent_text should reflect the cleaned text after a successful strip
+        assert consumer._last_sent_text == "Hello world"
+
+    @pytest.mark.asyncio
+    async def test_cursor_not_stripped_when_no_cursor_configured(self):
+        """No edit attempted when cursor is not configured."""
+        adapter = MagicMock()
+        adapter.MAX_MESSAGE_LENGTH = 4096
+        adapter.edit_message = AsyncMock()
+
+        consumer = GatewayStreamConsumer(
+            adapter, "chat-1",
+            config=StreamConsumerConfig(cursor=""),
+        )
+        consumer._message_id = "msg-1"
+        consumer._last_sent_text = "Hello world"
+        consumer._fallback_final_send = False
+
+        await consumer._send_fallback_final("Hello world")
+
+        adapter.edit_message.assert_not_called()
+        assert consumer._already_sent is True
+
+    @pytest.mark.asyncio
+    async def test_cursor_strip_edit_failure_handled(self):
+        """If the cursor-stripping edit itself fails, it must not crash and
+        must not corrupt _last_sent_text."""
+        adapter = MagicMock()
+        adapter.MAX_MESSAGE_LENGTH = 4096
+        adapter.edit_message = AsyncMock(
+            return_value=SimpleNamespace(success=False, error="flood_control")
+        )
+
+        consumer = GatewayStreamConsumer(
+            adapter, "chat-1",
+            config=StreamConsumerConfig(cursor=" ▉"),
+        )
+        consumer._message_id = "msg-1"
+        consumer._last_sent_text = "Hello ▉"
+        consumer._fallback_final_send = False
+
+        await consumer._send_fallback_final("Hello")
+
+        # Should still set already_sent despite the cursor-strip edit failure
+        assert consumer._already_sent is True
+        # _last_sent_text must NOT be updated when the edit failed
+        assert consumer._last_sent_text == "Hello ▉"
diff --git a/tests/gateway/test_telegram_documents.py b/tests/gateway/test_telegram_documents.py
index 86e5cb30fb0..3a68139fa99 100644
--- a/tests/gateway/test_telegram_documents.py
+++ b/tests/gateway/test_telegram_documents.py
@@ -483,6 +483,32 @@ class TestSendDocument:
         assert "not found" in result.error.lower()
         connected_adapter._bot.send_document.assert_not_called()
 
+    @pytest.mark.asyncio
+    async def test_send_document_workspace_path_has_docker_hint(self, connected_adapter):
+        """Container-local-looking paths get a more actionable Docker hint."""
+        result = await connected_adapter.send_document(
+            chat_id="12345",
+            file_path="/workspace/report.txt",
+        )
+
+        assert result.success is False
+        assert "docker sandbox" in result.error.lower()
+        assert "host-visible path" in result.error.lower()
+        connected_adapter._bot.send_document.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_send_document_outputs_path_has_docker_hint(self, connected_adapter):
+        """Legacy /outputs paths also get the Docker hint."""
+        result = await connected_adapter.send_document(
+            chat_id="12345",
+            file_path="/outputs/report.txt",
+        )
+
+        assert result.success is False
+        assert "docker sandbox" in result.error.lower()
+        assert "host-visible path" in result.error.lower()
+        connected_adapter._bot.send_document.assert_not_called()
+
     @pytest.mark.asyncio
     async def test_send_document_not_connected(self, adapter):
         """If bot is None, returns not connected error."""
@@ -665,6 +691,17 @@ class TestSendVideo:
         assert result.success is False
         assert "not found" in result.error.lower()
 
+    @pytest.mark.asyncio
+    async def test_send_video_workspace_path_has_docker_hint(self, connected_adapter):
+        result = await connected_adapter.send_video(
+            chat_id="12345",
+            video_path="/workspace/video.mp4",
+        )
+
+        assert result.success is False
+        assert "docker sandbox" in result.error.lower()
+        assert "host-visible path" in result.error.lower()
+
     @pytest.mark.asyncio
     async def test_send_video_not_connected(self, adapter):
         result = await adapter.send_video(
diff --git a/tests/gateway/test_text_batching.py b/tests/gateway/test_text_batching.py
index 56bc602ef09..1ad89ffd055 100644
--- a/tests/gateway/test_text_batching.py
+++ b/tests/gateway/test_text_batching.py
@@ -148,6 +148,70 @@ class TestDiscordTextBatching:
         await asyncio.sleep(0.25)
         adapter.handle_message.assert_called_once()
 
+    @pytest.mark.asyncio
+    async def test_shield_protects_handle_message_from_cancel(self):
+        """Regression guard: a follow-up chunk arriving while
+        handle_message is mid-flight must NOT cancel the running
+        dispatch.  _enqueue_text_event fires prior_task.cancel() on
+        every new chunk; without asyncio.shield around handle_message
+        the cancel propagates into the agent's streaming request and
+        aborts the response.
+        """
+        adapter = _make_discord_adapter()
+
+        handle_started = asyncio.Event()
+        release_handle = asyncio.Event()
+        first_handle_cancelled = asyncio.Event()
+        first_handle_completed = asyncio.Event()
+        call_count = [0]  # one-element list: slow_handle mutates it in place, so no `nonlocal` is needed
+
+        async def slow_handle(event):
+            call_count[0] += 1
+            # Only the first call (batch 1) is the one we're protecting.
+            if call_count[0] == 1:
+                handle_started.set()
+                try:
+                    await release_handle.wait()
+                    first_handle_completed.set()
+                except asyncio.CancelledError:
+                    first_handle_cancelled.set()
+                    raise
+            # Second call (batch 2) returns immediately — not the subject
+            # of this test.
+
+        adapter.handle_message = slow_handle
+
+        # Prime batch 1 and wait for it to land inside handle_message.
+        adapter._enqueue_text_event(_make_event("batch 1", Platform.DISCORD))
+        await asyncio.wait_for(handle_started.wait(), timeout=1.0)
+
+        # A new chunk arrives — _enqueue_text_event fires
+        # prior_task.cancel() on batch 1's flush task, which is
+        # currently awaiting inside handle_message.
+        adapter._enqueue_text_event(_make_event("batch 2 follow-up", Platform.DISCORD))
+
+        # Let the cancel propagate.
+        await asyncio.sleep(0.05)
+
+        # CRITICAL ASSERTION: batch 1's handle_message must NOT have
+        # been cancelled.  Without asyncio.shield this assertion fails
+        # because CancelledError propagates from the flush task's
+        # `await self.handle_message(event)` into slow_handle.
+        assert not first_handle_cancelled.is_set(), (
+            "handle_message for batch 1 was cancelled by a follow-up "
+            "chunk — asyncio.shield is missing or broken"
+        )
+
+        # Release batch 1's handle_message and let it complete.
+        release_handle.set()
+        await asyncio.wait_for(first_handle_completed.wait(), timeout=1.0)
+        assert first_handle_completed.is_set()  # already implied by the wait_for above; kept as an explicit check
+
+        # Cleanup
+        for task in list(adapter._pending_text_batch_tasks.values()):
+            task.cancel()
+        await asyncio.sleep(0.01)
+
 
 # =====================================================================
 # Matrix text batching
diff --git a/tests/gateway/test_voice_command.py b/tests/gateway/test_voice_command.py
index f0c3171d6e7..f25fb972e44 100644
--- a/tests/gateway/test_voice_command.py
+++ b/tests/gateway/test_voice_command.py
@@ -758,7 +758,7 @@ class TestVoiceChannelCommands:
         result = await runner._handle_voice_channel_join(event)
 
         assert "voice dependencies are missing" in result.lower()
-        assert "hermes-agent[messaging]" in result
+        assert "PyNaCl" in result
 
     # -- _handle_voice_channel_leave --
 
diff --git a/tests/gateway/test_webhook_deliver_only.py b/tests/gateway/test_webhook_deliver_only.py
new file mode 100644
index 00000000000..d73a1520159
--- /dev/null
+++ b/tests/gateway/test_webhook_deliver_only.py
@@ -0,0 +1,473 @@
+"""Tests for the webhook adapter's ``deliver_only`` route mode.
+
+``deliver_only`` lets external services (Supabase webhooks, monitoring
+alerts, background jobs, other agents) push plain-text notifications to
+a user's chat via the webhook adapter WITHOUT invoking the agent.  The
+rendered prompt template becomes the literal message body.
+
+Covers:
+- Agent is NOT invoked (``handle_message`` never called)
+- Rendered content is delivered to the target platform adapter
+- HTTP returns 200 OK on success, 502 on delivery failure
+- Startup validation rejects ``deliver_only`` without a real delivery target
+- HMAC auth, rate limiting, and idempotency still apply
+"""
+
+import asyncio
+import hashlib
+import hmac
+import json
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from aiohttp import web
+from aiohttp.test_utils import TestClient, TestServer
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import MessageEvent, SendResult
+from gateway.platforms.webhook import WebhookAdapter, _INSECURE_NO_AUTH
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_adapter(routes, **extra_kw) -> WebhookAdapter:  # build a WebhookAdapter configured with the given routes
+    extra = {"host": "0.0.0.0", "port": 0, "routes": routes}  # NOTE(review): port 0 presumably means "OS picks a free port" — confirm in WebhookAdapter
+    extra.update(extra_kw)  # caller-supplied keywords override the defaults above
+    config = PlatformConfig(enabled=True, extra=extra)
+    return WebhookAdapter(config)
+
+
+def _create_app(adapter: WebhookAdapter) -> web.Application:  # minimal aiohttp app wired straight to the adapter's handlers
+    app = web.Application()
+    app.router.add_get("/health", adapter._handle_health)
+    app.router.add_post("/webhooks/{route_name}", adapter._handle_webhook)  # tests POST to /webhooks/<route name>
+    return app
+
+
+def _wire_mock_target(adapter: WebhookAdapter, platform_name: str = "telegram"):
+    """Attach a mocked gateway_runner to *adapter* and return its mocked target adapter."""
+    mock_target = AsyncMock()
+    mock_target.send = AsyncMock(return_value=SendResult(success=True))  # delivery succeeds unless a test overrides send
+
+    mock_runner = MagicMock()
+    mock_runner.adapters = {Platform(platform_name): mock_target}  # only this one platform is registered
+    mock_runner.config.get_home_channel.return_value = None  # home-channel lookup yields nothing
+
+    adapter.gateway_runner = mock_runner
+    return mock_target  # returned so tests can assert on send() calls
+
+
+# ===================================================================
+# Core behaviour: agent bypass
+# ===================================================================
+
+class TestDeliverOnlyBypassesAgent:
+    """The whole point of the feature — handle_message must not be called."""
+
+    @pytest.mark.asyncio
+    async def test_post_delivers_directly_without_agent(self):
+        routes = {
+            "match-alert": {
+                "secret": _INSECURE_NO_AUTH,
+                "deliver": "telegram",
+                "deliver_only": True,
+                "deliver_extra": {"chat_id": "12345"},
+                "prompt": "{payload.user} matched with {payload.other}!",
+            }
+        }
+        adapter = _make_adapter(routes)
+        mock_target = _wire_mock_target(adapter)
+
+        # Guard: handle_message must NOT be called in deliver_only mode
+        handle_message_calls: list[MessageEvent] = []
+
+        async def _capture(event):
+            handle_message_calls.append(event)
+
+        adapter.handle_message = _capture
+
+        app = _create_app(adapter)
+        body = json.dumps(
+            {"payload": {"user": "alice", "other": "bob"}}
+        ).encode()
+
+        async with TestClient(TestServer(app)) as cli:
+            resp = await cli.post(
+                "/webhooks/match-alert",
+                data=body,
+                headers={
+                    "Content-Type": "application/json",
+                    "X-GitHub-Delivery": "delivery-1",
+                },
+            )
+            assert resp.status == 200
+            data = await resp.json()
+            assert data["status"] == "delivered"
+            assert data["route"] == "match-alert"
+            assert data["target"] == "telegram"
+
+        # Let any background tasks settle before asserting no agent call
+        await asyncio.sleep(0.05)
+
+        # Agent was NOT invoked
+        assert handle_message_calls == []
+
+        # Target adapter.send() WAS called with the rendered template
+        mock_target.send.assert_awaited_once()
+        call_args = mock_target.send.await_args
+        chat_id_arg, content_arg = call_args.args[0], call_args.args[1]
+        assert chat_id_arg == "12345"
+        assert content_arg == "alice matched with bob!"
+
+    @pytest.mark.asyncio
+    async def test_template_rendering_works(self):
+        """Dot-notation template variables resolve in deliver_only mode."""
+        routes = {
+            "alert": {
+                "secret": _INSECURE_NO_AUTH,
+                "deliver": "telegram",
+                "deliver_only": True,
+                "deliver_extra": {"chat_id": "chat-1"},
+                "prompt": "Build {build.number} status: {build.status}",
+            }
+        }
+        adapter = _make_adapter(routes)
+        mock_target = _wire_mock_target(adapter)
+        app = _create_app(adapter)
+
+        async with TestClient(TestServer(app)) as cli:
+            resp = await cli.post(
+                "/webhooks/alert",
+                json={"build": {"number": 77, "status": "FAILED"}},
+                headers={"X-GitHub-Delivery": "d-render-1"},
+            )
+            assert resp.status == 200
+
+        mock_target.send.assert_awaited_once()
+        content_arg = mock_target.send.await_args.args[1]
+        assert content_arg == "Build 77 status: FAILED"
+
+    @pytest.mark.asyncio
+    async def test_thread_id_passed_through(self):
+        """deliver_extra.thread_id flows through to the target adapter."""
+        routes = {
+            "r": {
+                "secret": _INSECURE_NO_AUTH,
+                "deliver": "telegram",
+                "deliver_only": True,
+                "deliver_extra": {"chat_id": "c-1", "thread_id": "topic-42"},
+                "prompt": "hi",
+            }
+        }
+        adapter = _make_adapter(routes)
+        mock_target = _wire_mock_target(adapter)
+
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            resp = await cli.post(
+                "/webhooks/r",
+                json={},
+                headers={"X-GitHub-Delivery": "d-thread-1"},
+            )
+            assert resp.status == 200
+
+        assert mock_target.send.await_args.kwargs["metadata"] == {
+            "thread_id": "topic-42"
+        }
+
+
+# ===================================================================
+# HTTP status codes
+# ===================================================================
+
+class TestDeliverOnlyStatusCodes:
+
+    @pytest.mark.asyncio
+    async def test_delivery_failure_returns_502(self):
+        """If the target adapter returns SendResult(success=False), 502."""
+        routes = {
+            "r": {
+                "secret": _INSECURE_NO_AUTH,
+                "deliver": "telegram",
+                "deliver_only": True,
+                "deliver_extra": {"chat_id": "c-1"},
+                "prompt": "hi",
+            }
+        }
+        adapter = _make_adapter(routes)
+        mock_target = _wire_mock_target(adapter)
+        mock_target.send = AsyncMock(
+            return_value=SendResult(success=False, error="rate limited by tg")
+        )
+
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            resp = await cli.post(
+                "/webhooks/r",
+                json={},
+                headers={"X-GitHub-Delivery": "d-fail-1"},
+            )
+            assert resp.status == 502
+            data = await resp.json()
+            # Generic error — no adapter-level detail leaks
+            assert data["error"] == "Delivery failed"
+            assert "rate limited" not in json.dumps(data)
+
+    @pytest.mark.asyncio
+    async def test_delivery_exception_returns_502(self):
+        """If adapter.send() raises, we return 502 (not 500)."""
+        routes = {
+            "r": {
+                "secret": _INSECURE_NO_AUTH,
+                "deliver": "telegram",
+                "deliver_only": True,
+                "deliver_extra": {"chat_id": "c-1"},
+                "prompt": "hi",
+            }
+        }
+        adapter = _make_adapter(routes)
+        mock_target = _wire_mock_target(adapter)
+        mock_target.send = AsyncMock(side_effect=RuntimeError("tg exploded"))
+
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            resp = await cli.post(
+                "/webhooks/r",
+                json={},
+                headers={"X-GitHub-Delivery": "d-exc-1"},
+            )
+            assert resp.status == 502
+            data = await resp.json()
+            assert data["error"] == "Delivery failed"
+            # Exception message must not leak
+            assert "exploded" not in json.dumps(data)
+
+    @pytest.mark.asyncio
+    async def test_target_platform_not_connected_returns_502(self):
+        """deliver_only to a platform the gateway doesn't have → 502."""
+        routes = {
+            "r": {
+                "secret": _INSECURE_NO_AUTH,
+                "deliver": "discord",  # not configured in mock runner
+                "deliver_only": True,
+                "deliver_extra": {"chat_id": "c-1"},
+                "prompt": "hi",
+            }
+        }
+        adapter = _make_adapter(routes)
+        _wire_mock_target(adapter, platform_name="telegram")  # only TG wired
+
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            resp = await cli.post(
+                "/webhooks/r",
+                json={},
+                headers={"X-GitHub-Delivery": "d-no-platform-1"},
+            )
+            assert resp.status == 502
+
+
+# ===================================================================
+# Startup validation
+# ===================================================================
+
+class TestDeliverOnlyStartupValidation:
+    """Route validation performed by ``connect()`` for deliver_only routes."""
+
+    @pytest.mark.asyncio
+    async def test_deliver_only_with_log_deliver_rejected(self):
+        """connect() refuses a deliver_only route whose deliver is 'log'."""
+        bad_route = {
+            "secret": _INSECURE_NO_AUTH,
+            "deliver": "log",
+            "deliver_only": True,
+            "prompt": "hi",
+        }
+        routes = {"bad": bad_route}
+        adapter = _make_adapter(routes)
+        with pytest.raises(ValueError, match="deliver_only=true but deliver is 'log'"):
+            await adapter.connect()
+
+    @pytest.mark.asyncio
+    async def test_deliver_only_with_missing_deliver_rejected(self):
+        """A deliver_only route without a deliver field is rejected too."""
+        # No "deliver" key here; presumably the adapter treats the missing
+        # field like 'log', which a deliver_only route cannot use.
+        incomplete_route = {
+            "secret": _INSECURE_NO_AUTH,
+            "deliver_only": True,
+            "prompt": "hi",
+        }
+        routes = {"bad": incomplete_route}
+        adapter = _make_adapter(routes)
+        with pytest.raises(ValueError, match="deliver_only=true"):
+            await adapter.connect()
+
+    @pytest.mark.asyncio
+    async def test_deliver_only_with_real_target_accepted(self):
+        """A well-formed deliver_only route passes startup validation."""
+        good_route = {
+            "secret": _INSECURE_NO_AUTH,
+            "deliver": "telegram",
+            "deliver_only": True,
+            "deliver_extra": {"chat_id": "c-1"},
+            "prompt": "hi",
+        }
+        routes = {"good": good_route}
+        adapter = _make_adapter(routes)
+
+        # connect() also binds a socket, so a successful start is torn
+        # down immediately; this test only cares that validation does
+        # not raise ValueError.
+        try:
+            started = await adapter.connect()
+            if started:
+                await adapter.disconnect()
+        except ValueError:
+            pytest.fail("valid deliver_only config should not raise ValueError")
+
+
+# ===================================================================
+# Security + reliability invariants still hold
+# ===================================================================
+
+class TestDeliverOnlySecurityInvariants:
+    """deliver_only routes keep the HMAC, idempotency and rate-limit checks."""
+
+    @pytest.mark.asyncio
+    async def test_hmac_still_enforced(self):
+        """An unsigned POST to a secret-protected route is rejected with 401."""
+        secret = "real-secret-123"
+        route_cfg = {
+            "secret": secret,
+            "deliver": "telegram",
+            "deliver_only": True,
+            "deliver_extra": {"chat_id": "c-1"},
+            "prompt": "hi",
+        }
+        routes = {"r": route_cfg}
+        adapter = _make_adapter(routes)
+        target = _wire_mock_target(adapter)
+
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as client:
+            # The request carries a delivery id but no signature header.
+            unsigned = await client.post(
+                "/webhooks/r",
+                json={},
+                headers={"X-GitHub-Delivery": "d-noauth-1"},
+            )
+            assert unsigned.status == 401
+
+        # A rejected request must never reach the delivery target.
+        target.send.assert_not_awaited()
+
+    @pytest.mark.asyncio
+    async def test_idempotency_still_applies(self):
+        """Re-posting a delivery id is answered as a duplicate, not re-sent."""
+        route_cfg = {
+            "secret": _INSECURE_NO_AUTH,
+            "deliver": "telegram",
+            "deliver_only": True,
+            "deliver_extra": {"chat_id": "c-1"},
+            "prompt": "hi",
+        }
+        routes = {"r": route_cfg}
+        adapter = _make_adapter(routes)
+        target = _wire_mock_target(adapter)
+        same_delivery = {"X-GitHub-Delivery": "dup-1"}
+
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as client:
+            first = await client.post(
+                "/webhooks/r",
+                json={},
+                headers=same_delivery,
+            )
+            assert first.status == 200
+
+            second = await client.post(
+                "/webhooks/r",
+                json={},
+                headers=same_delivery,
+            )
+            # Duplicates are acknowledged with 200 and status=duplicate.
+            assert second.status == 200
+            body = await second.json()
+            assert body["status"] == "duplicate"
+
+        # Only the first POST may have been forwarded to the target.
+        assert target.send.await_count == 1
+
+    @pytest.mark.asyncio
+    async def test_rate_limit_still_applies(self):
+        """With rate_limit=2 the third deliver_only POST is answered 429."""
+        route_cfg = {
+            "secret": _INSECURE_NO_AUTH,
+            "deliver": "telegram",
+            "deliver_only": True,
+            "deliver_extra": {"chat_id": "c-1"},
+            "prompt": "hi",
+        }
+        routes = {"r": route_cfg}
+        adapter = _make_adapter(routes, rate_limit=2)
+        _wire_mock_target(adapter)
+
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as client:
+            # Distinct delivery ids keep the duplicate check out of the way.
+            for delivery_id in ("rl-0", "rl-1"):
+                allowed = await client.post(
+                    "/webhooks/r",
+                    json={},
+                    headers={"X-GitHub-Delivery": delivery_id},
+                )
+                assert allowed.status == 200
+
+            # One more request inside the same window exceeds the limit.
+            throttled = await client.post(
+                "/webhooks/r",
+                json={},
+                headers={"X-GitHub-Delivery": "rl-3"},
+            )
+            assert throttled.status == 429
+
+
+# ===================================================================
+# Unit: _direct_deliver dispatch
+# ===================================================================
+
+class TestDirectDeliverUnit:
+    """Direct calls to ``_direct_deliver`` without going through HTTP."""
+
+    @pytest.mark.asyncio
+    async def test_dispatches_to_cross_platform_for_messaging_targets(self):
+        """A telegram route sends the content to the wired target's send()."""
+        adapter = _make_adapter({})
+        target = _wire_mock_target(adapter, "telegram")
+        route = {"deliver": "telegram", "deliver_extra": {"chat_id": "c-1"}}
+
+        outcome = await adapter._direct_deliver("hello", route)
+
+        assert outcome.success is True
+        target.send.assert_awaited_once_with("c-1", "hello", metadata=None)
+
+    @pytest.mark.asyncio
+    async def test_dispatches_to_github_comment(self):
+        """A github_comment route is handed to _deliver_github_comment."""
+        adapter = _make_adapter({})
+        route = {
+            "deliver": "github_comment",
+            "deliver_extra": {"repo": "org/r", "pr_number": "1"},
+        }
+
+        # Swap the GitHub helper for a mock that reports success.
+        fake_comment = AsyncMock(return_value=SendResult(success=True))
+        with patch.object(
+            adapter, "_deliver_github_comment", new=fake_comment
+        ) as mock_gh:
+            outcome = await adapter._direct_deliver("review body", route)
+            assert outcome.success is True
+            mock_gh.assert_awaited_once()
diff --git a/tests/hermes_cli/test_auth_codex_provider.py b/tests/hermes_cli/test_auth_codex_provider.py
index f05a80b6ac1..ddcaf1721f3 100644
--- a/tests/hermes_cli/test_auth_codex_provider.py
+++ b/tests/hermes_cli/test_auth_codex_provider.py
@@ -14,7 +14,6 @@ from hermes_cli.auth import (
     PROVIDER_REGISTRY,
     _read_codex_tokens,
     _save_codex_tokens,
-    _write_codex_cli_tokens,
     _import_codex_cli_tokens,
     get_codex_auth_status,
     get_provider_auth_state,
@@ -182,98 +181,6 @@ def test_codex_tokens_not_written_to_shared_file(tmp_path, monkeypatch):
     assert data["tokens"]["access_token"] == "hermes-at"
 
 
-def test_write_codex_cli_tokens_creates_file(tmp_path, monkeypatch):
-    """_write_codex_cli_tokens creates ~/.codex/auth.json with refreshed tokens."""
-    codex_home = tmp_path / "codex-cli"
-    monkeypatch.setenv("CODEX_HOME", str(codex_home))
-
-    _write_codex_cli_tokens("new-access", "new-refresh", last_refresh="2026-04-12T00:00:00Z")
-
-    auth_path = codex_home / "auth.json"
-    assert auth_path.exists()
-    data = json.loads(auth_path.read_text())
-    assert data["tokens"]["access_token"] == "new-access"
-    assert data["tokens"]["refresh_token"] == "new-refresh"
-    assert data["last_refresh"] == "2026-04-12T00:00:00Z"
-    # Verify file permissions are restricted
-    assert (auth_path.stat().st_mode & 0o777) == 0o600
-
-
-def test_write_codex_cli_tokens_preserves_existing(tmp_path, monkeypatch):
-    """_write_codex_cli_tokens preserves extra fields in existing auth.json."""
-    codex_home = tmp_path / "codex-cli"
-    codex_home.mkdir(parents=True, exist_ok=True)
-    monkeypatch.setenv("CODEX_HOME", str(codex_home))
-
-    existing = {
-        "tokens": {
-            "access_token": "old-access",
-            "refresh_token": "old-refresh",
-            "extra_field": "preserved",
-        },
-        "last_refresh": "2026-01-01T00:00:00Z",
-        "custom_key": "keep_me",
-    }
-    (codex_home / "auth.json").write_text(json.dumps(existing))
-
-    _write_codex_cli_tokens("updated-access", "updated-refresh")
-
-    data = json.loads((codex_home / "auth.json").read_text())
-    assert data["tokens"]["access_token"] == "updated-access"
-    assert data["tokens"]["refresh_token"] == "updated-refresh"
-    assert data["tokens"]["extra_field"] == "preserved"
-    assert data["custom_key"] == "keep_me"
-    # last_refresh not updated since we didn't pass it
-    assert data["last_refresh"] == "2026-01-01T00:00:00Z"
-
-
-def test_write_codex_cli_tokens_handles_missing_dir(tmp_path, monkeypatch):
-    """_write_codex_cli_tokens creates parent directories if missing."""
-    codex_home = tmp_path / "does" / "not" / "exist"
-    monkeypatch.setenv("CODEX_HOME", str(codex_home))
-
-    _write_codex_cli_tokens("at", "rt")
-
-    assert (codex_home / "auth.json").exists()
-    data = json.loads((codex_home / "auth.json").read_text())
-    assert data["tokens"]["access_token"] == "at"
-
-
-def test_refresh_codex_auth_tokens_writes_back_to_cli(tmp_path, monkeypatch):
-    """After refreshing, _refresh_codex_auth_tokens writes back to ~/.codex/auth.json."""
-    from hermes_cli.auth import _refresh_codex_auth_tokens
-
-    hermes_home = tmp_path / "hermes"
-    codex_home = tmp_path / "codex-cli"
-    hermes_home.mkdir(parents=True, exist_ok=True)
-    codex_home.mkdir(parents=True, exist_ok=True)
-    (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}}))
-    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
-    monkeypatch.setenv("CODEX_HOME", str(codex_home))
-
-    # Write initial CLI tokens
-    (codex_home / "auth.json").write_text(json.dumps({
-        "tokens": {"access_token": "old-at", "refresh_token": "old-rt"},
-    }))
-
-    # Mock the pure refresh to return new tokens
-    monkeypatch.setattr("hermes_cli.auth.refresh_codex_oauth_pure", lambda *a, **kw: {
-        "access_token": "refreshed-at",
-        "refresh_token": "refreshed-rt",
-        "last_refresh": "2026-04-12T01:00:00Z",
-    })
-
-    _refresh_codex_auth_tokens(
-        {"access_token": "old-at", "refresh_token": "old-rt"},
-        timeout_seconds=10,
-    )
-
-    # Verify CLI file was updated
-    cli_data = json.loads((codex_home / "auth.json").read_text())
-    assert cli_data["tokens"]["access_token"] == "refreshed-at"
-    assert cli_data["tokens"]["refresh_token"] == "refreshed-rt"
-
-
 def test_resolve_returns_hermes_auth_store_source(tmp_path, monkeypatch):
     hermes_home = tmp_path / "hermes"
     _setup_hermes_auth(hermes_home)
diff --git a/tests/hermes_cli/test_cmd_update.py b/tests/hermes_cli/test_cmd_update.py
index c8f284228bd..1e6a2245b2d 100644
--- a/tests/hermes_cli/test_cmd_update.py
+++ b/tests/hermes_cli/test_cmd_update.py
@@ -124,29 +124,23 @@ class TestCmdUpdateBranchFallback:
             if call.args and call.args[0][0] == "/usr/bin/npm"
         ]
 
+        # cmd_update runs npm commands in three locations:
+        #   1. repo root  — slash-command / TUI bridge deps
+        #   2. ui-tui/    — Ink TUI deps
+        #   3. web/       — install + "npm run build" for the web frontend
+        full_flags = [
+            "/usr/bin/npm",
+            "install",
+            "--silent",
+            "--no-fund",
+            "--no-audit",
+            "--progress=false",
+        ]
         assert npm_calls == [
-            (
-                [
-                    "/usr/bin/npm",
-                    "install",
-                    "--silent",
-                    "--no-fund",
-                    "--no-audit",
-                    "--progress=false",
-                ],
-                PROJECT_ROOT,
-            ),
-            (
-                [
-                    "/usr/bin/npm",
-                    "install",
-                    "--silent",
-                    "--no-fund",
-                    "--no-audit",
-                    "--progress=false",
-                ],
-                PROJECT_ROOT / "ui-tui",
-            ),
+            (full_flags, PROJECT_ROOT),
+            (full_flags, PROJECT_ROOT / "ui-tui"),
+            (["/usr/bin/npm", "install", "--silent"], PROJECT_ROOT / "web"),
+            (["/usr/bin/npm", "run", "build"], PROJECT_ROOT / "web"),
         ]
 
     def test_update_non_interactive_skips_migration_prompt(self, mock_args, capsys):
diff --git a/tests/hermes_cli/test_config.py b/tests/hermes_cli/test_config.py
index f31ac045c4f..4330424b9a2 100644
--- a/tests/hermes_cli/test_config.py
+++ b/tests/hermes_cli/test_config.py
@@ -459,7 +459,7 @@ class TestCustomProviderCompatibility:
             migrate_config(interactive=False, quiet=True)
             raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
 
-        assert raw["_config_version"] == 18
+        assert raw["_config_version"] == 19
         assert raw["providers"]["openai-direct"] == {
             "api": "https://api.openai.com/v1",
             "api_key": "test-key",
@@ -606,7 +606,7 @@ class TestInterimAssistantMessageConfig:
             migrate_config(interactive=False, quiet=True)
             raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
 
-        assert raw["_config_version"] == 18
+        assert raw["_config_version"] == 19
         assert raw["display"]["tool_progress"] == "off"
         assert raw["display"]["interim_assistant_messages"] is True
 
@@ -626,6 +626,6 @@ class TestDiscordChannelPromptsConfig:
             migrate_config(interactive=False, quiet=True)
             raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
 
-        assert raw["_config_version"] == 18
+        assert raw["_config_version"] == 19
         assert raw["discord"]["auto_thread"] is True
         assert raw["discord"]["channel_prompts"] == {}
diff --git a/tests/hermes_cli/test_cron.py b/tests/hermes_cli/test_cron.py
index 9ae92048272..8593195a1ba 100644
--- a/tests/hermes_cli/test_cron.py
+++ b/tests/hermes_cli/test_cron.py
@@ -54,12 +54,12 @@ class TestCronCommandLifecycle:
                 deliver=None,
                 repeat=None,
                 skill=None,
-                skills=["find-nearby", "blogwatcher"],
+                skills=["maps", "blogwatcher"],
                 clear_skills=False,
             )
         )
         updated = get_job(job["id"])
-        assert updated["skills"] == ["find-nearby", "blogwatcher"]
+        assert updated["skills"] == ["maps", "blogwatcher"]
         assert updated["name"] == "Edited Job"
         assert updated["prompt"] == "Revised prompt"
         assert updated["schedule_display"] == "every 120m"
@@ -95,7 +95,7 @@ class TestCronCommandLifecycle:
                 deliver=None,
                 repeat=None,
                 skill=None,
-                skills=["blogwatcher", "find-nearby"],
+                skills=["blogwatcher", "maps"],
             )
         )
         out = capsys.readouterr().out
@@ -103,5 +103,5 @@ class TestCronCommandLifecycle:
 
         jobs = list_jobs()
         assert len(jobs) == 1
-        assert jobs[0]["skills"] == ["blogwatcher", "find-nearby"]
+        assert jobs[0]["skills"] == ["blogwatcher", "maps"]
         assert jobs[0]["name"] == "Skill combo"
diff --git a/tests/hermes_cli/test_gemini_provider.py b/tests/hermes_cli/test_gemini_provider.py
index 089a5cf98d1..dbb1111fcf9 100644
--- a/tests/hermes_cli/test_gemini_provider.py
+++ b/tests/hermes_cli/test_gemini_provider.py
@@ -130,7 +130,7 @@ class TestGeminiModelCatalog:
         models = _PROVIDER_MODELS["gemini"]
         assert "gemini-2.5-pro" in models
         assert "gemini-2.5-flash" in models
-        assert "gemma-4-31b-it" in models
+        assert "gemma-4-31b-it" not in models
 
     def test_provider_models_has_3x(self):
         models = _PROVIDER_MODELS["gemini"]
@@ -207,6 +207,37 @@ class TestGeminiAgentInit:
             assert agent.api_mode == "chat_completions"
             assert agent.provider == "gemini"
 
+    def test_gemini_uses_bearer_auth(self, monkeypatch):
+        """AIAgent passes the Gemini key to OpenAI() as api_key, not as a header."""
+        api_key = "AIzaSy_REAL_KEY"
+        monkeypatch.setenv("GOOGLE_API_KEY", api_key)
+        with patch("run_agent.OpenAI") as openai_cls:
+            openai_cls.return_value = MagicMock()
+            from run_agent import AIAgent
+            AIAgent(
+                model="gemini-2.5-flash",
+                provider="gemini",
+                api_key=api_key,
+                base_url="https://generativelanguage.googleapis.com/v1beta/openai",
+            )
+        # Index 1 of call_args is the kwargs dict of the last OpenAI() call.
+        ctor_kwargs = openai_cls.call_args[1]
+        assert ctor_kwargs.get("api_key") == api_key
+        assert "x-goog-api-key" not in ctor_kwargs.get("default_headers", {})
+
+    def test_gemini_resolve_provider_client_auth(self, monkeypatch):
+        """resolve_provider_client('gemini') hands GEMINI_API_KEY to OpenAI()."""
+        env_key = "AIzaSy_TEST_KEY"
+        monkeypatch.setenv("GEMINI_API_KEY", env_key)
+        with patch("agent.auxiliary_client.OpenAI") as openai_cls:
+            openai_cls.return_value = MagicMock()
+            from agent.auxiliary_client import resolve_provider_client
+            resolve_provider_client("gemini")
+        # Index 1 of call_args is the kwargs dict of the last OpenAI() call.
+        ctor_kwargs = openai_cls.call_args[1]
+        assert ctor_kwargs.get("api_key") == env_key
+        assert "x-goog-api-key" not in ctor_kwargs.get("default_headers", {})
+
 
 # ── models.dev Integration ──
 
@@ -261,9 +292,32 @@ class TestGeminiModelsDev:
             result = list_agentic_models("gemini")
         assert "gemini-3-flash-preview" in result
         assert "gemini-2.5-pro" in result
-        assert "gemma-4-31b-it" in result
+        assert "gemma-4-31b-it" not in result
         # Filtered out:
         assert "gemini-embedding-001" not in result      # no tool_call
         assert "gemini-2.5-flash-preview-tts" not in result  # no tool_call
         assert "gemini-live-2.5-flash" not in result     # noise: live-
         assert "gemini-2.5-flash-preview-04-17" not in result  # noise: dated preview
+
+    def test_list_provider_models_hides_low_tpm_google_gemmas(self):
+        """list_provider_models('gemini') keeps gemini-2.5-pro and hides the rest."""
+        # Model ids the listing is expected to filter out.
+        hidden = (
+            "gemma-4-31b-it",
+            "gemma-3-27b-it",
+            "gemini-1.5-pro",
+            "gemini-2.0-flash",
+        )
+
+        catalog = {"gemini-2.5-pro": {}, **{model_id: {} for model_id in hidden}}
+        mock_data = {"google": {"models": catalog}}
+
+        with patch("agent.models_dev.fetch_models_dev", return_value=mock_data):
+            from agent.models_dev import list_provider_models
+
+            result = list_provider_models("gemini")
+
+        assert "gemini-2.5-pro" in result
+        # None of the filtered ids may show up in the listing.
+        for model_id in hidden:
+            assert model_id not in result
diff --git a/tests/hermes_cli/test_model_validation.py b/tests/hermes_cli/test_model_validation.py
index cbd41216622..1ddf6ab6399 100644
--- a/tests/hermes_cli/test_model_validation.py
+++ b/tests/hermes_cli/test_model_validation.py
@@ -450,9 +450,9 @@ class TestValidateApiNotFound:
         assert result["recognized"] is True
 
     def test_dissimilar_model_shows_suggestions_not_autocorrect(self):
-        """Models too different for auto-correction still get suggestions."""
+        """Models too different for auto-correction are rejected with suggestions."""
         result = _validate("anthropic/claude-nonexistent")
-        assert result["accepted"] is True
+        assert result["accepted"] is False
         assert result.get("corrected_model") is None
         assert "not found" in result["message"]
 
@@ -532,11 +532,11 @@ class TestValidateCodexAutoCorrection:
         assert result["message"] is None
 
     def test_very_different_name_falls_to_suggestions(self):
-        """Names too different for auto-correction get the suggestion list."""
+        """Names too different for auto-correction are rejected with a suggestion list."""
         codex_models = ["gpt-5.4-mini", "gpt-5.4", "gpt-5.3-codex"]
         with patch("hermes_cli.models.provider_model_ids", return_value=codex_models):
             result = validate_requested_model("totally-wrong", "openai-codex")
-        assert result["accepted"] is True
+        assert result["accepted"] is False
         assert result["recognized"] is False
         assert result.get("corrected_model") is None
         assert "not found" in result["message"]
diff --git a/tests/hermes_cli/test_setup_agent_settings.py b/tests/hermes_cli/test_setup_agent_settings.py
new file mode 100644
index 00000000000..868be7508c0
--- /dev/null
+++ b/tests/hermes_cli/test_setup_agent_settings.py
@@ -0,0 +1,29 @@
+"""Tests for agent-settings copy in the interactive setup wizard."""
+
+from hermes_cli.setup import setup_agent_settings
+
+
+def test_setup_agent_settings_uses_displayed_max_iterations_value(tmp_path, monkeypatch, capsys):
+    """The helper text must quote the displayed value (60), not "Default is 90"."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+
+    # Answers handed out, in order, by the stubbed prompt() below.
+    scripted_answers = iter(["60", "all", "0.5"])
+
+    monkeypatch.setattr("hermes_cli.setup.get_env_value", lambda key: "60" if key == "HERMES_MAX_ITERATIONS" else "")
+    monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: next(scripted_answers))
+    monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda *args, **kwargs: 4)
+    monkeypatch.setattr("hermes_cli.setup.save_env_value", lambda *args, **kwargs: None)
+    monkeypatch.setattr("hermes_cli.setup.save_config", lambda *args, **kwargs: None)
+
+    wizard_config = {
+        "agent": {"max_turns": 90},
+        "display": {"tool_progress": "all"},
+        "compression": {"threshold": 0.50},
+        "session_reset": {"mode": "both", "idle_minutes": 1440, "at_hour": 4},
+    }
+    setup_agent_settings(wizard_config)
+
+    captured = capsys.readouterr().out
+    assert "Press Enter to keep 60." in captured
+    assert "Default is 90" not in captured
diff --git a/tests/hermes_cli/test_update_hangup_protection.py b/tests/hermes_cli/test_update_hangup_protection.py
new file mode 100644
index 00000000000..e5c81a45a01
--- /dev/null
+++ b/tests/hermes_cli/test_update_hangup_protection.py
@@ -0,0 +1,325 @@
+"""Tests for SIGHUP protection and stdout mirroring in ``hermes update``.
+
+Covers ``_UpdateOutputStream``, ``_install_hangup_protection``, and
+``_finalize_update_output`` in ``hermes_cli/main.py``.  These exist so
+that ``hermes update`` survives a terminal disconnect mid-install
+(SSH drop, shell close) without leaving the venv half-installed.
+"""
+
+from __future__ import annotations
+
+import io
+import os
+import signal
+import sys
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+from hermes_cli.main import (
+    _UpdateOutputStream,
+    _finalize_update_output,
+    _install_hangup_protection,
+)
+
+
+# -----------------------------------------------------------------------------
+# _UpdateOutputStream
+# -----------------------------------------------------------------------------
+
+
+class TestUpdateOutputStream:
+    def test_write_mirrors_to_both_original_and_log(self):
+        """A write lands verbatim in the wrapped stream and in the log."""
+        terminal, log = io.StringIO(), io.StringIO()
+        stream = _UpdateOutputStream(terminal, log)
+
+        stream.write("hello world\n")
+
+        assert terminal.getvalue() == "hello world\n"
+        assert log.getvalue() == "hello world\n"
+
+    def test_write_continues_after_broken_original(self):
+        """Writes keep reaching the log once the original stream breaks.
+
+        The wrapped stream raises BrokenPipeError on every call; the wrapper
+        is expected to absorb that and set ``_original_broken``.
+        """
+
+        class _DeadTerminal:
+            def write(self, data):
+                raise BrokenPipeError("terminal gone")
+
+            def flush(self):
+                raise BrokenPipeError("terminal gone")
+
+        log = io.StringIO()
+        stream = _UpdateOutputStream(_DeadTerminal(), log)
+
+        # Neither call may raise: not the write that first hits the broken
+        # pipe, and not the later one made after the failure was noticed.
+        stream.write("first line\n")
+        stream.write("second line\n")
+
+        assert stream._original_broken is True
+        assert log.getvalue() == "first line\nsecond line\n"
+
+    def test_write_tolerates_oserror_and_valueerror(self):
+        """OSError and ValueError from the original stream are absorbed too."""
+        class _FailingStream:
+            def __init__(self, error):
+                self._error = error
+
+            def write(self, data):
+                raise self._error
+
+            def flush(self):
+                raise self._error
+
+        shared_log = io.StringIO()
+        # An EIO-style OSError and the ValueError of a closed file object.
+        for error in (OSError("EIO"), ValueError("closed file")):
+            stream = _UpdateOutputStream(_FailingStream(error), shared_log)
+            stream.write("x\n")
+            assert stream._original_broken is True
+
+    def test_log_failure_does_not_abort_write(self):
+        """A log that raises OSError must not stop the write to the original."""
+        class _FullDiskLog:
+            def write(self, data):
+                raise OSError("disk full")
+
+            def flush(self):
+                raise OSError("disk full")
+
+        terminal = io.StringIO()
+        stream = _UpdateOutputStream(terminal, _FullDiskLog())
+
+        stream.write("data\n")
+
+        assert terminal.getvalue() == "data\n"
+
+    def test_flush_tolerates_broken_original(self):
+        """flush() absorbs BrokenPipeError and marks the original as broken."""
+        class _FlushFails:
+            def write(self, data):
+                return len(data)
+
+            def flush(self):
+                raise BrokenPipeError("gone")
+
+        stream = _UpdateOutputStream(_FlushFails(), io.StringIO())
+        stream.flush()
+        assert stream._original_broken is True
+
+    def test_isatty_delegates_to_original(self):
+        """isatty() reports what the wrapped stream reports while it works."""
+        class _Tty:
+            def isatty(self):
+                return True
+
+            def write(self, data):
+                return len(data)
+
+            def flush(self):
+                return None
+
+        assert _UpdateOutputStream(_Tty(), io.StringIO()).isatty() is True
+
+    def test_isatty_returns_false_after_broken(self):
+        class _TtyThatBreaks:
+            def isatty(self):
+                return True
+
+            def write(self, data):
+                raise BrokenPipeError()
+
+            def flush(self):
+                return None
+
+        stream = _UpdateOutputStream(_TtyThatBreaks(), io.StringIO())
+        stream.write("x")  # the failing write flags the original as broken
+        assert stream.isatty() is False
+
+    def test_getattr_delegates_unknown_attrs(self):
+        """An attribute such as encoding is read from the wrapped stream."""
+        class _Utf8Stream:
+            encoding = "utf-8"
+
+            def write(self, data):
+                return len(data)
+
+            def flush(self):
+                return None
+
+        assert _UpdateOutputStream(_Utf8Stream(), io.StringIO()).encoding == "utf-8"
+
+
+# -----------------------------------------------------------------------------
+# _install_hangup_protection
+# -----------------------------------------------------------------------------
+
+
+class TestInstallHangupProtection:
+    def test_gateway_mode_is_noop(self):
+        """In gateway mode the process is already detached — don't touch stdio or signals."""
+        prev_out, prev_err = sys.stdout, sys.stderr
+        prev_sighup = signal.getsignal(signal.SIGHUP) if hasattr(signal, "SIGHUP") else None
+
+        state = _install_hangup_protection(gateway_mode=True)
+
+        try:
+            assert sys.stdout is prev_out
+            assert sys.stderr is prev_err
+            assert state["log_file"] is None
+            assert state["installed"] is False
+            if hasattr(signal, "SIGHUP"):
+                assert signal.getsignal(signal.SIGHUP) == prev_sighup
+        finally:
+            _finalize_update_output(state)
+
+    @pytest.mark.skipif(
+        not hasattr(signal, "SIGHUP"), reason="SIGHUP not available on this platform"
+    )
+    def test_installs_sighup_ignore(self, tmp_path, monkeypatch):
+        """SIGHUP should be set to SIG_IGN so SSH disconnect doesn't kill the update."""
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        # Clear cached get_hermes_home if present
+        import hermes_cli.config as _cfg
+        if hasattr(_cfg, "_HERMES_HOME_CACHE"):
+            _cfg._HERMES_HOME_CACHE = None  # type: ignore[attr-defined]
+
+        original_handler = signal.getsignal(signal.SIGHUP)
+        state = _install_hangup_protection(gateway_mode=False)
+        try:
+            assert signal.getsignal(signal.SIGHUP) == signal.SIG_IGN
+        finally:
+            _finalize_update_output(state)
+            # getsignal() may return None, which signal.signal() rejects.
+            if original_handler is not None:
+                signal.signal(signal.SIGHUP, original_handler)
+
+    def test_wraps_stdout_and_stderr_with_mirror(self, tmp_path, monkeypatch):
+        """stdout/stderr are wrapped and writes are mirrored into logs/update.log."""
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        # Nuke any cached home path
+        import hermes_cli.config as _cfg
+        if hasattr(_cfg, "_HERMES_HOME_CACHE"):
+            _cfg._HERMES_HOME_CACHE = None  # type: ignore[attr-defined]
+
+        prev_out, prev_err = sys.stdout, sys.stderr
+        state = _install_hangup_protection(gateway_mode=False)
+
+        try:
+            # On Windows (no SIGHUP) we still wrap stdio and create the log.
+            assert state["installed"] is True
+            assert isinstance(sys.stdout, _UpdateOutputStream)
+            assert isinstance(sys.stderr, _UpdateOutputStream)
+            assert state["log_file"] is not None
+
+            sys.stdout.write("checking mirror\n")
+            sys.stdout.flush()
+            log_path = tmp_path / "logs" / "update.log"
+            assert log_path.exists()
+            contents = log_path.read_text(encoding="utf-8")
+            assert "checking mirror" in contents
+            assert "hermes update started" in contents
+        finally:
+            _finalize_update_output(state)
+            # Sanity-check restoration
+            assert sys.stdout is prev_out
+            assert sys.stderr is prev_err
+
+    def test_logs_dir_created_if_missing(self, tmp_path, monkeypatch):
+        """A missing logs/ directory is created together with update.log."""
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        import hermes_cli.config as _cfg
+        if hasattr(_cfg, "_HERMES_HOME_CACHE"):
+            _cfg._HERMES_HOME_CACHE = None  # type: ignore[attr-defined]
+
+        # No logs/ dir yet.
+        assert not (tmp_path / "logs").exists()
+        state = _install_hangup_protection(gateway_mode=False)
+        try:
+            assert (tmp_path / "logs").is_dir()
+            assert (tmp_path / "logs" / "update.log").exists()
+        finally:
+            _finalize_update_output(state)
+
+    def test_non_fatal_if_log_setup_fails(self, monkeypatch):
+        """If get_hermes_home() raises, stdio must be left untouched but SIGHUP still handled."""
+        prev_out, prev_err = sys.stdout, sys.stderr
+
+        def _boom():
+            raise RuntimeError("no home for you")
+
+        # Patch the import inside _install_hangup_protection.
+        monkeypatch.setattr(
+            "hermes_cli.config.get_hermes_home", _boom, raising=True
+        )
+
+        original_handler = (
+            signal.getsignal(signal.SIGHUP) if hasattr(signal, "SIGHUP") else None
+        )
+
+        state = _install_hangup_protection(gateway_mode=False)
+
+        try:
+            assert sys.stdout is prev_out
+            assert sys.stderr is prev_err
+            assert state["installed"] is False
+            # SIGHUP must still be installed even when log setup fails.
+            if hasattr(signal, "SIGHUP"):
+                assert signal.getsignal(signal.SIGHUP) == signal.SIG_IGN
+        finally:
+            _finalize_update_output(state)
+            if hasattr(signal, "SIGHUP") and original_handler is not None:
+                signal.signal(signal.SIGHUP, original_handler)
+
+
+# -----------------------------------------------------------------------------
+# _finalize_update_output
+# -----------------------------------------------------------------------------
+
+
+class TestFinalizeUpdateOutput:
+    def test_none_state_is_noop(self):
+        """Passing None instead of a state dict must not raise."""
+        _finalize_update_output(None)
+
+    def test_restores_streams_and_closes_log(self, tmp_path, monkeypatch):
+        """Finalizing puts sys.stdout back and closes the update log handle."""
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        import hermes_cli.config as _cfg
+        if hasattr(_cfg, "_HERMES_HOME_CACHE"):
+            _cfg._HERMES_HOME_CACHE = None  # type: ignore[attr-defined]
+
+        stdout_before = sys.stdout
+        state = _install_hangup_protection(gateway_mode=False)
+        log_handle = state["log_file"]
+        # Installation must have swapped stdout and opened a log first.
+        assert sys.stdout is not stdout_before
+        assert log_handle is not None
+
+        _finalize_update_output(state)
+
+        assert sys.stdout is stdout_before
+        assert log_handle.closed is True
+
+    def test_skipped_install_leaves_stdio_alone(self):
+        """With state['installed'] False, finalize leaves sys.stdout/stderr as is."""
+        # Synthetic state that mimics a failed install.
+        placeholder = object()
+        failed_state = {
+            "prev_stdout": placeholder,
+            "prev_stderr": placeholder,
+            "log_file": None,
+            "installed": False,
+        }
+        stdout_before, stderr_before = sys.stdout, sys.stderr
+
+        _finalize_update_output(failed_state)
+
+        assert sys.stdout is stdout_before
+        assert sys.stderr is stderr_before
diff --git a/tests/honcho_plugin/test_async_memory.py b/tests/honcho_plugin/test_async_memory.py
index 936f478846f..5df8d274540 100644
--- a/tests/honcho_plugin/test_async_memory.py
+++ b/tests/honcho_plugin/test_async_memory.py
@@ -460,10 +460,3 @@ class TestPrefetchCacheAccessors:
         assert mgr.pop_context_result("cli:test") == payload
         assert mgr.pop_context_result("cli:test") == {}
 
-    def test_set_and_pop_dialectic_result(self):
-        mgr = _make_manager(write_frequency="turn")
-
-        mgr.set_dialectic_result("cli:test", "Resume with toolset cleanup")
-
-        assert mgr.pop_dialectic_result("cli:test") == "Resume with toolset cleanup"
-        assert mgr.pop_dialectic_result("cli:test") == ""
diff --git a/tests/honcho_plugin/test_cli.py b/tests/honcho_plugin/test_cli.py
index 006d687dc1d..a6fc39ea7c0 100644
--- a/tests/honcho_plugin/test_cli.py
+++ b/tests/honcho_plugin/test_cli.py
@@ -26,6 +26,9 @@ class TestCmdStatus:
             write_frequency = "async"
             session_strategy = "per-session"
             context_tokens = 800
+            dialectic_reasoning_level = "low"
+            reasoning_level_cap = "high"
+            reasoning_heuristic = True
 
             def resolve_session_name(self):
                 return "hermes"
diff --git a/tests/honcho_plugin/test_session.py b/tests/honcho_plugin/test_session.py
index 9784959d37d..25426118312 100644
--- a/tests/honcho_plugin/test_session.py
+++ b/tests/honcho_plugin/test_session.py
@@ -568,15 +568,15 @@ class TestToolsModeInitBehavior:
 
         with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \
              patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \
-             patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \
+             patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager) as mock_manager_cls, \
              patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
             provider.initialize(session_id="test-session-001", **init_kwargs)
 
-        return provider, cfg
+        return provider, cfg, mock_manager_cls
 
     def test_tools_lazy_default(self):
         """tools + initOnSessionStart=false → session NOT initialized after initialize()."""
-        provider, _ = self._make_provider_with_config(
+        provider, _, _ = self._make_provider_with_config(
             recall_mode="tools", init_on_session_start=False,
         )
         assert provider._session_initialized is False
@@ -585,7 +585,7 @@ class TestToolsModeInitBehavior:
 
     def test_tools_eager_init(self):
         """tools + initOnSessionStart=true → session IS initialized after initialize()."""
-        provider, _ = self._make_provider_with_config(
+        provider, _, _ = self._make_provider_with_config(
             recall_mode="tools", init_on_session_start=True,
         )
         assert provider._session_initialized is True
@@ -593,33 +593,34 @@ class TestToolsModeInitBehavior:
 
     def test_tools_eager_prefetch_still_empty(self):
         """tools mode with eager init still returns empty from prefetch() (no auto-injection)."""
-        provider, _ = self._make_provider_with_config(
+        provider, _, _ = self._make_provider_with_config(
             recall_mode="tools", init_on_session_start=True,
         )
         assert provider.prefetch("test query") == ""
 
     def test_tools_lazy_prefetch_empty(self):
         """tools mode with lazy init also returns empty from prefetch()."""
-        provider, _ = self._make_provider_with_config(
+        provider, _, _ = self._make_provider_with_config(
             recall_mode="tools", init_on_session_start=False,
         )
         assert provider.prefetch("test query") == ""
 
     def test_explicit_peer_name_not_overridden_by_user_id(self):
         """Explicit peerName in config must not be replaced by gateway user_id."""
-        _, cfg = self._make_provider_with_config(
+        _, cfg, _ = self._make_provider_with_config(
             recall_mode="tools", init_on_session_start=True,
             peer_name="Kathie", user_id="8439114563",
         )
         assert cfg.peer_name == "Kathie"
 
     def test_user_id_used_when_no_peer_name(self):
-        """Gateway user_id is used as peer_name when no explicit peerName configured."""
-        _, cfg = self._make_provider_with_config(
+        """Gateway user_id is passed separately from config peer_name."""
+        _, cfg, mock_manager_cls = self._make_provider_with_config(
             recall_mode="tools", init_on_session_start=True,
             peer_name=None, user_id="8439114563",
         )
-        assert cfg.peer_name == "8439114563"
+        assert cfg.peer_name is None
+        assert mock_manager_cls.call_args.kwargs["runtime_user_peer_name"] == "8439114563"
 
 
 class TestPerSessionMigrateGuard:
@@ -815,6 +816,27 @@ class TestDialecticInputGuard:
 # ---------------------------------------------------------------------------
 
 
+def _settle_prewarm(provider):
+    """Wait for the session-start prewarm dialectic thread, then return the
+    provider to a clean 'nothing fired yet' state so cadence/first-turn/
+    trivial-prompt tests can assert from a known baseline."""
+    if provider._prefetch_thread:
+        provider._prefetch_thread.join(timeout=3.0)
+    with provider._prefetch_lock:
+        provider._prefetch_result = ""
+        provider._prefetch_result_fired_at = -999
+    provider._prefetch_thread = None
+    provider._prefetch_thread_started_at = 0.0
+    provider._last_dialectic_turn = -999
+    provider._dialectic_empty_streak = 0
+    if getattr(provider, "_manager", None) is not None:
+        try:
+            provider._manager.dialectic_query.reset_mock()
+            provider._manager.prefetch_context.reset_mock()
+        except AttributeError:
+            pass
+
+
 class TestDialecticCadenceDefaults:
     """Regression tests for dialectic_cadence default value."""
 
@@ -840,12 +862,15 @@ class TestDialecticCadenceDefaults:
              patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
             provider.initialize(session_id="test-session-001")
 
+        _settle_prewarm(provider)
         return provider
 
-    def test_default_is_3(self):
-        """Default dialectic_cadence should be 3 to avoid per-turn LLM calls."""
+    def test_unset_falls_back_to_1(self):
+        """Unset dialecticCadence falls back to 1 (every turn) for backwards
+        compatibility with existing configs that predate the setting. The
+        setup wizard writes 2 explicitly on new configs."""
         provider = self._make_provider()
-        assert provider._dialectic_cadence == 3
+        assert provider._dialectic_cadence == 1
 
     def test_config_override(self):
         """dialecticCadence from config overrides the default."""
@@ -908,6 +933,7 @@ class TestDialecticDepth:
              patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
             provider.initialize(session_id="test-session-001")
 
+        _settle_prewarm(provider)
         return provider
 
     def test_default_depth_is_1(self):
@@ -1027,46 +1053,6 @@ class TestDialecticDepth:
         assert provider._manager.dialectic_query.call_count == 2
         assert "Synthesis" in result
 
-    def test_first_turn_runs_dialectic_synchronously(self):
-        """First turn should fire the dialectic synchronously (cold start)."""
-        from unittest.mock import MagicMock, patch
-        provider = self._make_provider(cfg_extra={"dialectic_depth": 1})
-        provider._manager = MagicMock()
-        provider._manager.dialectic_query.return_value = "cold start synthesis"
-        provider._manager.get_prefetch_context.return_value = None
-        provider._manager.pop_context_result.return_value = None
-        provider._session_key = "test"
-        provider._base_context_cache = ""  # cold start
-        provider._last_dialectic_turn = -999  # never fired
-
-        result = provider.prefetch("hello world")
-        assert "cold start synthesis" in result
-        assert provider._manager.dialectic_query.call_count == 1
-        # After first-turn sync, _last_dialectic_turn should be updated
-        assert provider._last_dialectic_turn != -999
-
-    def test_first_turn_dialectic_does_not_double_fire(self):
-        """After first-turn sync dialectic, queue_prefetch should skip (cadence)."""
-        from unittest.mock import MagicMock
-        provider = self._make_provider(cfg_extra={"dialectic_depth": 1})
-        provider._manager = MagicMock()
-        provider._manager.dialectic_query.return_value = "cold start synthesis"
-        provider._manager.get_prefetch_context.return_value = None
-        provider._manager.pop_context_result.return_value = None
-        provider._session_key = "test"
-        provider._base_context_cache = ""
-        provider._last_dialectic_turn = -999
-        provider._turn_count = 0
-
-        # First turn fires sync dialectic
-        provider.prefetch("hello")
-        assert provider._manager.dialectic_query.call_count == 1
-
-        # Now queue_prefetch on same turn should skip (cadence: 0 - 0 < 3)
-        provider._manager.dialectic_query.reset_mock()
-        provider.queue_prefetch("hello")
-        assert provider._manager.dialectic_query.call_count == 0
-
     def test_run_dialectic_depth_bails_early_on_strong_signal(self):
         """Depth 2 skips pass 1 when pass 0 returns strong signal."""
         from unittest.mock import MagicMock
@@ -1083,6 +1069,584 @@ class TestDialecticDepth:
         assert provider._manager.dialectic_query.call_count == 1
 
 
+# ---------------------------------------------------------------------------
+# Trivial-prompt heuristic + dialectic cadence silent-failure guards
+# ---------------------------------------------------------------------------
+
+
+class TestTrivialPromptHeuristic:
+    """Trivial prompts ('ok', 'y', slash commands) must short-circuit injection."""
+
+    @staticmethod
+    def _make_provider():
+        from unittest.mock import patch, MagicMock
+        from plugins.memory.honcho.client import HonchoClientConfig
+
+        cfg = HonchoClientConfig(api_key="test-key", enabled=True, recall_mode="hybrid")
+        provider = HonchoMemoryProvider()
+        mock_manager = MagicMock()
+        mock_session = MagicMock()
+        mock_session.messages = []
+        mock_manager.get_or_create.return_value = mock_session
+
+        with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \
+             patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \
+             patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \
+             patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
+            provider.initialize(session_id="test-session-trivial")
+        _settle_prewarm(provider)
+        return provider
+
+    def test_classifier_catches_common_trivial_forms(self):
+        for t in ("ok", "OK", " ok ", "y", "yes", "sure", "thanks", "lgtm", "/help", "", "   "):
+            assert HonchoMemoryProvider._is_trivial_prompt(t), f"expected trivial: {t!r}"
+
+    def test_classifier_lets_substantive_prompts_through(self):
+        for t in ("hello world", "what's my name", "explain this", "ok so what's next"):
+            assert not HonchoMemoryProvider._is_trivial_prompt(t), f"expected non-trivial: {t!r}"
+
+    def test_prefetch_skips_on_trivial_prompt(self):
+        provider = self._make_provider()
+        provider._session_key = "test"
+        provider._base_context_cache = "cached base"
+        provider._last_dialectic_turn = 0
+        provider._turn_count = 5
+
+        assert provider.prefetch("ok") == ""
+        assert provider.prefetch("/help") == ""
+        # Dialectic should not have fired
+        assert provider._manager.dialectic_query.call_count == 0
+
+    def test_queue_prefetch_skips_on_trivial_prompt(self):
+        provider = self._make_provider()
+        provider._session_key = "test"
+        provider._turn_count = 10
+        provider._last_dialectic_turn = -999  # would otherwise fire
+        # initialize() pre-warms; clear call counts before the assertion.
+        provider._manager.prefetch_context.reset_mock()
+        provider._manager.dialectic_query.reset_mock()
+
+        provider.queue_prefetch("y")
+        # Trivial prompts short-circuit both context refresh and dialectic fire.
+        assert provider._manager.prefetch_context.call_count == 0
+        assert provider._manager.dialectic_query.call_count == 0
+
+
+class TestDialecticCadenceAdvancesOnSuccess:
+    """Cadence tracker advances only when the dialectic call returns a
+    non-empty result. Empty results (transient API error, sparse representation)
+    must retry on the next eligible turn instead of waiting the full cadence."""
+
+    @staticmethod
+    def _make_provider():
+        from unittest.mock import patch, MagicMock
+        from plugins.memory.honcho.client import HonchoClientConfig
+
+        cfg = HonchoClientConfig(
+            api_key="test-key", enabled=True, recall_mode="hybrid", dialectic_depth=1,
+        )
+        provider = HonchoMemoryProvider()
+        mock_manager = MagicMock()
+        mock_session = MagicMock()
+        mock_session.messages = []
+        mock_manager.get_or_create.return_value = mock_session
+
+        with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \
+             patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \
+             patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \
+             patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
+            provider.initialize(session_id="test-session-retry")
+        _settle_prewarm(provider)
+        return provider
+
+    def test_empty_dialectic_result_does_not_advance_cadence(self):
+        import time as _time
+        provider = self._make_provider()
+        provider._session_key = "test"
+        provider._manager.dialectic_query.return_value = ""  # silent failure
+        provider._turn_count = 5
+        provider._last_dialectic_turn = 0  # would fire: 5 - 0 = 5 ≥ cadence (unset cadence falls back to 1)
+
+        provider.queue_prefetch("hello")
+        # wait for the background thread to settle
+        if provider._prefetch_thread:
+            provider._prefetch_thread.join(timeout=2.0)
+
+        # Dialectic call was attempted
+        assert provider._manager.dialectic_query.call_count == 1
+        # But cadence tracker did NOT advance — next turn should retry
+        assert provider._last_dialectic_turn == 0
+
+    def test_non_empty_dialectic_result_advances_cadence(self):
+        provider = self._make_provider()
+        provider._session_key = "test"
+        provider._manager.dialectic_query.return_value = "real synthesis output"
+        provider._turn_count = 5
+        provider._last_dialectic_turn = 0
+
+        provider.queue_prefetch("hello")
+        if provider._prefetch_thread:
+            provider._prefetch_thread.join(timeout=2.0)
+
+        assert provider._last_dialectic_turn == 5
+
+    def test_in_flight_thread_is_not_stacked(self):
+        import threading as _threading
+        import time as _time
+        provider = self._make_provider()
+        provider._session_key = "test"
+        provider._turn_count = 10
+        provider._last_dialectic_turn = 0
+
+        # Simulate a prior thread still running (fresh, not stale)
+        hold = _threading.Event()
+
+        def _block():
+            hold.wait(timeout=5.0)
+
+        fresh = _threading.Thread(target=_block, daemon=True)
+        fresh.start()
+        provider._prefetch_thread = fresh
+        provider._prefetch_thread_started_at = _time.monotonic()  # fresh start
+
+        provider.queue_prefetch("hello")
+        # Should have short-circuited — no new dialectic call
+        assert provider._manager.dialectic_query.call_count == 0
+        hold.set()
+        fresh.join(timeout=2.0)
+
+
+class TestSessionStartDialecticPrewarm:
+    """Session-start prewarm fires a depth-aware dialectic whose result is
+    consumed by turn 1 — no duplicate .chat() and no dead-cache orphaning."""
+
+    @staticmethod
+    def _make_provider(cfg_extra=None, dialectic_result="prewarm synthesis"):
+        from unittest.mock import patch, MagicMock
+        from plugins.memory.honcho.client import HonchoClientConfig
+
+        defaults = dict(api_key="test-key", enabled=True, recall_mode="hybrid")
+        if cfg_extra:
+            defaults.update(cfg_extra)
+        cfg = HonchoClientConfig(**defaults)
+        provider = HonchoMemoryProvider()
+        mock_manager = MagicMock()
+        mock_manager.get_or_create.return_value = MagicMock(messages=[])
+        mock_manager.get_prefetch_context.return_value = None
+        mock_manager.pop_context_result.return_value = None
+        mock_manager.dialectic_query.return_value = dialectic_result
+
+        with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \
+             patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \
+             patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \
+             patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
+            provider.initialize(session_id="test-prewarm")
+        return provider
+
+    def test_prewarm_populates_prefetch_result(self):
+        p = self._make_provider()
+        # Wait for prewarm thread to land
+        if p._prefetch_thread:
+            p._prefetch_thread.join(timeout=3.0)
+        with p._prefetch_lock:
+            assert p._prefetch_result == "prewarm synthesis"
+        assert p._last_dialectic_turn == 0
+
+    def test_turn1_consumes_prewarm_without_duplicate_dialectic(self):
+        """With prewarm result already in _prefetch_result, turn 1 prefetch
+        should NOT fire another dialectic."""
+        p = self._make_provider()
+        if p._prefetch_thread:
+            p._prefetch_thread.join(timeout=3.0)
+        p._manager.dialectic_query.reset_mock()
+        p._session_key = "test-prewarm"
+        p._base_context_cache = ""
+        p._turn_count = 1
+
+        result = p.prefetch("hello world")
+        assert "prewarm synthesis" in result
+        # The sync first-turn path must NOT have fired another .chat()
+        assert p._manager.dialectic_query.call_count == 0
+
+    def test_turn1_falls_back_to_sync_when_prewarm_missing(self):
+        """If the prewarm produced nothing (empty graph, API blip), turn 1
+        still fires its own sync dialectic."""
+        p = self._make_provider(dialectic_result="")  # prewarm returns empty
+        if p._prefetch_thread:
+            p._prefetch_thread.join(timeout=3.0)
+        with p._prefetch_lock:
+            assert p._prefetch_result == ""  # prewarm landed nothing
+        # Switch dialectic_query to return something on the sync first-turn call
+        p._manager.dialectic_query.return_value = "sync recovery"
+        p._manager.dialectic_query.reset_mock()
+        p._session_key = "test-prewarm"
+        p._base_context_cache = ""
+        p._turn_count = 1
+
+        result = p.prefetch("hello world")
+        assert "sync recovery" in result
+        assert p._manager.dialectic_query.call_count == 1
+
+
+class TestDialecticLiveness:
+    """Liveness + observability: stale-thread recovery, stale-result discard,
+    empty-streak backoff, and the snapshot method used for diagnostics."""
+
+    @staticmethod
+    def _make_provider(cfg_extra=None):
+        from unittest.mock import patch, MagicMock
+        from plugins.memory.honcho.client import HonchoClientConfig
+
+        defaults = dict(api_key="test-key", enabled=True, recall_mode="hybrid", timeout=2.0)
+        if cfg_extra:
+            defaults.update(cfg_extra)
+        cfg = HonchoClientConfig(**defaults)
+        provider = HonchoMemoryProvider()
+        mock_manager = MagicMock()
+        mock_manager.get_or_create.return_value = MagicMock(messages=[])
+        mock_manager.get_prefetch_context.return_value = None
+        mock_manager.pop_context_result.return_value = None
+        mock_manager.dialectic_query.return_value = ""  # default: silent
+
+        with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \
+             patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \
+             patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \
+             patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
+            provider.initialize(session_id="test-liveness")
+        _settle_prewarm(provider)
+        return provider
+
+    def test_stale_thread_is_treated_as_dead(self):
+        """A thread older than timeout × multiplier no longer blocks new fires."""
+        import threading as _threading
+        p = self._make_provider()
+        p._session_key = "test"
+        p._turn_count = 10
+        p._last_dialectic_turn = 0
+        p._manager.dialectic_query.return_value = "fresh synthesis"
+
+        # Plant an alive thread with an old timestamp (stale)
+        hold = _threading.Event()
+        stuck = _threading.Thread(target=lambda: hold.wait(timeout=10.0), daemon=True)
+        stuck.start()
+        p._prefetch_thread = stuck
+        # timeout=2.0, multiplier=2.0, so anything older than 4s is stale
+        p._prefetch_thread_started_at = 0.0  # presumably far older than time.monotonic() "now"; that clock has no 1970 epoch
+
+        p.queue_prefetch("hello")
+        # New thread should have been spawned since stuck one is stale
+        assert p._prefetch_thread is not stuck, "stale thread must be recycled"
+        if p._prefetch_thread:
+            p._prefetch_thread.join(timeout=2.0)
+        assert p._manager.dialectic_query.call_count == 1
+        hold.set()
+        stuck.join(timeout=2.0)
+
+    def test_stale_pending_result_is_discarded_on_read(self):
+        """A pending dialectic result from many turns ago is discarded
+        instead of injected against a fresh conversational pivot."""
+        p = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 2}})
+        p._session_key = "test"
+        p._base_context_cache = "base ctx"
+        with p._prefetch_lock:
+            p._prefetch_result = "ancient synthesis"
+            p._prefetch_result_fired_at = 1
+        # cadence=2, multiplier=2 → stale after 4 turns since fire
+        p._turn_count = 10
+        p._last_dialectic_turn = 1  # prevents sync first-turn path
+
+        result = p.prefetch("what's new")
+        assert "ancient synthesis" not in result, "stale pending must be discarded"
+        # Cache slot cleared
+        with p._prefetch_lock:
+            assert p._prefetch_result == ""
+            assert p._prefetch_result_fired_at == -999
+
+    def test_fresh_pending_result_is_kept(self):
+        """A pending result within the staleness window is injected normally."""
+        p = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 3}})
+        p._session_key = "test"
+        p._base_context_cache = ""
+        with p._prefetch_lock:
+            p._prefetch_result = "recent synthesis"
+            p._prefetch_result_fired_at = 8
+        p._turn_count = 9  # 1 turn since fire, well within cadence × 2 = 6
+        p._last_dialectic_turn = 8
+
+        result = p.prefetch("what's new")
+        assert "recent synthesis" in result
+
+    def test_empty_streak_widens_effective_cadence(self):
+        """After N empty returns, the gate waits cadence + N turns."""
+        p = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 1}})
+        p._dialectic_empty_streak = 3
+        # cadence=1, streak=3 → effective = 4
+        assert p._effective_cadence() == 4
+
+    def test_backoff_is_capped(self):
+        """Effective cadence is capped at cadence × _BACKOFF_MAX."""
+        p = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 2}})
+        p._dialectic_empty_streak = 100
+        # cadence=2, ceiling = 2 × 8 = 16
+        assert p._effective_cadence() == 16
+
+    def test_success_resets_empty_streak(self):
+        """A non-empty result zeroes the streak so healthy operation restores
+        the base cadence immediately."""
+        p = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 1}})
+        p._session_key = "test"
+        p._dialectic_empty_streak = 5
+        p._turn_count = 10
+        p._last_dialectic_turn = 0
+        p._manager.dialectic_query.return_value = "real output"
+
+        p.queue_prefetch("hello")
+        if p._prefetch_thread:
+            p._prefetch_thread.join(timeout=2.0)
+        assert p._dialectic_empty_streak == 0
+        assert p._last_dialectic_turn == 10
+
+    def test_empty_result_increments_streak(self):
+        p = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 1}})
+        p._session_key = "test"
+        p._turn_count = 5
+        p._last_dialectic_turn = 0
+        p._manager.dialectic_query.return_value = ""  # empty
+
+        p.queue_prefetch("hello")
+        if p._prefetch_thread:
+            p._prefetch_thread.join(timeout=2.0)
+        assert p._dialectic_empty_streak == 1
+        assert p._last_dialectic_turn == 0  # cadence not advanced
+
+    def test_liveness_snapshot_shape(self):
+        p = self._make_provider()
+        snap = p.liveness_snapshot()
+        for key in (
+            "turn_count", "last_dialectic_turn", "pending_result_fired_at",
+            "empty_streak", "effective_cadence", "thread_alive", "thread_age_seconds",
+        ):
+            assert key in snap
+
+
+class TestDialecticLifecycleSmoke:
+    """End-to-end smoke walking a multi-turn session through prewarm,
+    turn 1 consume, trivial skip, cadence fire, empty-result retry,
+    heuristic bump, and session-end flush."""
+
+    @staticmethod
+    def _make_provider(cfg_extra=None):
+        from unittest.mock import patch, MagicMock
+        from plugins.memory.honcho.client import HonchoClientConfig
+
+        defaults = dict(
+            api_key="test-key", enabled=True, recall_mode="hybrid",
+            dialectic_reasoning_level="low", reasoning_heuristic=True,
+            reasoning_level_cap="high", dialectic_depth=1,
+        )
+        if cfg_extra:
+            defaults.update(cfg_extra)
+        cfg = HonchoClientConfig(**defaults)
+        provider = HonchoMemoryProvider()
+        mock_manager = MagicMock()
+        mock_session = MagicMock()
+        mock_session.messages = []
+        mock_manager.get_or_create.return_value = mock_session
+        mock_manager.get_prefetch_context.return_value = None
+        mock_manager.pop_context_result.return_value = None
+
+        with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \
+             patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \
+             patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \
+             patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
+            return provider, mock_manager, cfg
+
+    def _await_thread(self, provider):
+        if provider._prefetch_thread:
+            provider._prefetch_thread.join(timeout=3.0)
+
+    def test_full_multi_turn_session(self):
+        """Walks init → turns 1..8 → session end. Asserts at every step that
+        the plugin did exactly what it should and nothing more.
+
+        Uses dialecticCadence=3 so we can exercise skip-turns between fires
+        and the silent-failure retry path without their gates tripping each
+        other. Trivial + slash skips apply independent of cadence.
+        """
+        from unittest.mock import patch, MagicMock
+        provider, mgr, cfg = self._make_provider(
+            cfg_extra={"raw": {"dialecticCadence": 3}}
+        )
+
+        # Program the dialectic responses in the exact order they'll be requested.
+        # An extra or missing call fails the test — strong smoke signal.
+        responses = iter([
+            "prewarm: user is eri, works on hermes",      # session-start prewarm
+            "cadence fire: long query synthesis",         # turn 4 queue_prefetch
+            "",                                           # turn 7 fire: silent failure
+            "retry success: fresh synthesis",             # turn 8 queue_prefetch retry
+        ])
+        mgr.dialectic_query.side_effect = lambda *a, **kw: next(responses)
+
+        # ---- init: prewarm fires ----
+        with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \
+             patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \
+             patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mgr), \
+             patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
+            provider.initialize(session_id="smoke-test")
+
+        self._await_thread(provider)
+        with provider._prefetch_lock:
+            assert provider._prefetch_result.startswith("prewarm"), \
+                "session-start prewarm must land in _prefetch_result"
+        assert provider._last_dialectic_turn == 0, "prewarm marks turn 0"
+        assert mgr.dialectic_query.call_count == 1
+
+        # ---- turn 1: consume prewarm, no duplicate dialectic ----
+        provider.on_turn_start(1, "hey")
+        inject1 = provider.prefetch("hey")
+        assert "prewarm" in inject1, "turn 1 must surface prewarm"
+        provider.sync_turn("hey", "hi there")
+        provider.queue_prefetch("hey")  # cadence gate: (1-0)<3 → skip
+        self._await_thread(provider)
+        assert mgr.dialectic_query.call_count == 1, \
+            "turn 1 must not fire — prewarm covered it and cadence skips"
+
+        # ---- turn 2: trivial 'ok' → skip everything ----
+        mgr.prefetch_context.reset_mock()
+        provider.on_turn_start(2, "ok")
+        assert provider.prefetch("ok") == "", "trivial prompt must short-circuit injection"
+        provider.sync_turn("ok", "cool")
+        provider.queue_prefetch("ok")
+        self._await_thread(provider)
+        assert mgr.dialectic_query.call_count == 1, "trivial must not fire dialectic"
+        assert mgr.prefetch_context.call_count == 0, "trivial must not fire context refresh"
+
+        # ---- turn 3: slash '/help' → also skip ----
+        provider.on_turn_start(3, "/help")
+        assert provider.prefetch("/help") == ""
+        provider.queue_prefetch("/help")
+        assert mgr.dialectic_query.call_count == 1
+
+        # ---- turn 4: long query → cadence fires + heuristic bumps ----
+        long_q = "walk me through " + ("x " * 100)  # ~200 chars → heuristic +1
+        provider.on_turn_start(4, long_q)
+        provider.prefetch(long_q)
+        provider.sync_turn(long_q, "sure")
+        provider.queue_prefetch(long_q)  # (4-0)≥3 → fires
+        self._await_thread(provider)
+        assert mgr.dialectic_query.call_count == 2, "turn 4 cadence fire"
+        _, kwargs = mgr.dialectic_query.call_args
+        assert kwargs.get("reasoning_level") in ("medium", "high"), \
+            f"long query must bump reasoning level above 'low'; got {kwargs.get('reasoning_level')}"
+        assert provider._last_dialectic_turn == 4, "cadence tracker advances on success"
+
+        # ---- turns 5–6: cadence cooldown, no fires ----
+        for t in (5, 6):
+            provider.on_turn_start(t, "tell me more")
+            provider.queue_prefetch("tell me more")
+            self._await_thread(provider)
+        assert mgr.dialectic_query.call_count == 2, "turns 5–6 blocked by cadence window"
+
+        # ---- turn 7: fires but silent failure (empty dialectic) ----
+        provider.on_turn_start(7, "and then what")
+        provider.queue_prefetch("and then what")  # (7-4)≥3 → fires
+        self._await_thread(provider)
+        assert mgr.dialectic_query.call_count == 3, "turn 7 fires"
+        assert provider._last_dialectic_turn == 4, \
+            "silent failure must NOT burn the cadence window"
+
+        # ---- turn 8: retries because cadence didn't advance ----
+        provider.on_turn_start(8, "try again")
+        provider.queue_prefetch("try again")  # (8-4)≥3 → fires again
+        self._await_thread(provider)
+        assert mgr.dialectic_query.call_count == 4, \
+            "turn 8 retries because turn 7's empty result didn't advance cadence"
+        assert provider._last_dialectic_turn == 8, "retry success advances"
+
+        # ---- session end: flush messages ----
+        provider.on_session_end([])
+        mgr.flush_all.assert_called()
+
+
+class TestReasoningHeuristic:
+    """Tests for the char-count heuristic that raises the dialectic reasoning
+    level as the query gets longer, clamped at reasoning_level_cap."""
+
+    @staticmethod
+    def _make_provider(cfg_extra=None):
+        from unittest.mock import patch, MagicMock
+        from plugins.memory.honcho.client import HonchoClientConfig
+
+        defaults = dict(  # baseline config; cfg_extra entries override these
+            api_key="test-key", enabled=True, recall_mode="hybrid",
+            dialectic_reasoning_level="low", reasoning_heuristic=True,
+            reasoning_level_cap="high",
+        )
+        if cfg_extra:
+            defaults.update(cfg_extra)
+        cfg = HonchoClientConfig(**defaults)
+        provider = HonchoMemoryProvider()
+        mock_manager = MagicMock()
+        mock_manager.get_or_create.return_value = MagicMock(messages=[])
+        with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \
+             patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \
+             patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \
+             patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
+            provider.initialize(session_id="test-heuristic")  # runs with config, client and manager patched
+        _settle_prewarm(provider)  # helper defined outside this class; presumably waits for init-time prewarm
+        return provider
+
+    def test_short_query_stays_at_base(self):
+        p = self._make_provider()
+        assert p._apply_reasoning_heuristic("low", "hey") == "low"  # 3-char query: no bump
+
+    def test_medium_query_bumps_one_level(self):
+        p = self._make_provider()
+        q = "x" * 150  # 150 chars: expected to bump one level
+        assert p._apply_reasoning_heuristic("low", q) == "medium"
+
+    def test_long_query_bumps_two_levels(self):
+        p = self._make_provider()
+        q = "x" * 500  # 500 chars: expected to bump two levels
+        assert p._apply_reasoning_heuristic("low", q) == "high"
+
+    def test_bump_respects_cap(self):
+        p = self._make_provider(cfg_extra={"reasoning_level_cap": "medium"})
+        q = "x" * 500  # would reach 'high' if the cap were not 'medium'
+        assert p._apply_reasoning_heuristic("low", q) == "medium"
+
+    def test_max_never_auto_selected_with_default_cap(self):
+        p = self._make_provider(cfg_extra={"dialectic_reasoning_level": "high"})
+        q = "x" * 500  # base is already 'high'; an uncapped bump would go to 'max'
+        assert p._apply_reasoning_heuristic("high", q) == "high"
+
+    def test_heuristic_disabled_returns_base(self):
+        p = self._make_provider(cfg_extra={"reasoning_heuristic": False})
+        q = "x" * 500  # long query, but the heuristic is switched off
+        assert p._apply_reasoning_heuristic("low", q) == "low"
+
+    def test_resolve_pass_level_applies_heuristic_at_base_mapping(self):
+        """With the default depth, pass 0 maps to 'base', so the heuristic applies."""
+        p = self._make_provider()
+        q = "x" * 150
+        assert p._resolve_pass_level(0, query=q) == "medium"
+
+    def test_resolve_pass_level_does_not_touch_explicit_per_pass(self):
+        """Explicit dialectic_depth_levels win outright — no heuristic scaling."""
+        p = self._make_provider(cfg_extra={"dialectic_depth_levels": ["minimal"]})
+        q = "x" * 500  # the heuristic alone would bump this to 'high'
+        assert p._resolve_pass_level(0, query=q) == "minimal"
+
+    def test_resolve_pass_level_does_not_touch_lighter_passes(self):
+        """At depth 3, pass 0 stays 'minimal' — the heuristic must not bump it."""
+        p = self._make_provider(cfg_extra={"dialectic_depth": 3})
+        q = "x" * 500
+        assert p._resolve_pass_level(0, query=q) == "minimal"
+        # The 'base' pass (index 1 at depth 3) is the one the heuristic scales
+        assert p._resolve_pass_level(1, query=q) == "high"
+
+
 # ---------------------------------------------------------------------------
 # set_peer_card None guard
 # ---------------------------------------------------------------------------
diff --git a/tests/run_agent/test_concurrent_interrupt.py b/tests/run_agent/test_concurrent_interrupt.py
index fdeb8dd6907..e5d8b88e727 100644
--- a/tests/run_agent/test_concurrent_interrupt.py
+++ b/tests/run_agent/test_concurrent_interrupt.py
@@ -23,6 +23,10 @@ def _make_agent(monkeypatch):
 
     class _Stub:
         _interrupt_requested = False
+        _interrupt_message = None
+        # Bind to this thread's ident so interrupt() targets a real tid.
+        _execution_thread_id = threading.current_thread().ident
+        _interrupt_thread_signal_pending = False
         log_prefix = ""
         quiet_mode = True
         verbose_logging = False
@@ -40,6 +44,15 @@ def _make_agent(monkeypatch):
         _current_tool = None
         _last_activity = 0
         _print_fn = print
+        # Worker-thread tracking state mirrored from AIAgent.__init__ so the
+        # real interrupt() method can fan out to concurrent-tool workers.
+        _active_children: list = []
+
+        def __init__(self):
+            # Instance-level (not class-level) so each test gets a fresh set.
+            self._tool_worker_threads: set = set()
+            self._tool_worker_threads_lock = threading.Lock()
+            self._active_children_lock = threading.Lock()
 
         def _touch_activity(self, desc):
             self._last_activity = time.time()
@@ -60,8 +73,10 @@ def _make_agent(monkeypatch):
             return False
 
     stub = _Stub()
-    # Bind the real methods
+    # Bind the real methods under test
     stub._execute_tool_calls_concurrent = _ra.AIAgent._execute_tool_calls_concurrent.__get__(stub)
+    stub.interrupt = _ra.AIAgent.interrupt.__get__(stub)
+    stub.clear_interrupt = _ra.AIAgent.clear_interrupt.__get__(stub)
     stub._invoke_tool = MagicMock(side_effect=lambda *a, **kw: '{"ok": true}')
     return stub
 
@@ -137,3 +152,109 @@ def test_concurrent_preflight_interrupt_skips_all(monkeypatch):
     assert "skipped due to user interrupt" in messages[1]["content"]
     # _invoke_tool should never have been called
     agent._invoke_tool.assert_not_called()
+
+
+def test_running_concurrent_worker_sees_is_interrupted(monkeypatch):
+    """Regression guard for the "interrupt-doesn't-reach-hung-tool" class of
+    bug Physikal reported in April 2026.
+
+    Before this fix, `AIAgent.interrupt()` called `_set_interrupt(True,
+    _execution_thread_id)` — which only flagged the agent's *main* thread.
+    Tools running inside `_execute_tool_calls_concurrent` execute on
+    ThreadPoolExecutor worker threads whose tids are NOT the agent's, so
+    `is_interrupted()` (which checks the *current* thread's tid) returned
+    False inside those tools no matter how many times the gateway called
+    `.interrupt()`.  Hung ssh / long curl / big make-build tools would run
+    to their own timeout.
+
+    This test runs a fake tool in the concurrent path that polls
+    `is_interrupted()` like a real terminal command does, then calls
+    `agent.interrupt()` from another thread, and asserts a poll sees True
+    and the whole concurrent call returns in under 3 s (tool cap is 5 s).
+    """
+    from tools.interrupt import is_interrupted
+
+    agent = _make_agent(monkeypatch)
+
+    # Shared observation record so we can prove a worker saw the flag flip.
+    observed = {"saw_true": False, "poll_count": 0, "worker_tid": None}
+    worker_started = threading.Event()
+
+    def polling_tool(name, args, task_id, call_id=None):
+        observed["worker_tid"] = threading.current_thread().ident
+        worker_started.set()
+        deadline = time.monotonic() + 5.0  # the fake tool's own timeout
+        while time.monotonic() < deadline:
+            observed["poll_count"] += 1
+            if is_interrupted():
+                observed["saw_true"] = True
+                return '{"interrupted": true}'
+            time.sleep(0.05)  # poll interval
+        return '{"timed_out": true}'
+
+    agent._invoke_tool = MagicMock(side_effect=polling_tool)
+
+    tc1 = _FakeToolCall("hung_fake_tool_1", call_id="tc1")
+    tc2 = _FakeToolCall("hung_fake_tool_2", call_id="tc2")
+    msg = _FakeAssistantMsg([tc1, tc2])
+    messages = []
+
+    def _interrupt_after_start():
+        # Wait until at least one worker is running so its tid is tracked.
+        worker_started.wait(timeout=2.0)
+        time.sleep(0.2)  # let the other worker enter too
+        agent.interrupt("stop requested by test")
+
+    t = threading.Thread(target=_interrupt_after_start)
+    t.start()
+    start = time.monotonic()
+    agent._execute_tool_calls_concurrent(msg, messages, "test_task")
+    elapsed = time.monotonic() - start
+    t.join(timeout=2.0)
+
+    # The worker must have actually polled is_interrupted — otherwise the
+    # test isn't exercising what it claims to.
+    assert observed["poll_count"] > 0, (
+        "polling_tool never ran — test scaffold issue"
+    )
+    # A worker must have observed the interrupt (timing is checked below).
+    # Before the fix this loop ran until its own 5 s timeout.
+    assert observed["saw_true"], (
+        f"is_interrupted() never returned True inside the concurrent worker "
+        f"after agent.interrupt() — interrupt-propagation hole regressed. "
+        f"worker_tid={observed['worker_tid']!r} poll_count={observed['poll_count']}"
+    )
+    assert elapsed < 3.0, (
+        f"concurrent execution took {elapsed:.2f}s after interrupt — the fan-out "
+        f"to worker tids didn't shortcut the tool's poll loop as expected"
+    )
+    # Also verify cleanup: no stale worker tids should remain after all
+    # tools finished.
+    assert agent._tool_worker_threads == set(), (
+        f"worker tids leaked after run: {agent._tool_worker_threads}"
+    )
+
+
+def test_clear_interrupt_clears_worker_tids(monkeypatch):
+    """After clear_interrupt(), stale worker-tid bits must be cleared so the
+    next turn's tools — which may be scheduled onto recycled tids — don't
+    see a false interrupt."""
+    from tools.interrupt import is_interrupted, set_interrupt
+
+    agent = _make_agent(monkeypatch)
+    # Simulate a worker that registered but never deregistered (e.g. a
+    # hypothetical bug in the tear-down).  Put a tid in the tracked set and
+    # flag it interrupted.
+    fake_tid = threading.current_thread().ident  # this thread's real tid, so is_interrupted() here can see it
+    with agent._tool_worker_threads_lock:
+        agent._tool_worker_threads.add(fake_tid)
+    set_interrupt(True, fake_tid)
+    assert is_interrupted() is True  # sanity: the bit is set before we clear
+
+    agent.clear_interrupt()
+
+    assert is_interrupted() is False, (
+        "clear_interrupt() did not clear the interrupt bit for a tracked "
+        "worker tid — stale interrupt can leak into the next turn"
+    )
+
diff --git a/tests/run_agent/test_memory_provider_init.py b/tests/run_agent/test_memory_provider_init.py
new file mode 100644
index 00000000000..89431db85d0
--- /dev/null
+++ b/tests/run_agent/test_memory_provider_init.py
@@ -0,0 +1,39 @@
+"""Regression tests for memory provider selection during AIAgent init."""
+
+from types import SimpleNamespace
+from unittest.mock import patch
+
+
+def test_blank_memory_provider_does_not_auto_enable_honcho():
+    """A blank memory.provider must stay opt-out even when a Honcho fallback config looks usable."""
+    cfg = {"memory": {"provider": ""}, "agent": {}}  # provider explicitly blank
+    honcho_cfg = SimpleNamespace(enabled=True, api_key="stale-key", base_url=None)  # looks configured; must not be read
+
+    with (
+        patch("hermes_cli.config.load_config", return_value=cfg),
+        patch("hermes_cli.config.save_config") as save_config,
+        patch(
+            "plugins.memory.honcho.client.HonchoClientConfig.from_global_config",
+            return_value=honcho_cfg,
+        ) as from_global_config,
+        patch("plugins.memory.load_memory_provider") as load_memory_provider,
+        patch("agent.model_metadata.get_model_context_length", return_value=204_800),
+        patch("run_agent.get_tool_definitions", return_value=[]),
+        patch("run_agent.check_toolset_requirements", return_value={}),
+        patch("run_agent.OpenAI"),
+    ):
+        from run_agent import AIAgent
+
+        agent = AIAgent(
+            api_key="test-key-1234567890",
+            base_url="https://openrouter.ai/api/v1",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=False,  # memory is not skipped, so provider selection is exercised
+        )
+
+    assert agent._memory_manager is None  # no provider was wired up
+    from_global_config.assert_not_called()  # Honcho config never consulted
+    load_memory_provider.assert_not_called()  # no provider plugin loaded
+    save_config.assert_not_called()  # config file left untouched
+
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index 86f95580f02..bedb7bbf484 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -317,6 +317,60 @@ class TestStripThinkBlocks:
         result = agent._strip_think_blocks("<thought>orphaned reasoning without close")
         assert "<thought>" not in result
 
+    # ─── Unterminated-block coverage (#8878, #9568, #10408) ──────────────
+    # Reasoning models served via NIM / MiniMax M2.7 frequently drop the
+    # closing tag, leaking raw reasoning into assistant content. The open
+    # tag appears at a block boundary (start of text or after a newline);
+    # everything from that tag to end-of-string is stripped.
+
+    def test_unterminated_think_block_content_stripped(self, agent):
+        """An unterminated <think> block is removed in full; only whitespace may remain."""
+        result = agent._strip_think_blocks("<think>orphaned reasoning without close")
+        assert "orphaned reasoning" not in result
+        assert result.strip() == ""
+
+    def test_unterminated_thought_block_content_stripped(self, agent):
+        """Gemma-style <thought> with no closing tag is removed in full; only whitespace may remain."""
+        result = agent._strip_think_blocks("<thought>orphaned reasoning without close")
+        assert "orphaned reasoning" not in result
+        assert result.strip() == ""
+
+    def test_unterminated_multiline_block_stripped(self, agent):
+        """An unterminated block spanning several lines is stripped, first line to last."""
+        result = agent._strip_think_blocks(
+            "<think>\nmulti\nline\nreasoning\nthat never closes"
+        )
+        assert "multi" not in result
+        assert "never closes" not in result
+
+    def test_unterminated_block_after_answer_preserves_prefix(self, agent):
+        """The visible answer before a line-starting unterminated tag is kept; the reasoning after it is dropped."""
+        result = agent._strip_think_blocks(
+            "Answer is 42.\n<think>actually let me reconsider"
+        )
+        assert "Answer is 42." in result
+        assert "reconsider" not in result
+
+    def test_inline_think_mention_in_prose_not_over_stripped(self, agent):
+        """A `<think>` mentioned mid-line in prose must not swallow the rest
+        of the content (the block-boundary check prevents this)."""
+        text = "Use the <think> tag like this in your prose."
+        result = agent._strip_think_blocks(text)
+        # Text on both sides of the inline tag must survive the strip
+        assert "prose" in result
+        assert "Use the" in result
+
+    def test_mixed_case_closed_pair_stripped(self, agent):
+        """Mixed-case pairs such as <THINK>…</THINK> and <Thinking>…</Thinking>
+        are matched by the case-insensitive closed-pair regex: the block is
+        removed and the content that follows it is preserved."""
+        result = agent._strip_think_blocks("<THINK>upper</THINK>final")
+        assert "upper" not in result
+        assert "final" in result
+        result = agent._strip_think_blocks("<Thinking>mixed</Thinking>final")
+        assert "mixed" not in result
+        assert "final" in result
+
 
 class TestExtractReasoning:
     def test_reasoning_field(self, agent):
@@ -1088,6 +1142,41 @@ class TestBuildAssistantMessage:
         result = agent._build_assistant_message(msg, "tool_calls")
         assert "extra_content" not in result["tool_calls"][0]
 
+    def test_think_blocks_stripped_from_content(self, agent):
+        """Inline <think> blocks are stripped from stored content (#8878, #9568).
+
+        The reasoning is captured into ``msg['reasoning']`` via the inline
+        fallback in ``_extract_reasoning``; the raw tags in ``content`` are
+        redundant, and if left in place they leak to messaging platforms,
+        pollute titles, and inflate context.
+        """
+        msg = _mock_assistant_msg(
+            content="<think>internal reasoning</think>The actual answer."
+        )
+        result = agent._build_assistant_message(msg, "stop")
+        assert "<think>" not in result["content"]
+        assert "internal reasoning" not in result["content"]
+        assert "The actual answer." in result["content"]
+        # The reasoning text is still available, under its own key
+        assert result["reasoning"] == "internal reasoning"
+
+    def test_think_blocks_stripped_preserves_normal_content(self, agent):
+        """Content that contains no reasoning tags is stored unchanged."""
+        msg = _mock_assistant_msg(content="No thinking here.")
+        result = agent._build_assistant_message(msg, "stop")
+        assert result["content"] == "No thinking here."
+
+    def test_unterminated_think_block_stripped(self, agent):
+        """An unterminated <think> block (MiniMax / NIM dropped the closing
+        tag) is fully stripped, leaving the stored content empty."""
+        msg = _mock_assistant_msg(
+            content="<think>reasoning that never closes on this NIM endpoint"
+        )
+        result = agent._build_assistant_message(msg, "stop")
+        assert "<think>" not in result["content"]
+        assert "reasoning that never closes" not in result["content"]
+        assert result["content"] == ""
+
 
 class TestFormatToolsForSystemMessage:
     def test_no_tools_returns_empty_array(self, agent):
@@ -1196,6 +1285,7 @@ class TestExecuteToolCalls:
         tc = _mock_tool_call(name="web_search", arguments='{"q":"test"}', call_id="c1")
         mock_msg = _mock_assistant_msg(content="", tool_calls=[tc])
         messages = []
+        agent.platform = "cli"
         agent.tool_progress_callback = None
 
         with patch("run_agent.handle_function_call", return_value="search result"), \
@@ -1207,6 +1297,21 @@ class TestExecuteToolCalls:
         assert len(messages) == 1
         assert messages[0]["role"] == "tool"
 
+    def test_quiet_tool_output_suppressed_without_progress_callback_for_non_cli_agent(self, agent):
+        tc = _mock_tool_call(name="web_search", arguments='{"q":"test"}', call_id="c1")
+        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc])
+        messages = []
+        agent.platform = None  # no platform set, i.e. not "cli"
+        agent.tool_progress_callback = None  # and no progress callback to report through
+
+        with patch("run_agent.handle_function_call", return_value="search result"), \
+             patch.object(agent, "_safe_print") as mock_print:
+            agent._execute_tool_calls(mock_msg, messages, "task-1")
+
+        mock_print.assert_not_called()  # nothing may be printed in this configuration
+        assert len(messages) == 1  # the tool result is still recorded
+        assert messages[0]["role"] == "tool"
+
     def test_vprint_suppressed_in_parseable_quiet_mode(self, agent):
         agent.suppress_status_output = True
 
@@ -1787,6 +1892,30 @@ class TestRunConversation:
         assert all("message_count" in c and "messages" not in c for c in pre_request_calls)
         assert all("usage" in c and "response" not in c for c in post_request_calls)
 
+    def test_content_with_tool_calls_stays_silent_for_non_cli_quiet_mode(self, agent):
+        self._setup_agent(agent)
+        agent.platform = None  # no platform set, i.e. not "cli"
+        tc = _mock_tool_call(name="web_search", arguments="{}", call_id="c1")
+        resp1 = _mock_response(  # first reply: visible text plus a tool call
+            content="I'll search for that.",
+            finish_reason="tool_calls",
+            tool_calls=[tc],
+        )
+        resp2 = _mock_response(content="Done searching", finish_reason="stop")  # second reply ends the turn
+        agent.client.chat.completions.create.side_effect = [resp1, resp2]
+
+        with (
+            patch("run_agent.handle_function_call", return_value="search result"),
+            patch.object(agent, "_safe_print") as mock_print,
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            result = agent.run_conversation("search something")
+
+        assert result["final_response"] == "Done searching"
+        mock_print.assert_not_called()  # neither the interim text nor tool output was printed
+
     def test_interrupt_breaks_loop(self, agent):
         self._setup_agent(agent)
 
diff --git a/tests/run_agent/test_steer.py b/tests/run_agent/test_steer.py
new file mode 100644
index 00000000000..a298ede8c08
--- /dev/null
+++ b/tests/run_agent/test_steer.py
@@ -0,0 +1,228 @@
+"""Tests for AIAgent.steer() — mid-run user message injection.
+
+/steer lets the user add a note to the agent's next tool result without
+interrupting the current tool call. The agent sees the note inline with
+tool output on its next iteration, preserving message-role alternation
+and prompt-cache integrity.
+"""
+from __future__ import annotations
+
+import threading
+
+import pytest
+
+from run_agent import AIAgent
+
+
+def _bare_agent() -> AIAgent:
+    """Build an AIAgent without running __init__, then install only the
+    steer state by hand — the same object.__new__ stub pattern used
+    elsewhere in the test suite.
+    """
+    agent = object.__new__(AIAgent)  # allocate without calling __init__
+    agent._pending_steer = None  # no steer queued yet
+    agent._pending_steer_lock = threading.Lock()
+    return agent
+
+
+class TestSteerAcceptance:  # what steer() accepts, rejects, and stores
+    def test_accepts_non_empty_text(self):
+        agent = _bare_agent()
+        assert agent.steer("go ahead and check the logs") is True
+        assert agent._pending_steer == "go ahead and check the logs"
+
+    def test_rejects_empty_string(self):
+        agent = _bare_agent()
+        assert agent.steer("") is False
+        assert agent._pending_steer is None  # nothing was queued
+
+    def test_rejects_whitespace_only(self):
+        agent = _bare_agent()
+        assert agent.steer("   \n\t  ") is False
+        assert agent._pending_steer is None  # nothing was queued
+
+    def test_rejects_none(self):
+        agent = _bare_agent()
+        assert agent.steer(None) is False  # type: ignore[arg-type]
+        assert agent._pending_steer is None  # nothing was queued
+
+    def test_strips_surrounding_whitespace(self):
+        agent = _bare_agent()
+        assert agent.steer("  hello world  \n") is True
+        assert agent._pending_steer == "hello world"  # stored without the padding
+
+    def test_concatenates_multiple_steers_with_newlines(self):
+        agent = _bare_agent()
+        agent.steer("first note")
+        agent.steer("second note")
+        agent.steer("third note")
+        assert agent._pending_steer == "first note\nsecond note\nthird note"  # call order, newline-joined
+
+
+class TestSteerDrain:  # _drain_pending_steer() hands back the queued text once
+    def test_drain_returns_and_clears(self):
+        agent = _bare_agent()
+        agent.steer("hello")
+        assert agent._drain_pending_steer() == "hello"
+        assert agent._pending_steer is None  # slot is empty after draining
+
+    def test_drain_on_empty_returns_none(self):
+        agent = _bare_agent()
+        assert agent._drain_pending_steer() is None
+
+
+class TestSteerInjection:  # _apply_pending_steer_to_tool_results() behaviour
+    def test_appends_to_last_tool_result(self):
+        agent = _bare_agent()
+        agent.steer("please also check auth.log")
+        messages = [
+            {"role": "user", "content": "what's in /var/log?"},
+            {"role": "assistant", "tool_calls": [{"id": "a"}, {"id": "b"}]},
+            {"role": "tool", "content": "ls output A", "tool_call_id": "a"},
+            {"role": "tool", "content": "ls output B", "tool_call_id": "b"},
+        ]
+        agent._apply_pending_steer_to_tool_results(messages, num_tool_msgs=2)
+        # Only the LAST tool result is modified; earlier ones are untouched.
+        assert messages[2]["content"] == "ls output A"
+        assert "ls output B" in messages[3]["content"]
+        assert "[USER STEER" in messages[3]["content"]
+        assert "please also check auth.log" in messages[3]["content"]
+        # The pending steer has been consumed.
+        assert agent._pending_steer is None
+
+    def test_no_op_when_no_steer_pending(self):
+        agent = _bare_agent()
+        messages = [
+            {"role": "assistant", "tool_calls": [{"id": "a"}]},
+            {"role": "tool", "content": "output", "tool_call_id": "a"},
+        ]
+        agent._apply_pending_steer_to_tool_results(messages, num_tool_msgs=1)
+        assert messages[-1]["content"] == "output"  # unchanged
+
+    def test_no_op_when_num_tool_msgs_zero(self):
+        agent = _bare_agent()
+        agent.steer("steer")
+        messages = [{"role": "user", "content": "hi"}]
+        agent._apply_pending_steer_to_tool_results(messages, num_tool_msgs=0)
+        # The steer stays pending: there was no tool result to attach it to
+        assert agent._pending_steer == "steer"
+
+    def test_marker_is_unambiguous_about_origin(self):
+        """The injection marker must make clear that the text comes from the
+        user and is not tool output — this is the cache-safe way to signal
+        provenance without violating message-role alternation.
+        """
+        agent = _bare_agent()
+        agent.steer("stop after next step")
+        messages = [{"role": "tool", "content": "x", "tool_call_id": "1"}]
+        agent._apply_pending_steer_to_tool_results(messages, num_tool_msgs=1)
+        content = messages[-1]["content"]
+        assert "USER STEER" in content
+        assert "not tool output" in content.lower() or "injected mid-run" in content.lower()
+
+    def test_multimodal_content_list_preserved(self):
+        """Anthropic-style list content should be preserved, with the steer
+        appended as one extra text block."""
+        agent = _bare_agent()
+        agent.steer("extra note")
+        original_blocks = [{"type": "text", "text": "existing output"}]
+        messages = [
+            {"role": "tool", "content": list(original_blocks), "tool_call_id": "1"}
+        ]
+        agent._apply_pending_steer_to_tool_results(messages, num_tool_msgs=1)
+        new_content = messages[-1]["content"]
+        assert isinstance(new_content, list)
+        assert len(new_content) == 2  # original block plus the steer block
+        assert new_content[0] == {"type": "text", "text": "existing output"}
+        assert new_content[1]["type"] == "text"
+        assert "extra note" in new_content[1]["text"]
+
+    def test_restashed_when_no_tool_result_in_batch(self):
+        """If the 'batch' contains no tool-role messages (e.g. all skipped
+        after an interrupt), the steer should be put back into the pending
+        slot so the caller's fallback path can deliver it."""
+        agent = _bare_agent()
+        agent.steer("ping")
+        messages = [
+            {"role": "user", "content": "x"},
+            {"role": "assistant", "content": "y"},
+        ]
+        # Claim there were 2 tool msgs although the tail has none — simulates
+        # the interrupt-cancelled case.
+        agent._apply_pending_steer_to_tool_results(messages, num_tool_msgs=2)
+        # Messages are untouched
+        assert messages[-1]["content"] == "y"
+        # And the steer is pending again so the fallback can pick it up
+        assert agent._pending_steer == "ping"
+
+
+class TestSteerThreadSafety:  # concurrent steer() calls must not lose text
+    def test_concurrent_steer_calls_preserve_all_text(self):
+        agent = _bare_agent()
+        N = 200
+
+        def worker(idx: int) -> None:
+            agent.steer(f"note-{idx}")
+
+        threads = [threading.Thread(target=worker, args=(i,)) for i in range(N)]
+        for t in threads:
+            t.start()
+        for t in threads:
+            t.join()
+
+        text = agent._drain_pending_steer()
+        assert text is not None
+        # Every note must be present exactly once — none lost to a race between writers.
+        lines = text.split("\n")
+        assert len(lines) == N
+        assert set(lines) == {f"note-{i}" for i in range(N)}  # order is not asserted
+
+
+class TestSteerClearedOnInterrupt:
+    def test_clear_interrupt_drops_pending_steer(self):
+        """A hard interrupt supersedes any pending steer — the agent's
+        next tool iteration won't happen, so delivering the steer later
+        would be surprising."""
+        agent = _bare_agent()
+        # Attributes this test sets up before calling clear_interrupt() on a bare agent
+        agent._interrupt_requested = True
+        agent._interrupt_message = None
+        agent._interrupt_thread_signal_pending = False
+        agent._execution_thread_id = None
+        agent._tool_worker_threads = None
+        agent._tool_worker_threads_lock = None
+
+        agent.steer("will be dropped")
+        assert agent._pending_steer == "will be dropped"  # queued before the clear
+
+        agent.clear_interrupt()
+        assert agent._pending_steer is None  # discarded by the clear
+
+
+class TestSteerCommandRegistry:  # /steer registration and active-session bypass
+    def test_steer_in_command_registry(self):
+        """The /steer slash command must be registered so it reaches all
+        platforms (CLI, gateway, TUI autocomplete, Telegram/Slack menus).
+        """
+        from hermes_cli.commands import resolve_command
+
+        cmd = resolve_command("steer")
+        assert cmd is not None
+        assert cmd.name == "steer"
+        assert cmd.category == "Session"
+        assert cmd.args_hint == "<prompt>"
+
+    def test_steer_in_bypass_set(self):
+        """When the agent is running, /steer MUST bypass the Level-1
+        base-adapter queue so it reaches the gateway runner's /steer
+        handler. Otherwise it would be queued as user text and only
+        delivered at turn end — defeating the whole point.
+        """
+        from hermes_cli.commands import ACTIVE_SESSION_BYPASS_COMMANDS, should_bypass_active_session
+
+        assert "steer" in ACTIVE_SESSION_BYPASS_COMMANDS
+        assert should_bypass_active_session("steer") is True
+
+
+if __name__ == "__main__":  # pragma: no cover
+    pytest.main([__file__, "-v"])
diff --git a/tests/run_agent/test_streaming.py b/tests/run_agent/test_streaming.py
index 73a9872020e..e4825599af8 100644
--- a/tests/run_agent/test_streaming.py
+++ b/tests/run_agent/test_streaming.py
@@ -141,6 +141,50 @@ class TestStreamingAccumulator:
         assert tc[0].function.name == "terminal"
         assert tc[0].function.arguments == '{"command": "ls"}'
 
+    @patch("run_agent.AIAgent._create_request_openai_client")
+    @patch("run_agent.AIAgent._close_request_openai_client")
+    def test_tool_name_not_duplicated_when_resent_per_chunk(self, mock_close, mock_create):
+        """A tool name resent in every chunk must not be concatenated.
+
+        MiniMax M2.7 via NVIDIA NIM resends it; bug #8259: the old += produced
+        "read_fileread_file".  Assignment (as in OpenAI Node SDK / LiteLLM) fixes it.
+        """
+        from run_agent import AIAgent
+
+        chunks = [  # every tool-call delta repeats index 0, the id and the name
+            _make_stream_chunk(tool_calls=[
+                _make_tool_call_delta(index=0, tc_id="call_nim", name="read_file")
+            ]),
+            _make_stream_chunk(tool_calls=[
+                _make_tool_call_delta(index=0, tc_id="call_nim", name="read_file", arguments='{"path":')
+            ]),
+            _make_stream_chunk(tool_calls=[
+                _make_tool_call_delta(index=0, tc_id="call_nim", name="read_file", arguments=' "x.py"}')
+            ]),
+            _make_stream_chunk(finish_reason="tool_calls"),
+        ]
+
+        mock_client = MagicMock()
+        mock_client.chat.completions.create.return_value = iter(chunks)
+        mock_create.return_value = mock_client
+
+        agent = AIAgent(
+            model="test/model",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+        agent.api_mode = "chat_completions"
+        agent._interrupt_requested = False
+
+        response = agent._interruptible_streaming_api_call({})
+
+        tc = response.choices[0].message.tool_calls
+        assert tc is not None
+        assert len(tc) == 1
+        assert tc[0].function.name == "read_file"  # once, not once per chunk
+        assert tc[0].function.arguments == '{"path": "x.py"}'  # fragments still concatenate
+
     @patch("run_agent.AIAgent._create_request_openai_client")
     @patch("run_agent.AIAgent._close_request_openai_client")
     def test_tool_call_extra_content_preserved(self, mock_close, mock_create):
@@ -952,3 +996,138 @@ class TestAnthropicStreamCallbacks:
         agent._interruptible_streaming_api_call({})
 
         assert touch_calls.count("receiving stream response") == len(events)
+
+
+class TestPartialToolCallWarning:
+    """Regression: when a stream dies mid tool-call argument generation after
+    text was already delivered, the partial-stream stub at run_agent.py
+    line ~6107 used to silently set ``tool_calls=None`` and return
+    ``finish_reason=stop``, losing the attempted action with zero user-facing
+    signal.  Live-observed Apr 2026 with MiniMax M2.7 on a 6-minute audit
+    task — agent streamed commentary, emitted a write_file tool call,
+    MiniMax stalled for 240 s mid-arguments, stale-stream detector killed
+    the connection, the stub returned, session ended with no file written
+    and no error shown.
+
+    Fix: when the stream accumulator captured any tool-call names before the
+    error, the stub now appends a user-visible warning to content AND fires
+    it as a stream delta so the user sees it immediately.
+    """
+
+    @patch("run_agent.AIAgent._create_request_openai_client")
+    @patch("run_agent.AIAgent._close_request_openai_client")
+    def test_partial_tool_call_surfaces_warning(self, mock_close, mock_create):
+        """Stream with text + partial tool-call name + mid-stream error
+        produces a stub whose content contains the user-visible warning
+        and whose tool_calls is None."""
+        from run_agent import AIAgent
+
+        class _StallError(RuntimeError):
+            pass
+
+        # Yields text, then a tool-call name, then a fragment of its arguments,
+        # and raises before the arguments are complete.
+        def _stalling_stream():
+            yield _make_stream_chunk(content="Let me write the audit: ")
+            yield _make_stream_chunk(tool_calls=[
+                _make_tool_call_delta(index=0, tc_id="call_1", name="write_file"),
+            ])
+            yield _make_stream_chunk(tool_calls=[
+                _make_tool_call_delta(index=0, arguments='{"path": "/tmp/x", '),
+            ])
+            raise _StallError("simulated upstream stall")
+
+        mock_client = MagicMock()
+        # side_effect hands every create() call its own fresh generator.
+        mock_client.chat.completions.create.side_effect = lambda *a, **kw: _stalling_stream()
+        mock_create.return_value = mock_client
+
+        agent = AIAgent(
+            api_key="test-key",
+            base_url="https://openrouter.ai/api/v1",
+            model="test/model",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+        agent.api_mode = "chat_completions"
+        agent._interrupt_requested = False
+
+        # Collect stream deltas in a list so they can be inspected, and pre-set
+        # _current_streamed_assistant_text to the text chunk yielded above.
+        fired_deltas: list = []
+        agent._fire_stream_delta = lambda text: fired_deltas.append(text)
+        agent._current_streamed_assistant_text = "Let me write the audit: "
+
+        # HERMES_STREAM_RETRIES=0 presumably turns retries off so the stall is
+        # handled on the first attempt (confirm in run_agent).  patch.dict puts
+        # os.environ back on exit, even if the call raises.
+        with patch.dict("os.environ", {"HERMES_STREAM_RETRIES": "0"}):
+            response = agent._interruptible_streaming_api_call({})
+
+        content = response.choices[0].message.content or ""
+        assert "Let me write the audit:" in content, (
+            f"Partial text not preserved in stub: {content!r}"
+        )
+        assert "Stream stalled mid tool-call" in content, (
+            f"Stub content is missing the dropped-tool-call warning; users "
+            f"get silent failure.  Got content={content!r}"
+        )
+        assert "write_file" in content, (
+            f"Warning should name the dropped tool. Got: {content!r}"
+        )
+        assert response.choices[0].message.tool_calls is None
+        assert any("Stream stalled mid tool-call" in d for d in fired_deltas), (
+            f"Warning was not surfaced as a live stream delta. "
+            f"fired_deltas={fired_deltas}"
+        )
+
+    @patch("run_agent.AIAgent._create_request_openai_client")
+    @patch("run_agent.AIAgent._close_request_openai_client")
+    def test_partial_text_only_no_warning(self, mock_close, mock_create):
+        """Text-only partial stream (no tool call mid-flight) keeps the
+        pre-fix behaviour: bare recovered text, no warning noise."""
+        from run_agent import AIAgent
+
+        class _StallError(RuntimeError):
+            pass
+
+        # A single text chunk, then the error; no tool-call delta is yielded.
+        def _stalling_stream():
+            yield _make_stream_chunk(content="Here's my answer so far")
+            raise _StallError("simulated upstream stall")
+
+        mock_client = MagicMock()
+        # side_effect hands every create() call its own fresh generator.
+        mock_client.chat.completions.create.side_effect = lambda *a, **kw: _stalling_stream()
+        mock_create.return_value = mock_client
+
+        agent = AIAgent(
+            api_key="test-key",
+            base_url="https://openrouter.ai/api/v1",
+            model="test/model",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+        agent.api_mode = "chat_completions"
+        agent._interrupt_requested = False
+        # Pre-set the streamed-text attribute to the chunk yielded above.
+        agent._current_streamed_assistant_text = "Here's my answer so far"
+
+        # HERMES_STREAM_RETRIES=0 presumably turns retries off so the stall is
+        # handled on the first attempt (confirm in run_agent).  patch.dict puts
+        # os.environ back on exit, even if the call raises.
+        with patch.dict("os.environ", {"HERMES_STREAM_RETRIES": "0"}):
+            response = agent._interruptible_streaming_api_call({})
+
+        # Equality, not containment: nothing at all may be appended to the
+        # recovered text when no tool call was in flight.
+        content = response.choices[0].message.content or ""
+        assert content == "Here's my answer so far", (
+            f"Pre-fix behaviour regressed for text-only partial streams: {content!r}"
+        )
+        assert "Stream stalled" not in content, (
+            f"Unexpected warning on text-only partial stream: {content!r}"
+        )
+
diff --git a/tests/test_hermes_state.py b/tests/test_hermes_state.py
index 5f9a16a529c..d54d7b9fb0f 100644
--- a/tests/test_hermes_state.py
+++ b/tests/test_hermes_state.py
@@ -479,6 +479,141 @@ class TestFTS5Search:
         assert s('my-app.config.ts') == '"my-app.config.ts"'
 
 
+# =========================================================================
+# CJK (Chinese/Japanese/Korean) LIKE fallback
+# =========================================================================
+
+class TestCJKSearchFallback:
+    """Regression tests for CJK search (see #11511).
+
+    SQLite FTS5's default tokenizer treats contiguous CJK runs as a single
+    token ("和其他agent的聊天记录" → one token), so substring queries like
+    "记忆断裂" return 0 rows despite the data being present. SessionDB falls
+    back to LIKE substring matching whenever FTS5 returns no results and
+    the query contains CJK characters.
+    """
+
+    def test_cjk_detection_covers_all_ranges(self):
+        from hermes_state import SessionDB
+        f = SessionDB._contains_cjk
+        # Chinese (CJK Unified Ideographs)
+        assert f("记忆断裂") is True
+        # Japanese Hiragana + Katakana
+        assert f("こんにちは") is True
+        assert f("カタカナ") is True
+        # Korean Hangul syllables (both early and late — guards against
+        # the \ud7a0-\ud7af typo seen in one of the duplicate PRs)
+        assert f("안녕하세요") is True
+        assert f("기억") is True
+        # Non-CJK, mixed CJK + ASCII, and the empty string
+        assert f("hello world") is False
+        assert f("日本語mixedwithenglish") is True
+        assert f("") is False
+
+    def test_chinese_multichar_query_returns_results(self, db):
+        """The headline bug: multi-char Chinese query must not return []."""
+        db.create_session(session_id="s1", source="cli")
+        db.append_message(
+            "s1", role="user",
+            content="昨天和其他Agent的聊天记录，记忆断裂问题复现了",
+        )
+        results = db.search_messages("记忆断裂")
+        assert len(results) == 1
+        assert results[0]["session_id"] == "s1"
+
+    def test_chinese_bigram_query(self, db):
+        db.create_session(session_id="s1", source="telegram")
+        db.append_message("s1", role="user", content="今天讨论A2A通信协议的实现")
+        results = db.search_messages("通信")
+        assert len(results) == 1
+
+    def test_korean_query_returns_results(self, db):
+        """Guards against Hangul range typos (\\uac00-\\ud7af, not \\ud7a0-)."""
+        db.create_session(session_id="s1", source="cli")
+        db.append_message("s1", role="user", content="안녕하세요 반갑습니다")
+        results = db.search_messages("안녕")
+        assert len(results) == 1
+
+    def test_japanese_query_returns_results(self, db):
+        db.create_session(session_id="s1", source="cli")
+        db.append_message("s1", role="user", content="こんにちは世界")
+        assert len(db.search_messages("こんにちは")) == 1
+        assert len(db.search_messages("世界")) == 1
+
+    def test_cjk_fallback_preserves_source_filter(self, db):
+        """Guards against the SQL-builder bug where filter clauses land
+        after LIMIT/OFFSET (seen in one of the duplicate PRs)."""
+        db.create_session(session_id="s1", source="cli")
+        db.create_session(session_id="s2", source="telegram")
+        db.append_message("s1", role="user", content="记忆断裂在CLI")
+        db.append_message("s2", role="user", content="记忆断裂在Telegram")
+
+        results = db.search_messages("记忆断裂", source_filter=["telegram"])
+        assert len(results) == 1
+        assert results[0]["source"] == "telegram"
+
+    def test_cjk_fallback_preserves_exclude_sources(self, db):
+        db.create_session(session_id="s1", source="cli")
+        db.create_session(session_id="s2", source="tool")
+        db.append_message("s1", role="user", content="记忆断裂在CLI")
+        db.append_message("s2", role="assistant", content="记忆断裂在tool")
+
+        results = db.search_messages("记忆断裂", exclude_sources=["tool"])
+        sources = {r["source"] for r in results}
+        assert "tool" not in sources
+        assert "cli" in sources
+
+    def test_cjk_fallback_preserves_role_filter(self, db):
+        db.create_session(session_id="s1", source="cli")
+        db.append_message("s1", role="user", content="用户说的记忆断裂")
+        db.append_message("s1", role="assistant", content="助手说的记忆断裂")
+
+        results = db.search_messages("记忆断裂", role_filter=["assistant"])
+        assert len(results) == 1
+        assert results[0]["role"] == "assistant"
+
+    def test_cjk_snippet_is_centered_on_match(self, db):
+        """Snippet should contain the search term, not just the first N chars."""
+        db.create_session(session_id="s1", source="cli")
+        long_prefix = "这是一段很长的前缀用来把匹配位置推到文档中间" * 3
+        long_suffix = "这是一段很长的后缀内容填充剩余空间" * 3
+        db.append_message(
+            "s1", role="user",
+            content=f"{long_prefix}记忆断裂{long_suffix}",
+        )
+        results = db.search_messages("记忆断裂")
+        assert len(results) == 1
+        # The centered substr() snippet must include the matched term.
+        assert "记忆断裂" in results[0]["snippet"]
+
+    def test_english_query_still_uses_fts5_fast_path(self, db):
+        """English queries must not trigger the LIKE fallback (fast path regression)."""
+        db.create_session(session_id="s1", source="cli")
+        db.append_message("s1", role="user", content="Deploy docker containers")
+        results = db.search_messages("docker")
+        assert len(results) == 1
+        # No CJK in query → LIKE fallback must not run. We don't assert this
+        # directly (no instrumentation), but the FTS5 path produces an
+        # FTS5-style snippet with highlight markers when the term is short.
+        # At minimum: english queries must still match.
+
+    def test_cjk_query_with_no_matches_returns_empty(self, db):
+        db.create_session(session_id="s1", source="cli")
+        db.append_message("s1", role="user", content="unrelated English content")
+        results = db.search_messages("记忆断裂")
+        assert results == []
+
+    def test_mixed_cjk_english_query(self, db):
+        """Mixed queries should still fall back to LIKE when FTS5 misses."""
+        db.create_session(session_id="s1", source="cli")
+        db.append_message("s1", role="user", content="讨论Agent通信协议")
+        # "Agent通信" mixes ASCII and CJK.  NOTE(review): how FTS5 tokenizes
+        # such a run is not verified here; the test only requires that the
+        # substring query finds the message.
+        results = db.search_messages("Agent通信")
+        assert len(results) == 1
+
+
 # =========================================================================
 # Session search and listing
 # =========================================================================
diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py
index e7681b784cf..533516b95da 100644
--- a/tests/test_tui_gateway_server.py
+++ b/tests/test_tui_gateway_server.py
@@ -363,6 +363,28 @@ def test_image_attach_appends_local_image(monkeypatch):
     assert len(server._sessions["sid"]["attached_images"]) == 1
 
 
+def test_commands_catalog_surfaces_quick_commands(monkeypatch):
+    """commands.catalog exposes configured quick_commands as "/name" entries."""
+    monkeypatch.setattr(server, "_load_cfg", lambda: {"quick_commands": {
+        "build": {"type": "exec", "command": "npm run build"},
+        "git": {"type": "alias", "target": "/shell git"},
+        "notes": {"type": "exec", "command": "cat NOTES.md", "description": "Open design notes"},
+    }})
+
+    catalog = server.handle_request({"id": "1", "method": "commands.catalog", "params": {}})["result"]
+
+    described = dict(catalog["pairs"])
+    assert "npm run build" in described["/build"]
+    assert described["/git"].startswith("alias →")
+    assert described["/notes"] == "Open design notes"
+
+    user_category = next(cat for cat in catalog["categories"] if cat["name"] == "User commands")
+    assert set(dict(user_category["pairs"])) == {"/build", "/git", "/notes"}
+
+    for slash_name in ("/build", "/notes"):
+        assert catalog["canon"][slash_name] == slash_name
+
+
 def test_command_dispatch_exec_nonzero_surfaces_error(monkeypatch):
     monkeypatch.setattr(server, "_load_cfg", lambda: {"quick_commands": {"boom": {"type": "exec", "command": "boom"}}})
     monkeypatch.setattr(
@@ -438,3 +460,651 @@ def test_rollback_restore_resolves_number_and_file_path():
     assert resp["result"]["success"] is True
     assert calls["args"][1] == "bbb222"
     assert calls["args"][2] == "src/app.tsx"
+
+
+# ── session.steer ────────────────────────────────────────────────────
+
+
+def test_session_steer_calls_agent_steer_when_agent_supports_it():
+    """session.steer forwards the text to agent.steer() and reports it as
+    queued; agent.interrupt() must never be invoked along the way.
+    """
+    steer_text = "also check auth.log"
+    recorded = {}
+
+    class _Agent:
+        def steer(self, text):
+            recorded["steer_text"] = text
+            return True
+
+        def interrupt(self, *args, **kwargs):
+            recorded["interrupt_called"] = True
+
+    request = {
+        "id": "1",
+        "method": "session.steer",
+        "params": {"session_id": "sid", "text": steer_text},
+    }
+    server._sessions["sid"] = _session(agent=_Agent())
+    try:
+        resp = server.handle_request(request)
+    finally:
+        server._sessions.pop("sid", None)
+
+    assert "result" in resp, resp
+    assert resp["result"]["status"] == "queued"
+    assert resp["result"]["text"] == steer_text
+    assert recorded["steer_text"] == steer_text
+    assert "interrupt_called" not in recorded  # must NOT interrupt
+
+
+def test_session_steer_rejects_empty_text():
+    """Whitespace-only steer text is answered with error code 4002."""
+    blank_request = {
+        "id": "1",
+        "method": "session.steer",
+        "params": {"session_id": "sid", "text": "   "},
+    }
+    server._sessions["sid"] = _session(agent=types.SimpleNamespace(steer=lambda t: True))
+    try:
+        resp = server.handle_request(blank_request)
+    finally:
+        server._sessions.pop("sid", None)
+
+    assert "error" in resp, resp
+    assert resp["error"]["code"] == 4002
+
+
+def test_session_steer_errors_when_agent_has_no_steer_method():
+    """An agent object without steer() makes session.steer answer error 4010."""
+    request = {
+        "id": "1",
+        "method": "session.steer",
+        "params": {"session_id": "sid", "text": "hi"},
+    }
+    server._sessions["sid"] = _session(agent=types.SimpleNamespace())  # no steer()
+    try:
+        resp = server.handle_request(request)
+    finally:
+        server._sessions.pop("sid", None)
+
+    assert "error" in resp, resp
+    assert resp["error"]["code"] == 4010
+
+
+def test_session_info_includes_mcp_servers(monkeypatch):
+    """_session_info reports get_mcp_status() output under "mcp_servers"."""
+    reported = [
+        {"name": "github", "transport": "http", "tools": 12, "connected": True},
+        {"name": "filesystem", "transport": "stdio", "tools": 4, "connected": True},
+        {"name": "broken", "transport": "stdio", "tools": 0, "connected": False},
+    ]
+    stub_module = types.ModuleType("tools.mcp_tool")
+    stub_module.get_mcp_status = lambda: reported
+    monkeypatch.setitem(sys.modules, "tools.mcp_tool", stub_module)
+    agent_stub = types.SimpleNamespace(tools=[], model="")
+
+    assert server._session_info(agent_stub)["mcp_servers"] == reported
+
+
+# ---------------------------------------------------------------------------
+# History-mutating commands must reject while session.running is True.
+# Without these guards, prompt.submit's post-run history write either
+# clobbers the mutation (version matches) or silently drops the agent's
+# output (version mismatch) — both produce UI<->backend state desync.
+# ---------------------------------------------------------------------------
+
+
+def test_session_undo_rejects_while_running():
+    """Regression for TUI silent-drop #1: session.undo is refused mid-turn.
+
+    Mutating history while the agent runs would either clobber the undo or
+    make prompt.submit silently drop the agent's response."""
+    turns = [
+        {"role": "user", "content": "hi"},
+        {"role": "assistant", "content": "hello"},
+    ]
+    server._sessions["sid"] = _session(running=True, history=turns)
+    try:
+        undo = {"id": "1", "method": "session.undo", "params": {"session_id": "sid"}}
+        error = server.handle_request(undo).get("error")
+        assert error, "session.undo should reject while running"
+        assert error["code"] == 4009
+        assert "session busy" in error["message"]
+        assert len(server._sessions["sid"]["history"]) == 2  # history untouched
+    finally:
+        server._sessions.pop("sid", None)
+
+
+def test_session_undo_allowed_when_idle():
+    """Idle sessions still accept session.undo (guards the busy check)."""
+    turns = [
+        {"role": "user", "content": "hi"},
+        {"role": "assistant", "content": "hello"},
+    ]
+    server._sessions["sid"] = _session(running=False, history=turns)
+    try:
+        undo = {"id": "1", "method": "session.undo", "params": {"session_id": "sid"}}
+        resp = server.handle_request(undo)
+        assert resp.get("result"), f"got error: {resp.get('error')}"
+        assert resp["result"]["removed"] == 2
+        assert server._sessions["sid"]["history"] == []
+    finally:
+        server._sessions.pop("sid", None)
+
+
+def test_session_compress_rejects_while_running():
+    """session.compress must fail with code 4009 while the session is running."""
+    server._sessions["sid"] = _session(running=True)
+    try:
+        request = {"id": "1", "method": "session.compress", "params": {"session_id": "sid"}}
+        resp = server.handle_request(request)
+        assert resp.get("error"), "session.compress should reject while running"
+        assert resp["error"]["code"] == 4009
+    finally:
+        server._sessions.pop("sid", None)
+
+
+def test_rollback_restore_rejects_full_history_while_running():
+    """Full-history rollback.restore must reject mid-turn (file-scoped path not covered here)."""
+    server._sessions["sid"] = _session(running=True)
+    try:
+        resp = server.handle_request(
+            {"id": "1", "method": "rollback.restore", "params": {"session_id": "sid", "hash": "abc"}}
+        )
+        assert resp.get("error"), "full-history rollback should reject while running"
+        assert resp["error"]["code"] == 4009
+    finally:
+        server._sessions.pop("sid", None)
+
+
+def test_prompt_submit_history_version_mismatch_surfaces_warning(monkeypatch):
+    """Fix for TUI silent-drop #2: the defensive backstop at prompt.submit
+    must attach a 'warning' to message.complete when history was
+    mutated externally during the turn (instead of silently dropping
+    the agent's output)."""
+    # Agent bumps history_version itself mid-run to simulate an external
+    # mutation slipping past the guards.
+    session_ref = {"s": None}  # filled in below, once the session exists
+
+    class _RacyAgent:
+        def run_conversation(self, prompt, conversation_history=None, stream_callback=None):
+            # Simulate: something external bumped history_version
+            # while we were running.
+            with session_ref["s"]["history_lock"]:
+                session_ref["s"]["history_version"] += 1
+            return {"final_response": "agent reply", "messages": [{"role": "assistant", "content": "agent reply"}]}
+
+    class _ImmediateThread:  # Thread stand-in: start() runs the target inline
+        def __init__(self, target=None, daemon=None):
+            self._target = target
+
+        def start(self):
+            self._target()
+
+    server._sessions["sid"] = _session(agent=_RacyAgent())
+    session_ref["s"] = server._sessions["sid"]
+    emits: list[tuple] = []  # positional args of every server._emit call
+    try:
+        monkeypatch.setattr(server.threading, "Thread", _ImmediateThread)
+        monkeypatch.setattr(server, "_get_usage", lambda _a: {})
+        monkeypatch.setattr(server, "render_message", lambda _t, _c: "")
+        monkeypatch.setattr(server, "_emit", lambda *a: emits.append(a))
+
+        resp = server.handle_request(
+            {"id": "1", "method": "prompt.submit", "params": {"session_id": "sid", "text": "hi"}}
+        )
+        assert resp.get("result"), f"got error: {resp.get('error')}"
+
+        # History should NOT contain the agent's output (version mismatch)
+        assert server._sessions["sid"]["history"] == []
+
+        # message.complete must carry a 'warning' so the UI / operator
+        # knows the output was not persisted.
+        complete_calls = [a for a in emits if a[0] == "message.complete"]
+        assert len(complete_calls) == 1
+        _, _, payload = complete_calls[0]  # third positional arg is the payload
+        assert "warning" in payload, (
+            "message.complete must include a 'warning' field on "
+            "history_version mismatch — otherwise the UI silently "
+            "shows output that was never persisted"
+        )
+        assert "not saved" in payload["warning"].lower() or "changed" in payload["warning"].lower()
+    finally:
+        server._sessions.pop("sid", None)
+
+
+def test_prompt_submit_history_version_match_persists_normally(monkeypatch):
+    """Regression guard: the backstop does not affect the happy path."""
+    class _Agent:
+        def run_conversation(self, prompt, conversation_history=None, stream_callback=None):
+            return {"final_response": "reply", "messages": [{"role": "assistant", "content": "reply"}]}
+
+    class _ImmediateThread:  # Thread stand-in: start() runs the target inline
+        def __init__(self, target=None, daemon=None):
+            self._target = target
+
+        def start(self):
+            self._target()
+
+    server._sessions["sid"] = _session(agent=_Agent())
+    emits: list[tuple] = []  # positional args of every server._emit call
+    try:
+        monkeypatch.setattr(server.threading, "Thread", _ImmediateThread)
+        monkeypatch.setattr(server, "_get_usage", lambda _a: {})
+        monkeypatch.setattr(server, "render_message", lambda _t, _c: "")
+        monkeypatch.setattr(server, "_emit", lambda *a: emits.append(a))
+
+        resp = server.handle_request(
+            {"id": "1", "method": "prompt.submit", "params": {"session_id": "sid", "text": "hi"}}
+        )
+        assert resp.get("result")
+
+        # History was written
+        assert server._sessions["sid"]["history"] == [{"role": "assistant", "content": "reply"}]
+        assert server._sessions["sid"]["history_version"] == 1
+
+        # No warning should be attached
+        complete_calls = [a for a in emits if a[0] == "message.complete"]
+        assert len(complete_calls) == 1
+        _, _, payload = complete_calls[0]  # third positional arg is the payload
+        assert "warning" not in payload
+    finally:
+        server._sessions.pop("sid", None)
+
+
+# ---------------------------------------------------------------------------
+# session.interrupt must only cancel pending prompts owned by the calling
+# session — it must not blast-resolve clarify/sudo/secret prompts on
+# unrelated sessions sharing the same tui_gateway process.  Without
+# session scoping the other sessions' prompts silently resolve to empty
+# strings, unblocking their agent threads as if the user cancelled.
+# ---------------------------------------------------------------------------
+
+
+def test_interrupt_only_clears_own_session_pending():
+    """session.interrupt on session A must NOT release pending prompts
+    that belong to session B."""
+    import types
+
+    session_a = _session()
+    session_a["agent"] = types.SimpleNamespace(interrupt=lambda: None)
+    session_b = _session()
+    session_b["agent"] = types.SimpleNamespace(interrupt=lambda: None)
+    server._sessions["sid_a"] = session_a
+    server._sessions["sid_b"] = session_b
+
+    try:
+        # Simulate pending prompts on both sessions (what _block creates
+        # while a clarify/sudo/secret request is outstanding).
+        ev_a = threading.Event()
+        ev_b = threading.Event()
+        server._pending["rid-a"] = ("sid_a", ev_a)
+        server._pending["rid-b"] = ("sid_b", ev_b)
+        server._answers.clear()  # NOTE(review): empties answers for every request id; not restored in finally
+
+        # Interrupt session A.
+        resp = server.handle_request(
+            {"id": "1", "method": "session.interrupt", "params": {"session_id": "sid_a"}}
+        )
+        assert resp.get("result"), f"got error: {resp.get('error')}"
+
+        # Session A's pending must be released to empty.
+        assert ev_a.is_set(), "sid_a pending Event should be set after interrupt"
+        assert server._answers.get("rid-a") == ""
+
+        # Session B's pending MUST remain untouched — no cross-session blast.
+        assert not ev_b.is_set(), (
+            "CRITICAL: session.interrupt on sid_a released a pending prompt "
+            "belonging to sid_b — other sessions' clarify/sudo/secret "
+            "prompts are being silently cancelled"
+        )
+        assert "rid-b" not in server._answers
+    finally:
+        server._sessions.pop("sid_a", None)
+        server._sessions.pop("sid_b", None)
+        server._pending.pop("rid-a", None)
+        server._pending.pop("rid-b", None)
+        server._answers.pop("rid-a", None)
+        server._answers.pop("rid-b", None)
+
+
+def test_interrupt_clears_multiple_own_pending():
+    """session.interrupt releases every pending prompt of the session when it
+    holds several (uncommon but possible via nested tool calls)."""
+    import types
+
+    owner = _session()
+    owner["agent"] = types.SimpleNamespace(interrupt=lambda: None)
+    server._sessions["sid"] = owner
+
+    request_ids = ("r1", "r2")
+    try:
+        events = [threading.Event() for _ in request_ids]
+        for rid, event in zip(request_ids, events):
+            server._pending[rid] = ("sid", event)
+
+        interrupt = {"id": "1", "method": "session.interrupt", "params": {"session_id": "sid"}}
+        resp = server.handle_request(interrupt)
+        assert resp.get("result")
+        assert all(event.is_set() for event in events)
+        assert all(server._answers.get(rid) == "" for rid in request_ids)
+    finally:
+        server._sessions.pop("sid", None)
+        for rid in request_ids:
+            server._pending.pop(rid, None)
+            server._answers.pop(rid, None)
+
+
+def test_clear_pending_without_sid_clears_all():
+    """_clear_pending(None), the shutdown path, releases the pending prompt
+    of every session rather than filtering by owner."""
+    owners = {"a": "sid_x", "b": "sid_y", "c": "sid_z"}
+    events = {rid: threading.Event() for rid in owners}
+    for rid, sid in owners.items():
+        server._pending[rid] = (sid, events[rid])
+    try:
+        server._clear_pending(None)
+        assert all(event.is_set() for event in events.values())
+    finally:
+        for rid in owners:
+            server._pending.pop(rid, None)
+            server._answers.pop(rid, None)
+
+
+def test_respond_unpacks_sid_tuple_correctly():
+    """clarify.respond still resolves a prompt stored as a (sid, Event) tuple."""
+    rid = "rid-x"
+    answered = threading.Event()
+    server._pending[rid] = ("sid_x", answered)
+    try:
+        params = {"request_id": rid, "answer": "the answer"}
+        request = {"id": "1", "method": "clarify.respond", "params": params}
+        resp = server.handle_request(request)
+        assert resp.get("result")
+        assert answered.is_set()
+        assert server._answers.get(rid) == "the answer"
+    finally:
+        server._pending.pop(rid, None)
+        server._answers.pop(rid, None)
+
+
+
+# ---------------------------------------------------------------------------
+# /model switch and other agent-mutating commands must reject while the
+# session is running.  agent.switch_model() mutates self.model, self.provider,
+# self.base_url, self.client etc. in place — the worker thread running
+# agent.run_conversation is reading those on every iteration.  Same class of
+# bug as the session.undo / session.compress mid-run silent-drop; same fix
+# pattern: reject with 4009 while running.
+# ---------------------------------------------------------------------------
+
+
+def test_config_set_model_rejects_while_running(monkeypatch):
+    """config.set key=model answers 4009 mid-turn and never applies the switch."""
+    switch_calls = []
+
+    def _fake_apply(sid, session, raw):
+        switch_calls.append(raw)
+        return {"value": raw, "warning": ""}
+
+    monkeypatch.setattr(server, "_apply_model_switch", _fake_apply)
+
+    server._sessions["sid"] = _session(running=True)
+    try:
+        params = {"session_id": "sid", "key": "model", "value": "anthropic/claude-sonnet-4.6"}
+        request = {"id": "1", "method": "config.set", "params": params}
+        resp = server.handle_request(request)
+        error = resp.get("error")
+        assert error
+        assert error["code"] == 4009
+        assert "session busy" in error["message"]
+        assert not switch_calls, (
+            "_apply_model_switch was called mid-turn — would race with "
+            "the worker thread reading agent.model / agent.client"
+        )
+    finally:
+        server._sessions.pop("sid", None)
+
+
+def test_config_set_model_allowed_when_idle(monkeypatch):
+    """An idle session must still accept a model switch via config.set."""
+    switch_calls = []
+
+    def _fake_apply(sid, session, raw):
+        switch_calls.append(raw)
+        return {"value": "newmodel", "warning": ""}
+
+    monkeypatch.setattr(server, "_apply_model_switch", _fake_apply)
+
+    # Idle session: the request has to reach the stub.
+    params = {"session_id": "sid", "key": "model", "value": "newmodel"}
+    server._sessions["sid"] = _session(running=False)
+    try:
+        resp = server.handle_request({"id": "1", "method": "config.set", "params": params})
+        result = resp.get("result")
+        assert result
+        assert result["value"] == "newmodel"
+        assert switch_calls
+    finally:
+        server._sessions.pop("sid", None)
+
+
+def test_mirror_slash_side_effects_rejects_mutating_commands_while_running(monkeypatch):
+    """While a turn is in flight, slash passthrough for /model,
+    /personality, /prompt and /compress must come back with a busy
+    warning instead of mutating live agent state — the same race that
+    config.set guards against."""
+    import types
+
+    fired = set()
+
+    def _fake_apply_model(sid, session, arg):
+        fired.add("model")
+        return {"value": arg, "warning": ""}
+
+    def _fake_compress(session, focus):
+        fired.add("compress")
+        return (0, {})
+
+    monkeypatch.setattr(server, "_apply_model_switch", _fake_apply_model)
+    monkeypatch.setattr(server, "_compress_session_history", _fake_compress)
+
+    session = _session(running=True)
+    session["agent"] = types.SimpleNamespace(model="x")
+
+    for cmd, expected_name in {
+        "/model new/model": "model",
+        "/personality default": "personality",
+        "/prompt": "prompt",
+        "/compress": "compress",
+    }.items():
+        warning = server._mirror_slash_side_effects("sid", session, cmd)
+        assert "session busy" in warning, (
+            f"{cmd} should have returned busy warning, got: {warning!r}"
+        )
+        assert f"/{expected_name}" in warning
+
+    # Neither stubbed helper may have been reached.
+    assert "model" not in fired, "model switch fired despite running session"
+    assert "compress" not in fired, "compress fired despite running session"
+
+
+def test_mirror_slash_side_effects_allowed_when_idle(monkeypatch):
+    """With no turn in flight, /model passthrough must reach the switch helper."""
+    import types
+
+    switch_args = []
+
+    def _fake_apply_model(sid, session, arg):
+        switch_args.append(arg)
+        return {"value": arg, "warning": ""}
+
+    monkeypatch.setattr(server, "_apply_model_switch", _fake_apply_model)
+
+    idle = _session(running=False)
+    idle["agent"] = types.SimpleNamespace(model="x")
+
+    warning = server._mirror_slash_side_effects("sid", idle, "/model foo")
+    # The switch went through, so no busy warning may be reported.
+    assert "session busy" not in warning
+    assert switch_args
+
+
+# ---------------------------------------------------------------------------
+# session.create / session.close race: fast /new churn must not orphan the
+# slash_worker subprocess or the global approval-notify registration.
+# ---------------------------------------------------------------------------
+
+
+def test_session_create_close_race_does_not_orphan_worker(monkeypatch):
+    """Regression guard: if session.close runs while session.create's
+    _build thread is still constructing the agent, the build thread
+    must detect the orphan and clean up the slash_worker + notify
+    registration it's about to install.  Without the cleanup those
+    resources leak — the subprocess stays alive until atexit and the
+    notify callback lingers in the global registry."""
+    import threading
+    import time
+
+    closed_workers: list[str] = []
+    unregistered_keys: list[str] = []
+
+    class _FakeWorker:
+        def __init__(self, key, model):
+            self.key = key
+            self._closed = False
+
+        def close(self):
+            self._closed = True
+            closed_workers.append(self.key)
+
+    class _FakeAgent:
+        def __init__(self):
+            self.model = "x"
+            self.provider = "openrouter"
+            self.base_url = ""
+            self.api_key = ""
+
+    # Make _build block until we release it — simulates slow agent init
+    release_build = threading.Event()
+
+    def _slow_make_agent(sid, key):
+        release_build.wait(timeout=3.0)
+        return _FakeAgent()
+
+    # Stub everything _build touches
+    monkeypatch.setattr(server, "_make_agent", _slow_make_agent)
+    monkeypatch.setattr(server, "_SlashWorker", _FakeWorker)
+    monkeypatch.setattr(server, "_get_db", lambda: types.SimpleNamespace(create_session=lambda *a, **kw: None))
+    monkeypatch.setattr(server, "_session_info", lambda _a: {"model": "x"})
+    monkeypatch.setattr(server, "_probe_credentials", lambda _a: None)
+    monkeypatch.setattr(server, "_wire_callbacks", lambda _sid: None)
+    monkeypatch.setattr(server, "_emit", lambda *a, **kw: None)
+
+    # Shim register/unregister to observe leaks
+    import tools.approval as _approval
+    monkeypatch.setattr(_approval, "register_gateway_notify",
+                        lambda key, cb: None)
+    monkeypatch.setattr(_approval, "unregister_gateway_notify",
+                        lambda key: unregistered_keys.append(key))
+    monkeypatch.setattr(_approval, "load_permanent_allowlist", lambda: None)
+
+    # Start: session.create spawns _build thread, returns synchronously
+    resp = server.handle_request({
+        "id": "1", "method": "session.create", "params": {"cols": 80},
+    })
+    assert resp.get("result"), f"got error: {resp.get('error')}"
+    sid = resp["result"]["session_id"]
+
+    # Build thread is blocked in _slow_make_agent.  Close the session
+    # NOW — this pops _sessions[sid] before _build can install the
+    # worker/notify.
+    close_resp = server.handle_request({
+        "id": "2", "method": "session.close", "params": {"session_id": sid},
+    })
+    assert close_resp.get("result", {}).get("closed") is True
+
+    # At this point session.close saw slash_worker=None (not yet
+    # installed) so it didn't close anything.  Release the build thread
+    # and let it finish — it should detect the orphan and clean up the
+    # worker it just allocated + unregister the notify.
+    release_build.set()
+
+    # Poll (up to ~2s) for the build thread to run through its finally.
+    for _ in range(100):
+        if closed_workers:
+            break
+        time.sleep(0.02)
+
+    assert len(closed_workers) == 1, (
+        f"orphan worker was not cleaned up — closed_workers={closed_workers}"
+    )
+    # Notify may be unregistered by both session.close (unconditional)
+    # and the orphan-cleanup path; the key guarantee is that the build
+    # thread does at least one unregister call (any prior close
+    # already popped the callback; the duplicate is a no-op).
+    assert len(unregistered_keys) >= 1, (
+        f"orphan notify registration was not unregistered — "
+        f"unregistered_keys={unregistered_keys}"
+    )
+
+
+def test_session_create_no_race_keeps_worker_alive(monkeypatch):
+    """Regression guard: when session.close does NOT race, the build
+    thread must install the worker + notify normally and leave them
+    alone (no over-eager cleanup)."""
+    closed_workers: list[str] = []
+    unregistered_keys: list[str] = []
+
+    class _FakeWorker:
+        def __init__(self, key, model):
+            self.key = key
+
+        def close(self):
+            closed_workers.append(self.key)
+
+    class _FakeAgent:
+        def __init__(self):
+            self.model = "x"
+            self.provider = "openrouter"
+            self.base_url = ""
+            self.api_key = ""
+
+    monkeypatch.setattr(server, "_make_agent", lambda sid, key: _FakeAgent())
+    monkeypatch.setattr(server, "_SlashWorker", _FakeWorker)
+    monkeypatch.setattr(server, "_get_db", lambda: types.SimpleNamespace(create_session=lambda *a, **kw: None))
+    monkeypatch.setattr(server, "_session_info", lambda _a: {"model": "x"})
+    monkeypatch.setattr(server, "_probe_credentials", lambda _a: None)
+    monkeypatch.setattr(server, "_wire_callbacks", lambda _sid: None)
+    monkeypatch.setattr(server, "_emit", lambda *a, **kw: None)
+
+    import tools.approval as _approval
+    monkeypatch.setattr(_approval, "register_gateway_notify", lambda key, cb: None)
+    monkeypatch.setattr(_approval, "unregister_gateway_notify",
+                        lambda key: unregistered_keys.append(key))
+    monkeypatch.setattr(_approval, "load_permanent_allowlist", lambda: None)
+
+    resp = server.handle_request({
+        "id": "1", "method": "session.create", "params": {"cols": 80},
+    })
+    sid = resp["result"]["session_id"]
+
+    # Wait for the build to finish; a timeout must fail, not pass vacuously.
+    session = server._sessions[sid]
+    try:
+        assert session["agent_ready"].wait(timeout=2.0), "build thread never signalled agent_ready"
+
+        # Build finished without a close race — nothing should have been
+        # cleaned up by the orphan check.
+        assert closed_workers == [], (
+            f"build thread closed its own worker despite no race: {closed_workers}"
+        )
+        assert unregistered_keys == [], (
+            f"build thread unregistered its own notify despite no race: {unregistered_keys}"
+        )
+
+        # Session should have the live worker installed.
+        assert session.get("slash_worker") is not None
+    finally:
+        server._sessions.pop(sid, None)
diff --git a/tests/tools/test_browser_cdp_tool.py b/tests/tools/test_browser_cdp_tool.py
new file mode 100644
index 00000000000..e7e187ceb0b
--- /dev/null
+++ b/tests/tools/test_browser_cdp_tool.py
@@ -0,0 +1,408 @@
+"""Unit tests for browser_cdp tool.
+
+Uses a tiny in-process ``websockets`` server to simulate a CDP endpoint —
+gives real protocol coverage (connect, send, recv, close) without needing
+a real Chrome instance.
+"""
+from __future__ import annotations
+
+import asyncio
+import json
+import threading
+import time
+from typing import Any, Dict, List
+
+import pytest
+
+import websockets
+from websockets.asyncio.server import serve
+
+from tools import browser_cdp_tool
+
+
+# ---------------------------------------------------------------------------
+# In-process CDP mock server
+# ---------------------------------------------------------------------------
+
+
+class _CDPServer:
+    """A tiny CDP-over-WebSocket mock running on a background thread.
+
+    Nothing is sent on connect.  Every inbound JSON frame is recorded and
+    answered with the same ``id`` (and ``sessionId`` when present): the
+    registered handler's result, or a CDP-style error if none is registered.
+    """
+
+    def __init__(self) -> None:
+        self._handlers: Dict[str, Any] = {}
+        self._responses: List[Dict[str, Any]] = []  # inbound frames, in arrival order
+        self._loop: asyncio.AbstractEventLoop | None = None
+        self._server: Any = None
+        self._thread: threading.Thread | None = None
+        self._host = "127.0.0.1"
+        self._port = 0  # overwritten in start() with the port actually bound
+
+    # --- handler registration --------------------------------------------
+
+    def on(self, method: str, handler):
+        """Register ``handler(params, session_id)``; it returns a dict, or returns/raises an Exception."""
+        self._handlers[method] = handler
+
+    # --- lifecycle -------------------------------------------------------
+
+    def start(self) -> str:  # returns the ws:// URL of the running mock
+        ready = threading.Event()
+
+        def _run() -> None:
+            self._loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(self._loop)
+
+            async def _handler(ws):
+                try:
+                    async for raw in ws:
+                        msg = json.loads(raw)
+                        call_id = msg.get("id")
+                        method = msg.get("method", "")
+                        params = msg.get("params", {}) or {}  # missing or null params become {}
+                        session_id = msg.get("sessionId")
+                        self._responses.append(msg)
+
+                        fn = self._handlers.get(method)
+                        if fn is None:
+                            reply = {
+                                "id": call_id,
+                                "error": {
+                                    "code": -32601,
+                                    "message": f"No handler for {method}",
+                                },
+                            }
+                        else:
+                            try:
+                                result = fn(params, session_id)  # runs synchronously on this loop
+                                if isinstance(result, Exception):  # returned, not raised
+                                    raise result
+                                reply = {"id": call_id, "result": result}
+                            except Exception as exc:
+                                reply = {
+                                    "id": call_id,
+                                    "error": {"code": -1, "message": str(exc)},
+                                }
+                        if session_id:
+                            reply["sessionId"] = session_id
+                        await ws.send(json.dumps(reply))
+                except websockets.exceptions.ConnectionClosed:  # client disconnected
+                    pass
+
+            async def _serve() -> None:
+                self._server = await serve(_handler, self._host, 0)  # port 0: OS picks a free port
+                sock = next(iter(self._server.sockets))
+                self._port = sock.getsockname()[1]
+                ready.set()
+                await self._server.wait_closed()  # returns after stop() closes the server
+
+            try:
+                self._loop.run_until_complete(_serve())
+            finally:
+                self._loop.close()
+
+        self._thread = threading.Thread(target=_run, daemon=True)
+        self._thread.start()
+        if not ready.wait(timeout=5.0):
+            raise RuntimeError("CDP mock server failed to start within 5s")
+        return f"ws://{self._host}:{self._port}/devtools/browser/mock"
+
+    def stop(self) -> None:  # close the server, then wait up to 3s for the thread
+        if self._loop and self._server:
+            def _close() -> None:
+                self._server.close()
+
+            self._loop.call_soon_threadsafe(_close)  # close must run on the server's loop
+        if self._thread:
+            self._thread.join(timeout=3.0)
+
+    def received(self) -> List[Dict[str, Any]]:
+        return list(self._responses)  # copy of the frames recorded so far
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def cdp_server(monkeypatch):
+    """Yield a running ``_CDPServer`` that the tool resolves as its endpoint."""
+    mock = _CDPServer()
+    endpoint = mock.start()
+
+    # Point the tool's endpoint lookup at the mock for this test only.
+    monkeypatch.setattr(browser_cdp_tool, "_resolve_cdp_endpoint", lambda: endpoint)
+    try:
+        yield mock
+    finally:
+        mock.stop()
+
+
+# ---------------------------------------------------------------------------
+# Input validation
+# ---------------------------------------------------------------------------
+
+
+def test_missing_method_returns_error():
+    """An empty method name is rejected with an error pointing at the CDP docs."""
+    reply = json.loads(browser_cdp_tool.browser_cdp(method=""))
+    assert "error" in reply and "method" in reply["error"].lower()
+    assert reply.get("cdp_docs") == browser_cdp_tool.CDP_DOCS_URL
+
+
+def test_non_string_method_returns_error():
+    """A non-string method must produce an error that mentions the method."""
+    reply = json.loads(browser_cdp_tool.browser_cdp(method=123))  # type: ignore[arg-type]
+    assert "error" in reply and "method" in reply["error"].lower()
+
+
+def test_non_dict_params_returns_error(monkeypatch):
+    """``params`` that is not a mapping is rejected with a descriptive error."""
+    monkeypatch.setattr(browser_cdp_tool, "_resolve_cdp_endpoint", lambda: "ws://localhost:9999")
+    raw = browser_cdp_tool.browser_cdp(
+        method="Target.getTargets", params="not-a-dict",  # type: ignore[arg-type]
+    )
+    reply = json.loads(raw)
+    assert "error" in reply
+    assert any(word in reply["error"].lower() for word in ("object", "dict"))
+
+
+# ---------------------------------------------------------------------------
+# Endpoint resolution
+# ---------------------------------------------------------------------------
+
+
+def test_no_endpoint_returns_helpful_error(monkeypatch):
+    """With no endpoint resolved, the error must suggest ``/browser connect``."""
+    monkeypatch.setattr(browser_cdp_tool, "_resolve_cdp_endpoint", lambda: "")
+    reply = json.loads(browser_cdp_tool.browser_cdp(method="Target.getTargets"))
+    assert "error" in reply and "/browser connect" in reply["error"]
+    assert reply.get("cdp_docs") == browser_cdp_tool.CDP_DOCS_URL
+
+
+def test_non_ws_endpoint_returns_error(monkeypatch):
+    """An ``http://`` endpoint is refused with an error naming WebSocket."""
+    http_url = "http://localhost:9222"
+    monkeypatch.setattr(browser_cdp_tool, "_resolve_cdp_endpoint", lambda: http_url)
+    reply = json.loads(browser_cdp_tool.browser_cdp(method="Target.getTargets"))
+    assert "error" in reply
+    assert "WebSocket" in reply["error"]
+
+
+def test_websockets_missing_returns_error(monkeypatch):
+    """With ``_WS_AVAILABLE`` forced off, the error must mention websockets."""
+    monkeypatch.setattr(browser_cdp_tool, "_WS_AVAILABLE", False)
+    reply = json.loads(browser_cdp_tool.browser_cdp(method="Target.getTargets"))
+    assert "error" in reply and "websockets" in reply["error"].lower()
+
+
+# ---------------------------------------------------------------------------
+# Happy-path: browser-level call
+# ---------------------------------------------------------------------------
+
+
+def test_browser_level_success(cdp_server):
+    """Without ``target_id`` the call goes out once, with no ``sessionId``."""
+    # Canned reply describing two open tabs.
+    targets = [
+        {"targetId": "A", "type": "page", "title": "Tab 1", "url": "about:blank"},
+        {"targetId": "B", "type": "page", "title": "Tab 2", "url": "https://a.test"},
+    ]
+    cdp_server.on("Target.getTargets", lambda params, sid: {"targetInfos": targets})
+
+    reply = json.loads(browser_cdp_tool.browser_cdp(method="Target.getTargets"))
+    assert reply["success"] is True
+    assert reply["method"] == "Target.getTargets"
+    assert "target_id" not in reply
+    assert len(reply["result"]["targetInfos"]) == 2
+
+    # Exactly one frame reached the mock, i.e. no extra traffic was generated.
+    frames = cdp_server.received()
+    assert len(frames) == 1
+    assert frames[0]["method"] == "Target.getTargets"
+    assert "sessionId" not in frames[0]
+
+
+def test_empty_params_sends_empty_object(cdp_server):
+    cdp_server.on("Browser.getVersion", lambda params, sid: {"product": "Mock/1.0"})
+    json.loads(browser_cdp_tool.browser_cdp(method="Browser.getVersion"))  # parsed only to prove it is JSON
+    assert cdp_server.received()[0]["params"] == {}  # omitted params arrive as an empty object
+
+
+# ---------------------------------------------------------------------------
+# Happy-path: target-attached call
+# ---------------------------------------------------------------------------
+
+
+def test_target_attach_then_call(cdp_server):
+    """With ``target_id`` set, the tool attaches first and then issues the
+    method on the session id returned by the attach."""
+    def _attach(params, sid):
+        return {"sessionId": f"sess-{params['targetId']}"}
+
+    def _evaluate(params, sid):
+        return {"result": {"type": "string", "value": f"evaluated[{sid}]"}}
+
+    cdp_server.on("Target.attachToTarget", _attach)
+    cdp_server.on("Runtime.evaluate", _evaluate)
+
+    raw = browser_cdp_tool.browser_cdp(
+        method="Runtime.evaluate",
+        params={"expression": "document.title", "returnByValue": True},
+        target_id="tab-A",
+    )
+    reply = json.loads(raw)
+    assert reply["success"] is True
+    assert reply["target_id"] == "tab-A"
+    assert reply["result"]["result"]["value"] == "evaluated[sess-tab-A]"
+
+    frames = cdp_server.received()
+    # Frame 0 is the attach request.
+    assert frames[0]["method"] == "Target.attachToTarget"
+    assert frames[0]["params"] == {"targetId": "tab-A", "flatten": True}
+    # Frame 1 is the requested method, addressed to the attached session.
+    assert frames[1]["method"] == "Runtime.evaluate"
+    assert frames[1]["sessionId"] == "sess-tab-A"
+
+
+# ---------------------------------------------------------------------------
+# CDP error responses
+# ---------------------------------------------------------------------------
+
+
+def test_cdp_method_error_returns_tool_error(cdp_server):
+    """A CDP-level error reply is reported as a tool error naming the method."""
+    unknown = "NonExistent.method"  # no handler registered, so the mock errors
+
+    reply = json.loads(browser_cdp_tool.browser_cdp(method=unknown))
+    assert "error" in reply
+    assert "CDP error" in reply["error"]
+    assert reply.get("method") == unknown
+
+
+def test_attach_failure_returns_tool_error(cdp_server):
+    """If the attach step fails, the error names ``Target.attachToTarget``."""
+    # The mock has no attach handler, so the attach request gets a CDP error.
+    raw = browser_cdp_tool.browser_cdp(
+        method="Runtime.evaluate",
+        params={"expression": "1+1"},
+        target_id="missing",
+    )
+    reply = json.loads(raw)
+    assert "error" in reply
+    assert "Target.attachToTarget" in reply["error"]
+
+
+# ---------------------------------------------------------------------------
+# Timeouts
+# ---------------------------------------------------------------------------
+
+
+def test_timeout_when_server_never_replies(cdp_server):
+    """A handler that outlasts ``timeout`` must surface as a timeout error."""
+    release = threading.Event()
+    def slow(params, sid):
+        release.wait(timeout=10)  # stall the mock until the test releases it
+        return {}
+
+    cdp_server.on("Page.slowMethod", slow)
+    try:
+        result = json.loads(browser_cdp_tool.browser_cdp(method="Page.slowMethod", timeout=0.5))
+    finally:
+        release.set()  # free the mock's event loop so fixture teardown is prompt
+    assert "error" in result
+    assert "tim" in result["error"].lower()
+
+
+# ---------------------------------------------------------------------------
+# Timeout clamping
+# ---------------------------------------------------------------------------
+
+
+def test_timeout_clamped_above_max(cdp_server):
+    """An over-large timeout must not break an otherwise successful call."""
+    cdp_server.on("Browser.getVersion", lambda p, s: {"product": "ok"})
+
+    # timeout=10_000 should be clamped to 300 but still succeed
+    reply = json.loads(browser_cdp_tool.browser_cdp(method="Browser.getVersion", timeout=10_000))
+    assert reply["success"] is True
+
+
+def test_invalid_timeout_falls_back_to_default(cdp_server):
+    """A non-numeric timeout must not prevent the call from succeeding."""
+    cdp_server.on("Browser.getVersion", lambda p, s: {"product": "ok"})
+    raw = browser_cdp_tool.browser_cdp(method="Browser.getVersion", timeout="nope")  # type: ignore[arg-type]
+    reply = json.loads(raw)
+    assert reply["success"] is True
+
+
+# ---------------------------------------------------------------------------
+# Registry integration
+# ---------------------------------------------------------------------------
+
+
+def test_registered_in_browser_toolset():
+    """``browser_cdp`` is registered in the ``browser`` toolset with its schema."""
+    from tools.registry import registry
+    entry = registry.get_entry("browser_cdp")
+    assert entry is not None and entry.toolset == "browser"
+    schema = entry.schema
+    assert schema["name"] == "browser_cdp"
+    assert schema["parameters"]["required"] == ["method"]
+    assert "Chrome DevTools Protocol" in schema["description"]
+    assert browser_cdp_tool.CDP_DOCS_URL in schema["description"]
+
+
+def test_dispatch_through_registry(cdp_server):
+    """Dispatching ``browser_cdp`` through the registry yields a successful call."""
+    from tools.registry import registry
+
+    method = "Target.getTargets"
+    cdp_server.on(method, lambda p, s: {"targetInfos": []})
+
+    reply = json.loads(registry.dispatch("browser_cdp", {"method": method}, task_id="t1"))
+    assert reply["success"] is True
+    assert reply["method"] == method
+
+
+# ---------------------------------------------------------------------------
+# check_fn gating
+# ---------------------------------------------------------------------------
+
+
+def test_check_fn_false_when_no_cdp_url(monkeypatch):
+    """Browser requirements alone do not open the gate: with an empty CDP
+    override the check reports ``False``."""
+    import tools.browser_tool as browser_tool
+
+    monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: "")
+    monkeypatch.setattr(browser_tool, "check_browser_requirements", lambda: True)
+    assert browser_cdp_tool._browser_cdp_check() is False
+
+
+def test_check_fn_true_when_cdp_url_set(monkeypatch):
+    """Requirements met plus a CDP override URL: the check reports ``True``."""
+    import tools.browser_tool as browser_tool
+
+    override = "ws://localhost:9222/devtools/browser/x"
+    monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: override)
+    monkeypatch.setattr(browser_tool, "check_browser_requirements", lambda: True)
+
+    assert browser_cdp_tool._browser_cdp_check() is True
+
+
+def test_check_fn_false_when_browser_requirements_fail(monkeypatch):
+    """A CDP override URL is not enough: when the browser requirements check
+    fails, the gate stays closed."""
+    import tools.browser_tool as browser_tool
+
+    override = "ws://localhost:9222/devtools/browser/x"
+    monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: override)
+    monkeypatch.setattr(browser_tool, "check_browser_requirements", lambda: False)
+
+    assert browser_cdp_tool._browser_cdp_check() is False
diff --git a/tests/tools/test_code_execution_modes.py b/tests/tools/test_code_execution_modes.py
new file mode 100644
index 00000000000..875eaf7aeda
--- /dev/null
+++ b/tests/tools/test_code_execution_modes.py
@@ -0,0 +1,455 @@
+#!/usr/bin/env python3
+"""Tests for execute_code's strict / project execution modes.
+
+The mode switch controls two things:
+  - working directory: staging tmpdir (strict) vs session CWD (project)
+  - interpreter:       sys.executable (strict) vs active venv's python (project)
+
+Security-critical invariants — env scrubbing, tool whitelist, resource caps —
+must apply identically in both modes. These tests guard all three layers.
+
+Mode is sourced exclusively from ``code_execution.mode`` in config.yaml —
+there is no env-var override. Tests patch ``_load_config`` directly.
+"""
+
+import json
+import os
+import sys
+import unittest
+from contextlib import contextmanager
+from unittest.mock import patch
+
+import pytest
+
+os.environ["TERMINAL_ENV"] = "local"
+
+
+@pytest.fixture(autouse=True)
+def _force_local_terminal(monkeypatch):
+    """Pin TERMINAL_ENV=local per test (same guard as test_code_execution.py, for xdist)."""
+    monkeypatch.setenv(name="TERMINAL_ENV", value="local")
+
+
+from tools.code_execution_tool import (
+    SANDBOX_ALLOWED_TOOLS,
+    DEFAULT_EXECUTION_MODE,
+    EXECUTION_MODES,
+    _get_execution_mode,
+    _is_usable_python,
+    _resolve_child_cwd,
+    _resolve_child_python,
+    build_execute_code_schema,
+    execute_code,
+)
+
+
+@contextmanager
+def _mock_mode(mode):
+    """Patch ``_load_config`` so it reports ``mode`` for the duration of the with-block."""
+    pinned_config = {"mode": mode}
+    with patch("tools.code_execution_tool._load_config", return_value=pinned_config):
+        yield
+
+
+def _mock_handle_function_call(function_name, function_args, task_id=None, user_task=None):
+    """Stub dispatcher: canned JSON for terminal/read_file, an error payload otherwise."""
+    canned = {"terminal": {"output": "mock", "exit_code": 0},
+              "read_file": {"content": "line1\n", "total_lines": 1}}
+    if function_name in canned:
+        return json.dumps(canned[function_name])
+    return json.dumps({"error": f"Unknown tool: {function_name}"})
+
+
+# ---------------------------------------------------------------------------
+# Mode resolution
+# ---------------------------------------------------------------------------
+
+class TestGetExecutionMode(unittest.TestCase):
+    """Mode resolution is driven by the (patched) config loader only — no env var."""
+
+    def test_default_is_project(self):
+        self.assertEqual(DEFAULT_EXECUTION_MODE, "project")
+
+    def test_config_project(self):
+        with _mock_mode("project"):
+            resolved = _get_execution_mode()
+        self.assertEqual(resolved, "project")
+
+    def test_config_strict(self):
+        with _mock_mode("strict"):
+            resolved = _get_execution_mode()
+        self.assertEqual(resolved, "strict")
+
+    def test_config_case_insensitive(self):
+        with _mock_mode("STRICT"):
+            resolved = _get_execution_mode()
+        self.assertEqual(resolved, "strict")
+
+    def test_config_strips_whitespace(self):
+        with _mock_mode("  project  "):
+            resolved = _get_execution_mode()
+        self.assertEqual(resolved, "project")
+
+    def test_empty_config_falls_back_to_default(self):
+        with patch("tools.code_execution_tool._load_config", return_value={}):
+            self.assertEqual(_get_execution_mode(), DEFAULT_EXECUTION_MODE)
+
+    def test_bogus_config_falls_back_to_default(self):
+        with _mock_mode("banana"):
+            resolved = _get_execution_mode()
+        self.assertEqual(resolved, DEFAULT_EXECUTION_MODE)
+
+    def test_none_config_falls_back_to_default(self):
+        # None is not a recognised mode once stringified ("none") → default
+        with _mock_mode(None):
+            resolved = _get_execution_mode()
+        self.assertEqual(resolved, DEFAULT_EXECUTION_MODE)
+
+    def test_execution_modes_tuple(self):
+        """The canonical mode set is exactly {"project", "strict"}."""
+        self.assertEqual(set(EXECUTION_MODES), {"project", "strict"})
+
+
+# ---------------------------------------------------------------------------
+# Interpreter resolver
+# ---------------------------------------------------------------------------
+
+class TestResolveChildPython(unittest.TestCase):
+    """_resolve_child_python — picks the right interpreter per mode."""
+
+    def test_strict_always_sys_executable(self):
+        """Strict mode never leaves sys.executable, even if venv is set."""
+        with patch.dict(os.environ, {"VIRTUAL_ENV": "/some/venv"}):
+            self.assertEqual(_resolve_child_python("strict"), sys.executable)
+
+    def test_project_with_no_venv_falls_back(self):
+        """Project mode without VIRTUAL_ENV or CONDA_PREFIX → sys.executable."""
+        env = {k: v for k, v in os.environ.items()
+               if k not in ("VIRTUAL_ENV", "CONDA_PREFIX")}
+        with patch.dict(os.environ, env, clear=True):
+            self.assertEqual(_resolve_child_python("project"), sys.executable)
+
+    def test_project_with_virtualenv_picks_venv_python(self):
+        """Project mode + VIRTUAL_ENV pointing at a real venv → that python."""
+        import tempfile, pathlib
+        with tempfile.TemporaryDirectory() as td:
+            fake_venv = pathlib.Path(td)
+            (fake_venv / "bin").mkdir()
+            # Symlink to real python so the version check actually passes
+            (fake_venv / "bin" / "python").symlink_to(sys.executable)
+            with patch.dict(os.environ, {"VIRTUAL_ENV": str(fake_venv)}):
+                # Clear cache — _is_usable_python memoizes on path
+                _is_usable_python.cache_clear()
+                result = _resolve_child_python("project")
+                self.assertEqual(result, str(fake_venv / "bin" / "python"))
+
+    def test_project_with_broken_venv_falls_back(self):
+        """VIRTUAL_ENV set but bin/python missing (and no conda env) → sys.executable."""
+        import tempfile
+        with tempfile.TemporaryDirectory() as td:  # no bin/python inside — broken venv
+            with patch.dict(os.environ, {"VIRTUAL_ENV": td}):
+                os.environ.pop("CONDA_PREFIX", None)  # ambient conda env must not win; patch.dict restores it
+                _is_usable_python.cache_clear()
+                self.assertEqual(_resolve_child_python("project"), sys.executable)
+
+    def test_project_prefers_virtualenv_over_conda(self):
+        """If both VIRTUAL_ENV and CONDA_PREFIX are set, VIRTUAL_ENV wins."""
+        import tempfile, pathlib
+        with tempfile.TemporaryDirectory() as ve_td, tempfile.TemporaryDirectory() as conda_td:
+            ve = pathlib.Path(ve_td)
+            (ve / "bin").mkdir()
+            (ve / "bin" / "python").symlink_to(sys.executable)
+
+            conda = pathlib.Path(conda_td)
+            (conda / "bin").mkdir()
+            (conda / "bin" / "python").symlink_to(sys.executable)
+
+            with patch.dict(os.environ, {"VIRTUAL_ENV": str(ve), "CONDA_PREFIX": str(conda)}):
+                _is_usable_python.cache_clear()
+                result = _resolve_child_python("project")
+                self.assertEqual(result, str(ve / "bin" / "python"))
+
+    def test_is_usable_python_rejects_nonexistent(self):
+        _is_usable_python.cache_clear()
+        self.assertFalse(_is_usable_python("/does/not/exist/python"))
+
+    def test_is_usable_python_accepts_real_python(self):
+        _is_usable_python.cache_clear()
+        self.assertTrue(_is_usable_python(sys.executable))
+
+
+# ---------------------------------------------------------------------------
+# CWD resolver
+# ---------------------------------------------------------------------------
+
+class TestResolveChildCwd(unittest.TestCase):
+    """_resolve_child_cwd — where the child process is started, per mode."""
+
+    def test_strict_uses_staging_dir(self):
+        self.assertEqual(_resolve_child_cwd("strict", "/tmp/staging"), "/tmp/staging")
+
+    def test_project_without_terminal_cwd_uses_getcwd(self):
+        scrubbed = {name: val for name, val in os.environ.items() if name != "TERMINAL_CWD"}
+        with patch.dict(os.environ, scrubbed, clear=True):
+            self.assertEqual(_resolve_child_cwd("project", "/tmp/staging"), os.getcwd())
+
+    def test_project_uses_terminal_cwd_when_set(self):
+        import tempfile
+        with tempfile.TemporaryDirectory() as session_dir, patch.dict(os.environ, {"TERMINAL_CWD": session_dir}):
+            self.assertEqual(_resolve_child_cwd("project", "/tmp/staging"), session_dir)
+
+    def test_project_bogus_terminal_cwd_falls_back_to_getcwd(self):
+        with patch.dict(os.environ, {"TERMINAL_CWD": "/does/not/exist/anywhere"}):
+            self.assertEqual(_resolve_child_cwd("project", "/tmp/staging"), os.getcwd())
+
+    def test_project_expands_tilde(self):
+        import pathlib
+        with patch.dict(os.environ, {"TERMINAL_CWD": "~"}):
+            resolved = _resolve_child_cwd("project", "/tmp/staging")
+        self.assertEqual(resolved, str(pathlib.Path.home()))
+
+
+# ---------------------------------------------------------------------------
+# Schema description
+# ---------------------------------------------------------------------------
+
+class TestModeAwareSchema(unittest.TestCase):
+
+    def test_strict_description_mentions_temp_dir(self):
+        schema = build_execute_code_schema(mode="strict")
+        self.assertIn("temp dir", schema["description"])
+
+    def test_project_description_mentions_session_and_venv(self):
+        text = build_execute_code_schema(mode="project")["description"]
+        for keyword in ("session", "venv"):
+            self.assertIn(keyword, text)
+
+    def test_neither_description_uses_sandbox_language(self):
+        """REGRESSION GUARD for commit 39b83f34.
+
+        The tool description must not contain 'sandbox', 'isolated' or
+        'cloud' in any mode: agents on local backends read that wording as
+        meaning they were sandboxed and refused networking tasks.
+        """
+        banned = ("sandbox", "isolated", "cloud")
+        for mode in EXECUTION_MODES:
+            lowered = build_execute_code_schema(mode=mode)["description"].lower()
+            for forbidden in banned:
+                self.assertNotIn(forbidden, lowered, f"mode={mode}: '{forbidden}' leaked into description")
+
+    def test_descriptions_are_similar_length(self):
+        """Strict and project descriptions differ in length by under 200 chars."""
+        sizes = {m: len(build_execute_code_schema(mode=m)["description"])
+                 for m in ("strict", "project")}
+        self.assertLess(abs(sizes["strict"] - sizes["project"]), 200)
+
+    def test_default_mode_reads_config(self):
+        """With no explicit mode, the schema follows the configured mode."""
+        with _mock_mode("strict"):
+            strict_desc = build_execute_code_schema()["description"]
+        with _mock_mode("project"):
+            project_desc = build_execute_code_schema()["description"]
+        self.assertIn("temp dir", strict_desc)
+        self.assertIn("session", project_desc)
+
+
+# ---------------------------------------------------------------------------
+# Integration: what actually happens when execute_code runs per mode
+# ---------------------------------------------------------------------------
+
+@pytest.mark.skipif(sys.platform == "win32", reason="execute_code is POSIX-only")
+class TestExecuteCodeModeIntegration(unittest.TestCase):
+    """End-to-end checks of where, and with which python, the child process runs."""
+
+    def _run(self, code, mode, enabled_tools=None, extra_env=None):
+        """Execute ``code`` via execute_code with the mode pinned, ``extra_env``
+        overlaid on os.environ and tool dispatch mocked; return the parsed JSON."""
+        allowed = enabled_tools or list(SANDBOX_ALLOWED_TOOLS)
+        dispatcher = patch("model_tools.handle_function_call", side_effect=_mock_handle_function_call)
+        with _mock_mode(mode), patch.dict(os.environ, extra_env or {}), dispatcher:
+            raw = execute_code(
+                code=code,
+                task_id=f"test-{mode}",
+                enabled_tools=allowed,
+            )
+        return json.loads(raw)
+
+    def test_strict_mode_runs_in_tmpdir(self):
+        """In strict mode the child's working directory is the staging tmpdir."""
+        outcome = self._run("import os; print(os.getcwd())", mode="strict")
+        self.assertEqual(outcome["status"], "success")
+        self.assertIn("hermes_sandbox_", outcome["output"])
+
+    def test_project_mode_runs_in_session_cwd(self):
+        """In project mode the child's working directory is TERMINAL_CWD."""
+        import tempfile
+        probe = "import os; print(os.getcwd())"
+        with tempfile.TemporaryDirectory() as session_dir:
+            outcome = self._run(
+                probe,
+                mode="project",
+                extra_env={"TERMINAL_CWD": session_dir},
+            )
+            self.assertEqual(outcome["status"], "success")
+            # Compare real paths: the temp dir may sit behind a symlink
+            # (macOS /tmp → /private/tmp).
+            reported = os.path.realpath(outcome["output"].strip())
+            self.assertEqual(reported, os.path.realpath(session_dir))
+
+    def test_project_mode_interpreter_is_venv_python(self):
+        """Project mode: the child's sys.executable is either under
+        VIRTUAL_ENV or equal to the parent's sys.executable."""
+        # Only checked when VIRTUAL_ENV is set. The test suite normally runs
+        # inside the project's venv, so the child interpreter may well equal
+        # the parent's — the assertion accepts both the venv path and the
+        # sys.executable fallback.
+        outcome = self._run("import sys; print(sys.executable)", mode="project")
+        self.assertEqual(outcome["status"], "success")
+        child_python = outcome["output"].strip()
+        venv_root = os.environ.get("VIRTUAL_ENV", "").strip()
+        if not venv_root:
+            return
+        self.assertTrue(
+            child_python == sys.executable or child_python.startswith(venv_root),
+            f"project-mode python should be under VIRTUAL_ENV={venv_root} or sys.executable={sys.executable}, got {child_python}",
+        )
+
+    def test_project_mode_can_still_import_hermes_tools(self):
+        """Regression: hermes_tools stays importable when CWD is not the tmpdir.
+
+        Guards the PYTHONPATH fix — without it, running from the session CWD
+        breaks `from hermes_tools import terminal`.
+        """
+        import tempfile
+        script = (
+            "from hermes_tools import terminal\n"
+            "r = terminal('echo x')\n"
+            "print(r.get('output', 'MISSING'))\n"
+        )
+        with tempfile.TemporaryDirectory() as session_dir:
+            outcome = self._run(script, mode="project", extra_env={"TERMINAL_CWD": session_dir})
+            self.assertEqual(outcome["status"], "success")
+            self.assertIn("mock", outcome["output"])
+
+    def test_strict_mode_can_still_import_hermes_tools(self):
+        """Regression: imports of hermes_tools also work from strict mode's tmpdir."""
+        script = (
+            "from hermes_tools import terminal\n"
+            "r = terminal('echo x')\n"
+            "print(r.get('output', 'MISSING'))\n"
+        )
+        outcome = self._run(script, mode="strict")
+        self.assertEqual(outcome["status"], "success")
+        self.assertIn("mock", outcome["output"])
+
+
+# ---------------------------------------------------------------------------
+# SECURITY-CRITICAL regression guards
+#
+# These MUST pass in both strict and project mode. The whole tiered-mode
+# proposition rests on the claim that switching from strict to project only
+# changes CWD + interpreter, not the security posture.
+# ---------------------------------------------------------------------------
+
+@pytest.mark.skipif(sys.platform == "win32", reason="execute_code is POSIX-only")
+class TestSecurityInvariantsAcrossModes(unittest.TestCase):
+
+    def _run(self, code, mode):  # helper: run `code` with the mode pinned and tool dispatch mocked
+        with _mock_mode(mode):
+            with patch("model_tools.handle_function_call",
+                       side_effect=_mock_handle_function_call):
+                raw = execute_code(
+                    code=code,
+                    task_id=f"test-sec-{mode}",
+                    enabled_tools=list(SANDBOX_ALLOWED_TOOLS),
+                )
+        return json.loads(raw)  # decoded result dict; tests read "status" and "output"
+
+    def test_api_keys_scrubbed_in_strict_mode(self):  # provider keys set in the parent must read as MISSING in the child
+        code = (
+            "import os\n"
+            "print('KEY=' + os.environ.get('OPENAI_API_KEY', 'MISSING'))\n"
+            "print('TOK=' + os.environ.get('ANTHROPIC_API_KEY', 'MISSING'))\n"
+        )
+        with patch.dict(os.environ, {
+            "OPENAI_API_KEY": "sk-should-not-leak",
+            "ANTHROPIC_API_KEY": "ant-should-not-leak",
+        }):
+            result = self._run(code, mode="strict")
+        self.assertEqual(result["status"], "success")
+        self.assertIn("KEY=MISSING", result["output"])
+        self.assertIn("TOK=MISSING", result["output"])
+        self.assertNotIn("sk-should-not-leak", result["output"])
+        self.assertNotIn("ant-should-not-leak", result["output"])
+
+    def test_api_keys_scrubbed_in_project_mode(self):
+        """CRITICAL: project mode must NOT expose API keys/tokens set in the parent env."""
+        code = (
+            "import os\n"
+            "print('KEY=' + os.environ.get('OPENAI_API_KEY', 'MISSING'))\n"
+            "print('TOK=' + os.environ.get('ANTHROPIC_API_KEY', 'MISSING'))\n"
+            "print('SEC=' + os.environ.get('GITHUB_TOKEN', 'MISSING'))\n"
+        )
+        with patch.dict(os.environ, {
+            "OPENAI_API_KEY": "sk-should-not-leak",
+            "ANTHROPIC_API_KEY": "ant-should-not-leak",
+            "GITHUB_TOKEN": "ghp-should-not-leak",
+        }):
+            result = self._run(code, mode="project")
+        self.assertEqual(result["status"], "success")
+        for needle in ("KEY=MISSING", "TOK=MISSING", "SEC=MISSING"):  # each variable is absent in the child
+            self.assertIn(needle, result["output"])
+        for leaked in ("sk-should-not-leak", "ant-should-not-leak", "ghp-should-not-leak"):  # and no value appears
+            self.assertNotIn(leaked, result["output"])
+
+    def test_secret_substrings_scrubbed_in_project_mode(self):
+        """Names containing SECRET/PASSWORD/CREDENTIAL/PASSWD/AUTH must not leak their values."""
+        code = (
+            "import os\n"
+            "for k in ('MY_SECRET', 'DB_PASSWORD', 'VAULT_CREDENTIAL', "
+            "'LDAP_PASSWD', 'AUTH_TOKEN'):\n"
+            "    print(f'{k}=' + os.environ.get(k, 'MISSING'))\n"
+        )
+        with patch.dict(os.environ, {
+            "MY_SECRET": "secret-should-not-leak",
+            "DB_PASSWORD": "password-should-not-leak",
+            "VAULT_CREDENTIAL": "cred-should-not-leak",
+            "LDAP_PASSWD": "passwd-should-not-leak",
+            "AUTH_TOKEN": "auth-should-not-leak",
+        }):
+            result = self._run(code, mode="project")
+        self.assertEqual(result["status"], "success")
+        for leaked in ("secret-should-not-leak", "password-should-not-leak",  # only value absence is asserted here
+                       "cred-should-not-leak", "passwd-should-not-leak",
+                       "auth-should-not-leak"):
+            self.assertNotIn(leaked, result["output"])
+
+    def test_tool_whitelist_enforced_in_strict_mode(self):
+        """The child's hermes_tools module exposes neither execute_code nor delegate_task."""
+        # execute_code is NOT in SANDBOX_ALLOWED_TOOLS (no recursion)
+        self.assertNotIn("execute_code", SANDBOX_ALLOWED_TOOLS)
+        code = (
+            "import hermes_tools as ht\n"
+            "print('execute_code_available:', hasattr(ht, 'execute_code'))\n"
+            "print('delegate_task_available:', hasattr(ht, 'delegate_task'))\n"
+        )
+        result = self._run(code, mode="strict")
+        self.assertEqual(result["status"], "success")
+        self.assertIn("execute_code_available: False", result["output"])
+        self.assertIn("delegate_task_available: False", result["output"])
+
+    def test_tool_whitelist_enforced_in_project_mode(self):
+        """CRITICAL: project mode exposes the same restricted hermes_tools surface as strict mode."""
+        code = (
+            "import hermes_tools as ht\n"
+            "print('execute_code_available:', hasattr(ht, 'execute_code'))\n"
+            "print('delegate_task_available:', hasattr(ht, 'delegate_task'))\n"
+        )
+        result = self._run(code, mode="project")
+        self.assertEqual(result["status"], "success")
+        self.assertIn("execute_code_available: False", result["output"])
+        self.assertIn("delegate_task_available: False", result["output"])
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/tools/test_cron_approval_mode.py b/tests/tools/test_cron_approval_mode.py
new file mode 100644
index 00000000000..965d2eaa474
--- /dev/null
+++ b/tests/tools/test_cron_approval_mode.py
@@ -0,0 +1,256 @@
+"""Tests for approvals.cron_mode — configurable approval behavior for cron jobs."""
+
+import os
+import pytest
+
+import tools.approval as approval_module
+from tools.approval import (
+    _get_cron_approval_mode,
+    check_all_command_guards,
+    check_dangerous_command,
+    detect_dangerous_command,
+)
+
+
+@pytest.fixture(autouse=True)
+def _clear_approval_state():
+    def _reset():  # wipe permanent approvals and both session buckets used by these tests
+        approval_module._permanent_approved.clear()
+        for session_key in ("default", "test-session"):
+            approval_module.clear_session(session_key)
+    _reset()
+    yield
+    _reset()
+
+
+# ---------------------------------------------------------------------------
+# _get_cron_approval_mode() config parsing
+# ---------------------------------------------------------------------------
+
+class TestCronApprovalModeParsing:
+    """_get_cron_approval_mode() against a patched hermes_cli.config.load_config."""
+
+    @staticmethod
+    def _resolve(**load_config_mock):
+        """Return _get_cron_approval_mode() while load_config is patched with
+        the given mock keyword arguments (return_value=... or side_effect=...)."""
+        from unittest.mock import patch as mock_patch
+        with mock_patch("hermes_cli.config.load_config", **load_config_mock):
+            return _get_cron_approval_mode()
+
+    def test_default_is_deny(self):
+        """When no config is set, cron_mode defaults to 'deny'."""
+        config = {"approvals": {}}
+        assert self._resolve(return_value=config) == "deny"
+
+    def test_explicit_deny(self):
+        config = {"approvals": {"cron_mode": "deny"}}
+        assert self._resolve(return_value=config) == "deny"
+
+    def test_explicit_approve(self):
+        config = {"approvals": {"cron_mode": "approve"}}
+        assert self._resolve(return_value=config) == "approve"
+
+    def test_off_maps_to_approve(self):
+        """'off' is an alias for 'approve' (matches --yolo semantics)."""
+        config = {"approvals": {"cron_mode": "off"}}
+        assert self._resolve(return_value=config) == "approve"
+
+    def test_allow_maps_to_approve(self):
+        config = {"approvals": {"cron_mode": "allow"}}
+        assert self._resolve(return_value=config) == "approve"
+
+    def test_yes_maps_to_approve(self):
+        config = {"approvals": {"cron_mode": "yes"}}
+        assert self._resolve(return_value=config) == "approve"
+
+    def test_case_insensitive(self):
+        config = {"approvals": {"cron_mode": "APPROVE"}}
+        assert self._resolve(return_value=config) == "approve"
+
+    def test_unknown_value_defaults_to_deny(self):
+        config = {"approvals": {"cron_mode": "maybe"}}
+        assert self._resolve(return_value=config) == "deny"
+
+    def test_config_load_failure_defaults_to_deny(self):
+        """If config loading fails entirely, default to deny (safe)."""
+        failure = RuntimeError("config broken")
+        assert self._resolve(side_effect=failure) == "deny"
+
+    def test_yaml_boolean_false_maps_to_deny(self):
+        """YAML 1.1 parses bare 'off' as False. Ensure it maps to deny."""
+        config = {"approvals": {"cron_mode": False}}
+        # str(False) = "False", which is not in the approve set, so deny
+        assert self._resolve(return_value=config) == "deny"
+
+
+# ---------------------------------------------------------------------------
+# check_dangerous_command() with cron session
+# ---------------------------------------------------------------------------
+
+class TestCronDenyMode:
+    """When HERMES_CRON_SESSION is set and cron_mode=deny, dangerous commands are blocked."""
+
+    def test_dangerous_command_blocked_in_cron_deny_mode(self, monkeypatch):
+        monkeypatch.setenv("HERMES_CRON_SESSION", "1")
+        monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
+        monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
+        monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
+
+        from unittest.mock import patch as mock_patch
+        with mock_patch("tools.approval._get_cron_approval_mode", return_value="deny"):
+            result = check_dangerous_command("rm -rf /tmp/stuff", "local")
+            assert not result["approved"]
+            assert "BLOCKED" in result["message"]
+            assert "cron_mode" in result["message"]
+
+    def test_safe_command_allowed_in_cron_deny_mode(self, monkeypatch):
+        """Non-dangerous commands still work even with cron_mode=deny."""
+        monkeypatch.setenv("HERMES_CRON_SESSION", "1")
+        monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
+        monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
+        monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
+
+        from unittest.mock import patch as mock_patch
+        with mock_patch("tools.approval._get_cron_approval_mode", return_value="deny"):
+            result = check_dangerous_command("ls -la", "local")
+            assert result["approved"]
+
+    def test_multiple_dangerous_patterns_blocked(self, monkeypatch):
+        """Every flagged sample is blocked (not just rm); at least one sample must be flagged."""
+        monkeypatch.setenv("HERMES_CRON_SESSION", "1")
+        monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
+        monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
+        monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
+
+        dangerous_commands = [
+            "rm -rf /",
+            "chmod 777 /etc/passwd",
+            "mkfs.ext4 /dev/sda1",
+            "dd if=/dev/zero of=/dev/sda",
+        ]
+
+        from unittest.mock import patch as mock_patch
+        flagged = [cmd for cmd in dangerous_commands if detect_dangerous_command(cmd)[0]]
+        assert flagged, "detect_dangerous_command flagged none of the sample commands"  # no vacuous pass
+        with mock_patch("tools.approval._get_cron_approval_mode", return_value="deny"):
+            for cmd in flagged:
+                result = check_dangerous_command(cmd, "local")
+                assert not result["approved"], f"Should be blocked: {cmd}"
+                assert "BLOCKED" in result["message"]
+
+    def test_block_message_includes_description(self, monkeypatch):
+        """The block message should mention what pattern was matched."""
+        monkeypatch.setenv("HERMES_CRON_SESSION", "1")
+        monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
+        monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
+        monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
+
+        from unittest.mock import patch as mock_patch
+        with mock_patch("tools.approval._get_cron_approval_mode", return_value="deny"):
+            result = check_dangerous_command("rm -rf /tmp/stuff", "local")
+            assert not result["approved"]
+            # Should contain the description of what was flagged
+            assert "dangerous" in result["message"].lower() or "delete" in result["message"].lower()
+
+
+class TestCronApproveMode:
+    """With HERMES_CRON_SESSION set and cron_mode=approve, dangerous commands are let through."""
+
+    def test_dangerous_command_allowed_in_cron_approve_mode(self, monkeypatch):
+        from unittest.mock import patch as mock_patch
+
+        monkeypatch.setenv("HERMES_CRON_SESSION", "1")
+        for name in ("HERMES_INTERACTIVE", "HERMES_GATEWAY_SESSION", "HERMES_YOLO_MODE"):
+            monkeypatch.delenv(name, raising=False)
+
+        with mock_patch("tools.approval._get_cron_approval_mode", return_value="approve"):
+            verdict = check_dangerous_command("rm -rf /tmp/stuff", "local")
+        assert verdict["approved"]
+
+
+# ---------------------------------------------------------------------------
+# check_all_command_guards() with cron session
+# ---------------------------------------------------------------------------
+
+class TestCronDenyModeAllGuards:
+    """check_all_command_guards() honours cron_mode as well."""
+
+    _UNSET = ("HERMES_INTERACTIVE", "HERMES_GATEWAY_SESSION", "HERMES_EXEC_ASK", "HERMES_YOLO_MODE")
+
+    def _enter_cron_session(self, monkeypatch):
+        """Set HERMES_CRON_SESSION=1 and unset the interactive, gateway,
+        exec-ask and yolo variables so only the cron path is exercised."""
+        monkeypatch.setenv("HERMES_CRON_SESSION", "1")
+        for name in self._UNSET:
+            monkeypatch.delenv(name, raising=False)
+
+    def test_dangerous_command_blocked_in_combined_guard(self, monkeypatch):
+        """cron_mode=deny: a dangerous command is rejected with a BLOCKED message."""
+        self._enter_cron_session(monkeypatch)
+
+        from unittest.mock import patch as mock_patch
+        with mock_patch("tools.approval._get_cron_approval_mode", return_value="deny"):
+            verdict = check_all_command_guards("rm -rf /tmp/stuff", "local")
+        assert not verdict["approved"]
+        assert "BLOCKED" in verdict["message"]
+
+    def test_safe_command_allowed_in_combined_guard(self, monkeypatch):
+        """cron_mode=deny: a harmless command is still approved."""
+        self._enter_cron_session(monkeypatch)
+
+        from unittest.mock import patch as mock_patch
+        with mock_patch("tools.approval._get_cron_approval_mode", return_value="deny"):
+            verdict = check_all_command_guards("echo hello", "local")
+        assert verdict["approved"]
+
+    def test_combined_guard_approve_mode(self, monkeypatch):
+        """cron_mode=approve: the dangerous command is approved."""
+        self._enter_cron_session(monkeypatch)
+
+        from unittest.mock import patch as mock_patch
+        with mock_patch("tools.approval._get_cron_approval_mode", return_value="approve"):
+            verdict = check_all_command_guards("rm -rf /tmp/stuff", "local")
+        assert verdict["approved"]
+
+
+# ---------------------------------------------------------------------------
+# Edge cases: cron mode interaction with other approval mechanisms
+# ---------------------------------------------------------------------------
+
+class TestCronModeInteractions:
+    """cron_mode must leave the other approval bypasses (container env, yolo, non-cron) intact."""
+
+    def test_container_env_still_auto_approves(self, monkeypatch):
+        """A "docker" env type is approved even for a cron session with cron_mode=deny."""
+        from unittest.mock import patch as mock_patch
+
+        monkeypatch.setenv("HERMES_CRON_SESSION", "1")
+        for name in ("HERMES_INTERACTIVE", "HERMES_GATEWAY_SESSION", "HERMES_YOLO_MODE"):
+            monkeypatch.delenv(name, raising=False)
+
+        with mock_patch("tools.approval._get_cron_approval_mode", return_value="deny"):
+            verdict = check_dangerous_command("rm -rf /", "docker")
+        assert verdict["approved"]
+
+    def test_yolo_overrides_cron_deny(self, monkeypatch):
+        """HERMES_YOLO_MODE=1 approves the command even though cron_mode=deny."""
+        from unittest.mock import patch as mock_patch
+
+        for name, value in (("HERMES_CRON_SESSION", "1"), ("HERMES_YOLO_MODE", "1")):
+            monkeypatch.setenv(name, value)
+        for name in ("HERMES_INTERACTIVE", "HERMES_GATEWAY_SESSION"):
+            monkeypatch.delenv(name, raising=False)
+        with mock_patch("tools.approval._get_cron_approval_mode", return_value="deny"):
+            verdict = check_dangerous_command("rm -rf /", "local")
+        assert verdict["approved"]
+
+    def test_non_cron_non_interactive_still_auto_approves(self, monkeypatch):
+        """With none of the session variables set (e.g. scripted usage) the command is approved."""
+        cleared = ("HERMES_CRON_SESSION", "HERMES_INTERACTIVE",
+                   "HERMES_GATEWAY_SESSION", "HERMES_YOLO_MODE")
+        for name in cleared:
+            monkeypatch.delenv(name, raising=False)
+
+        verdict = check_dangerous_command("rm -rf /tmp/stuff", "local")
+        assert verdict["approved"]
diff --git a/tests/tools/test_cronjob_tools.py b/tests/tools/test_cronjob_tools.py
index dd6b0101b1b..38fc12cc8c7 100644
--- a/tests/tools/test_cronjob_tools.py
+++ b/tests/tools/test_cronjob_tools.py
@@ -192,23 +192,23 @@ class TestUnifiedCronjobTool:
         result = json.loads(
             cronjob(
                 action="create",
-                skills=["blogwatcher", "find-nearby"],
+                skills=["blogwatcher", "maps"],
                 prompt="Use both skills and combine the result.",
                 schedule="every 1h",
                 name="Combo job",
             )
         )
         assert result["success"] is True
-        assert result["skills"] == ["blogwatcher", "find-nearby"]
+        assert result["skills"] == ["blogwatcher", "maps"]
 
         listing = json.loads(cronjob(action="list"))
-        assert listing["jobs"][0]["skills"] == ["blogwatcher", "find-nearby"]
+        assert listing["jobs"][0]["skills"] == ["blogwatcher", "maps"]
 
     def test_multi_skill_default_name_prefers_prompt_when_present(self):
         result = json.loads(
             cronjob(
                 action="create",
-                skills=["blogwatcher", "find-nearby"],
+                skills=["blogwatcher", "maps"],
                 prompt="Use both skills and combine the result.",
                 schedule="every 1h",
             )
@@ -220,7 +220,7 @@ class TestUnifiedCronjobTool:
         created = json.loads(
             cronjob(
                 action="create",
-                skills=["blogwatcher", "find-nearby"],
+                skills=["blogwatcher", "maps"],
                 prompt="Use both skills and combine the result.",
                 schedule="every 1h",
             )
diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py
index 3299b927e56..e1e119d9199 100644
--- a/tests/tools/test_delegate.py
+++ b/tests/tools/test_delegate.py
@@ -274,6 +274,7 @@ class TestDelegateTask(unittest.TestCase):
                 model=None,
                 max_iterations=10,
                 parent_agent=parent,
+                task_count=1,
             )
 
         self.assertIs(mock_child._print_fn, sink)
@@ -294,6 +295,7 @@ class TestDelegateTask(unittest.TestCase):
                 model=None,
                 max_iterations=10,
                 parent_agent=parent,
+                task_count=1,
             )
 
         self.assertTrue(callable(mock_child.thinking_callback))
@@ -363,6 +365,7 @@ class TestToolNamePreservation(unittest.TestCase):
                     model=None,
                     max_iterations=10,
                     parent_agent=parent,
+                    task_count=1,
                 )
             except NameError as exc:
                 self.fail(
@@ -1000,6 +1003,7 @@ class TestChildCredentialPoolResolution(unittest.TestCase):
                 model=None,
                 max_iterations=10,
                 parent_agent=parent,
+                task_count=1,
             )
 
             self.assertEqual(mock_child._credential_pool, mock_pool)
@@ -1225,6 +1229,7 @@ class TestDelegationReasoningEffort(unittest.TestCase):
         _build_child_agent(
             task_index=0, goal="test", context=None, toolsets=None,
             model=None, max_iterations=50, parent_agent=parent,
+            task_count=1,
         )
         call_kwargs = MockAgent.call_args[1]
         self.assertEqual(call_kwargs["reasoning_config"], {"enabled": True, "effort": "xhigh"})
@@ -1241,6 +1246,7 @@ class TestDelegationReasoningEffort(unittest.TestCase):
         _build_child_agent(
             task_index=0, goal="test", context=None, toolsets=None,
             model=None, max_iterations=50, parent_agent=parent,
+            task_count=1,
         )
         call_kwargs = MockAgent.call_args[1]
         self.assertEqual(call_kwargs["reasoning_config"], {"enabled": True, "effort": "low"})
@@ -1257,6 +1263,7 @@ class TestDelegationReasoningEffort(unittest.TestCase):
         _build_child_agent(
             task_index=0, goal="test", context=None, toolsets=None,
             model=None, max_iterations=50, parent_agent=parent,
+            task_count=1,
         )
         call_kwargs = MockAgent.call_args[1]
         self.assertEqual(call_kwargs["reasoning_config"], {"enabled": False})
@@ -1273,6 +1280,7 @@ class TestDelegationReasoningEffort(unittest.TestCase):
         _build_child_agent(
             task_index=0, goal="test", context=None, toolsets=None,
             model=None, max_iterations=50, parent_agent=parent,
+            task_count=1,
         )
         call_kwargs = MockAgent.call_args[1]
         self.assertEqual(call_kwargs["reasoning_config"], {"enabled": True, "effort": "medium"})
diff --git a/tests/tools/test_local_interrupt_cleanup.py b/tests/tools/test_local_interrupt_cleanup.py
new file mode 100644
index 00000000000..72310009a54
--- /dev/null
+++ b/tests/tools/test_local_interrupt_cleanup.py
@@ -0,0 +1,145 @@
+"""Regression tests for _wait_for_process subprocess cleanup on exception exit.
+
+When the poll loop exits via KeyboardInterrupt or SystemExit (SIGTERM via
+cli.py signal handler, SIGINT on the main thread in non-interactive -q mode,
+or explicit sys.exit from some caller), the child subprocess must be killed
+before the exception propagates — otherwise the local backend's use of
+os.setsid leaves an orphan with PPID=1.
+
+The live repro that motivated this: hermes chat -q ... 'sleep 300', SIGTERM
+to the python process, sleep 300 survived with PPID=1 for the full 300 s
+because _wait_for_process never got to call _kill_process before python
+died.  See commit message for full context.
+"""
+import os
+import signal
+import subprocess
+import threading
+import time
+
+import pytest
+
+from tools.environments.local import LocalEnvironment
+
+
+@pytest.fixture(autouse=True)
+def _isolate_hermes_home(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))  # autouse: every test here gets its own HERMES_HOME
+    (tmp_path / "logs").mkdir(exist_ok=True)  # pre-create the logs/ subdirectory under it
+
+
+def _pgid_still_alive(pgid: int) -> bool:
+    """Report whether process group *pgid* still has at least one live member."""
+    try:
+        os.killpg(pgid, 0)  # signal 0 delivers nothing; it only probes for existence
+    except ProcessLookupError:
+        return False
+    return True
+
+
+def test_wait_for_process_kills_subprocess_on_keyboardinterrupt():
+    """When KeyboardInterrupt arrives mid-poll, the subprocess group must be
+    killed before the exception is re-raised."""
+    env = LocalEnvironment(cwd="/tmp")
+    try:
+        # Filled in by the worker thread: "result" if execute() returns,
+        # "exception" (the exception class name) if it raises instead —
+        # the last assertion below checks which of the two happened.
+        result_holder = {}
+
+        # Drive execute() on a separate thread so we can SIGNAL-interrupt it
+        # via a thread-targeted exception without killing our test process.
+        def worker():
+            # Spawn a subprocess that will definitely be alive long enough
+            # to observe the cleanup, via env.execute(...) — the normal path
+            # that goes through _wait_for_process.
+            try:
+                result_holder["result"] = env.execute("sleep 30", timeout=60)
+            except BaseException as e:  # noqa: BLE001 — we want to observe it
+                result_holder["exception"] = type(e).__name__
+
+        t = threading.Thread(target=worker, daemon=True)
+        t.start()
+        # Wait until the subprocess actually exists.  LocalEnvironment.execute
+        # does init_session() (one spawn) before the real command, so we need
+        # to wait until a sleep 30 is visible.  Look it up among our
+        # descendants via psutil, or fall back to parsing `ps` output.
+        deadline = time.monotonic() + 5.0
+        target_pid = None
+        while time.monotonic() < deadline:
+            # Walk our children and grand-children to find one running 'sleep 30'
+            try:
+                import psutil  # optional — fall back if absent
+                for p in psutil.Process(os.getpid()).children(recursive=True):
+                    try:
+                        if "sleep 30" in " ".join(p.cmdline()):
+                            target_pid = p.pid
+                            break
+                    except (psutil.NoSuchProcess, psutil.AccessDenied):
+                        continue
+            except ImportError:
+                # Fall back to ps
+                ps = subprocess.run(
+                    ["ps", "-eo", "pid,ppid,pgid,cmd"], capture_output=True, text=True,
+                )
+                for line in ps.stdout.splitlines():
+                    if "sleep 30" in line and "grep" not in line:
+                        parts = line.split()
+                        if parts and parts[0].isdigit():
+                            target_pid = int(parts[0])
+                            break
+            if target_pid:
+                break
+            time.sleep(0.1)
+
+        assert target_pid is not None, (
+            "test setup: couldn't find 'sleep 30' subprocess after 5 s"
+        )
+        pgid = os.getpgid(target_pid)
+        assert _pgid_still_alive(pgid), "sanity: subprocess should be alive"
+
+        # Now inject a KeyboardInterrupt into the worker thread.  Python-level
+        # signal handlers only ever run on the main thread, so the interrupt
+        # is simulated with ctypes.pythonapi.PyThreadState_SetAsyncExc.
+        import ctypes
+        # PyThreadState_SetAsyncExc addresses threads by their integer ident,
+        # not by the threading.Thread object.
+        tid = t.ident
+        assert tid is not None
+        # Fire KeyboardInterrupt into the worker thread
+        ret = ctypes.pythonapi.PyThreadState_SetAsyncExc(
+            ctypes.c_ulong(tid), ctypes.py_object(KeyboardInterrupt),
+        )
+        assert ret == 1, f"SetAsyncExc returned {ret}, expected 1"
+
+        # Give the worker a moment to: hit the exception at the next poll,
+        # run the except-block cleanup (_kill_process), and exit.
+        t.join(timeout=5.0)
+        assert not t.is_alive(), "worker didn't exit within 5 s of the interrupt"
+
+        # The critical assertion: the subprocess GROUP must be dead.  Not
+        # just the bash wrapper — the 'sleep 30' child too.
+        # Give the SIGTERM+1s wait+SIGKILL escalation a moment to complete.
+        deadline = time.monotonic() + 3.0
+        while time.monotonic() < deadline:
+            if not _pgid_still_alive(pgid):
+                break
+            time.sleep(0.1)
+        assert not _pgid_still_alive(pgid), (
+            f"subprocess group {pgid} is STILL ALIVE after worker received "
+            f"KeyboardInterrupt — orphan bug regressed.  This is the "
+            f"sleep-300-survives-SIGTERM scenario from Physikal's Apr 2026 "
+            f"report.  See tools/environments/base.py _wait_for_process "
+            f"except-block."
+        )
+        # And the worker should have observed the KeyboardInterrupt (i.e.
+        # it re-raised cleanly, not silently swallowed).
+        assert result_holder.get("exception") == "KeyboardInterrupt", (
+            f"worker result: {result_holder!r} — expected KeyboardInterrupt "
+            f"propagation after cleanup"
+        )
+    finally:
+        try:
+            env.cleanup()
+        except Exception:
+            pass
diff --git a/tests/tools/test_registry.py b/tests/tools/test_registry.py
index 85246bd7609..eb895e55a1a 100644
--- a/tests/tools/test_registry.py
+++ b/tests/tools/test_registry.py
@@ -296,6 +296,8 @@ class TestBuiltinDiscovery:
             "tools.code_execution_tool",
             "tools.cronjob_tools",
             "tools.delegate_tool",
+            "tools.feishu_doc_tool",
+            "tools.feishu_drive_tool",
             "tools.file_tools",
             "tools.homeassistant_tool",
             "tools.image_generation_tool",
diff --git a/tests/tui_gateway/test_protocol.py b/tests/tui_gateway/test_protocol.py
index 6ee5fe65b65..da154cc1680 100644
--- a/tests/tui_gateway/test_protocol.py
+++ b/tests/tui_gateway/test_protocol.py
@@ -4,6 +4,7 @@ import io
 import json
 import sys
 import threading
+import time
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -120,7 +121,9 @@ def test_block_and_respond(capture):
 
     rid = next(iter(server._pending))
     server._answers[rid] = "my_answer"
-    server._pending[rid].set()
+    # _pending values are (sid, Event) tuples — unpack to set the Event
+    _, ev = server._pending[rid]
+    ev.set()
 
     threading.Event().wait(0.1)
     assert result[0] == "my_answer"
@@ -128,7 +131,8 @@ def test_block_and_respond(capture):
 
 def test_clear_pending(server):
     ev = threading.Event()
-    server._pending["r1"] = ev
+    # _pending values are (sid, Event) tuples
+    server._pending["r1"] = ("sid-x", ev)
     server._clear_pending()
 
     assert ev.is_set()
@@ -231,3 +235,279 @@ def test_cli_exec_blocked(server, argv):
 ])
 def test_cli_exec_allowed(server, argv):
     assert server._cli_exec_blocked(argv) is None
+
+
+# ── slash.exec skill command interception ────────────────────────────
+
+
+def test_slash_exec_rejects_skill_commands(server):
+    """slash.exec answers a known skill command with error 4018 ("skill command")."""
+    session_id = "test-session"
+    server._sessions[session_id] = {"session_key": session_id, "agent": None}
+    request = {
+        "id": "r1",
+        "method": "slash.exec",
+        "params": {"command": "hermes-agent-dev", "session_id": session_id},
+    }
+    # Patch get_skill_commands so exactly one known skill is reported.
+    known_skills = {
+        "/hermes-agent-dev": {"name": "hermes-agent-dev", "description": "Dev workflow"},
+    }
+    with patch("agent.skill_commands.get_skill_commands", return_value=known_skills):
+        reply = server.handle_request(request)
+
+    # Should return an error so the TUI's .catch() fires command.dispatch
+    assert "error" in reply
+    assert reply["error"]["code"] == 4018
+    assert "skill command" in reply["error"]["message"]
+
+
+@pytest.mark.parametrize("cmd", ["retry", "queue hello", "q hello", "steer fix the test", "plan"])
+def test_slash_exec_rejects_pending_input_commands(server, cmd):
+    """Each parametrized command gets error 4018 ("pending-input command") from slash.exec."""
+    session_id = "test-session"
+    server._sessions[session_id] = {"session_key": session_id, "agent": None}
+    request = {
+        "id": "r1",
+        "method": "slash.exec",
+        "params": {"command": cmd, "session_id": session_id},
+    }
+
+    reply = server.handle_request(request)
+    assert "error" in reply
+    assert reply["error"]["code"] == 4018
+    assert "pending-input command" in reply["error"]["message"]
+
+
+def test_command_dispatch_queue_sends_message(server):
+    """command.dispatch "queue" echoes its arg back as a {"type": "send"} result."""
+    session_id = "test-session"
+    server._sessions[session_id] = {"session_key": session_id}
+    text = "tell me about quantum computing"
+    request = {
+        "id": "r1",
+        "method": "command.dispatch",
+        "params": {"name": "queue", "arg": text, "session_id": session_id},
+    }
+
+    reply = server.handle_request(request)
+    assert "error" not in reply
+    assert reply["result"]["type"] == "send"
+    assert reply["result"]["message"] == text
+
+
+def test_command_dispatch_queue_requires_arg(server):
+    """command.dispatch "queue" with an empty arg is answered with error 4004."""
+    session_id = "test-session"
+    server._sessions[session_id] = {"session_key": session_id}
+    request = {
+        "id": "r2",
+        "method": "command.dispatch",
+        "params": {"name": "queue", "arg": "", "session_id": session_id},
+    }
+
+    reply = server.handle_request(request)
+    assert "error" in reply
+    assert reply["error"]["code"] == 4004
+
+
+def test_command_dispatch_steer_fallback_sends_message(server):
+    """With the session's agent set to None, "steer" returns its arg as a send result."""
+    session_id = "test-session"
+    server._sessions[session_id] = {"session_key": session_id, "agent": None}
+    text = "focus on testing"
+    request = {
+        "id": "r3",
+        "method": "command.dispatch",
+        "params": {"name": "steer", "arg": text, "session_id": session_id},
+    }
+
+    reply = server.handle_request(request)
+    assert "error" not in reply
+    assert reply["result"]["type"] == "send"
+    assert reply["result"]["message"] == text
+
+
+def test_command_dispatch_retry_finds_last_user_message(server):
+    """command.dispatch retry resends the latest user message and trims it from history."""
+    session_id = "test-session"
+    turns = [
+        ("user", "first question"),
+        ("assistant", "first answer"),
+        ("user", "second question"),
+        ("assistant", "second answer"),
+    ]
+    server._sessions[session_id] = {
+        "session_key": session_id,
+        "agent": None,
+        "history": [{"role": role, "content": content} for role, content in turns],
+        "history_lock": threading.Lock(),
+        "history_version": 0,
+    }
+    request = {
+        "id": "r4",
+        "method": "command.dispatch",
+        "params": {"name": "retry", "session_id": session_id},
+    }
+
+    reply = server.handle_request(request)
+    assert "error" not in reply
+    assert reply["result"]["type"] == "send"
+    assert reply["result"]["message"] == "second question"
+
+    # Only the first exchange is left, and the version counter moved once.
+    session = server._sessions[session_id]
+    assert len(session["history"]) == 2 and session["history"][-1]["role"] == "assistant"
+    assert session["history_version"] == 1
+
+
+def test_command_dispatch_retry_empty_history(server):
+    """command.dispatch retry on a session whose history is empty yields error 4018."""
+    session_id = "test-session"
+    server._sessions[session_id] = {
+        "session_key": session_id,
+        "agent": None,
+        "history": [],
+        "history_lock": threading.Lock(),
+        "history_version": 0,
+    }
+    request = {
+        "id": "r5",
+        "method": "command.dispatch",
+        "params": {"name": "retry", "session_id": session_id},
+    }
+
+    reply = server.handle_request(request)
+    assert "error" in reply
+    assert reply["error"]["code"] == 4018
+
+
+def test_command_dispatch_retry_handles_multipart_content(server):
+    """command.dispatch retry returns just the text part of a list-valued user message."""
+    session_id = "test-session"
+    user_parts = [
+        {"type": "text", "text": "analyze this"},
+        {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
+    ]
+    server._sessions[session_id] = {
+        "session_key": session_id,
+        "agent": None,
+        "history": [
+            {"role": "user", "content": user_parts},
+            {"role": "assistant", "content": "I see the image."},
+        ],
+        "history_lock": threading.Lock(),
+        "history_version": 0,
+    }
+
+    request = {
+        "id": "r6",
+        "method": "command.dispatch",
+        "params": {"name": "retry", "session_id": session_id},
+    }
+
+    reply = server.handle_request(request)
+    assert "error" not in reply
+    assert reply["result"]["type"] == "send"
+    assert reply["result"]["message"] == "analyze this"
+
+
+def test_command_dispatch_returns_skill_payload(server):
+    """command.dispatch on a skill name returns a {"type": "skill"} result with name and message."""
+    session_id = "test-session"
+    server._sessions[session_id] = {"session_key": session_id}
+    known_skills = {"/hermes-agent-dev": {"name": "hermes-agent-dev", "description": "Dev workflow"}}
+    invocation = "Loaded skill content here"
+    request = {
+        "id": "r2",
+        "method": "command.dispatch",
+        "params": {"name": "hermes-agent-dev", "session_id": session_id},
+    }
+
+    # Stub the skill scan and the invocation-message builder with fixed values.
+    with patch("agent.skill_commands.scan_skill_commands", return_value=known_skills), \
+         patch("agent.skill_commands.build_skill_invocation_message", return_value=invocation):
+        reply = server.handle_request(request)
+
+    assert "error" not in reply
+    payload = reply["result"]
+    assert (payload["type"], payload["name"]) == ("skill", "hermes-agent-dev")
+    assert payload["message"] == invocation
+
+
+# ── dispatch(): pool routing for long handlers (#12546) ──────────────
+
+
+def test_dispatch_runs_short_handlers_inline(server):
+    """dispatch() itself returns the full JSON-RPC reply for the fast.ping handler."""
+    server._methods["fast.ping"] = lambda rid, params: server._ok(rid, {"pong": True})
+    request = {"id": "r1", "method": "fast.ping", "params": {}}
+    expected = {"jsonrpc": "2.0", "id": "r1", "result": {"pong": True}}
+
+    assert server.dispatch(request) == expected
+
+
+def test_dispatch_offloads_long_handlers_and_emits_via_stdout(capture):
+    """For slash.exec, dispatch() returns None and the reply appears in the capture buffer."""
+    server, buf = capture
+    server._methods["slash.exec"] = lambda rid, params: server._ok(rid, {"output": "hi"})
+
+    assert server.dispatch({"id": "r2", "method": "slash.exec", "params": {}}) is None
+
+    # Poll for up to ~0.5 s (50 x 10 ms) until something has been written.
+    attempts = 0
+    while not buf.getvalue() and attempts < 50:
+        time.sleep(0.01)
+        attempts += 1
+
+    emitted = json.loads(buf.getvalue())
+    assert emitted == {"jsonrpc": "2.0", "id": "r2", "result": {"output": "hi"}}
+
+
+def test_dispatch_long_handler_does_not_block_fast_handler(server):
+    """A slow long handler must not prevent a concurrent fast handler from completing."""
+    released = threading.Event()
+    server._methods["slash.exec"] = lambda rid, params: (released.wait(timeout=5), server._ok(rid, {"done": True}))[1]
+    server._methods["fast.ping"] = lambda rid, params: server._ok(rid, {"pong": True})
+
+    try:
+        t0 = time.monotonic()
+        assert server.dispatch({"id": "slow", "method": "slash.exec", "params": {}}) is None
+        fast_resp = server.dispatch({"id": "fast", "method": "fast.ping", "params": {}})
+        fast_elapsed = time.monotonic() - t0
+
+        assert fast_resp["result"] == {"pong": True}
+        assert fast_elapsed < 0.5, f"fast handler blocked for {fast_elapsed:.2f}s behind slow handler"
+    finally:
+        released.set()  # release the parked slash.exec handler even when an assert fails
+
+
+def test_dispatch_long_handler_exception_produces_error_response(capture):
+    """A slash.exec handler that raises still produces a written JSON-RPC error (-32000)."""
+    server, buf = capture
+
+    def boom(rid, params):
+        raise RuntimeError("kaboom")
+
+    server._methods["slash.exec"] = boom
+    server.dispatch({"id": "r3", "method": "slash.exec", "params": {}})
+
+    # Poll for up to ~0.5 s (50 x 10 ms) until something has been written.
+    attempts = 0
+    while not buf.getvalue() and attempts < 50:
+        time.sleep(0.01)
+        attempts += 1
+
+    emitted = json.loads(buf.getvalue())
+    assert emitted["id"] == "r3"
+    assert emitted["error"]["code"] == -32000
+    assert "kaboom" in emitted["error"]["message"]
+
+
+def test_dispatch_unknown_long_method_still_goes_inline(server):
+    """Method name not in _LONG_HANDLERS takes the sync path: dispatch() returns the reply."""
+    server._methods["some.method"] = lambda rid, params: server._ok(rid, {"ok": True})
+    request = {"id": "r4", "method": "some.method", "params": {}}
+
+    reply = server.dispatch(request)
+    assert reply["result"] == {"ok": True}
diff --git a/tools/approval.py b/tools/approval.py
index 7d8c5b032e8..fc344bd77b7 100644
--- a/tools/approval.py
+++ b/tools/approval.py
@@ -532,6 +532,19 @@ def _get_approval_timeout() -> int:
         return 60
 
 
+def _get_cron_approval_mode() -> str:
+    """Resolve approvals.cron_mode from config to 'approve' or 'deny' (default 'deny')."""
+    try:
+        from hermes_cli.config import load_config
+        raw = load_config().get("approvals", {}).get("cron_mode", "deny")
+        # NOTE(review): if the loader follows YAML 1.1, unquoted off/yes arrive
+        # as booleans and can never match the strings below — confirm.
+        approved = str(raw).lower().strip() in ("approve", "off", "allow", "yes")
+    except Exception:
+        return "deny"
+    return "approve" if approved else "deny"
+
+
 def _smart_approve(command: str, description: str) -> str:
     """Use the auxiliary LLM to assess risk and decide approval.
 
@@ -614,6 +627,19 @@ def check_dangerous_command(command: str, env_type: str,
     is_gateway = os.getenv("HERMES_GATEWAY_SESSION")
 
     if not is_cli and not is_gateway:
+        # Cron sessions: respect cron_mode config
+        if os.getenv("HERMES_CRON_SESSION"):
+            if _get_cron_approval_mode() == "deny":
+                return {
+                    "approved": False,
+                    "message": (
+                        f"BLOCKED: Command flagged as dangerous ({description}) "
+                        "but cron jobs run without a user present to approve it. "
+                        "Find an alternative approach that avoids this command. "
+                        "To allow dangerous commands in cron jobs, set "
+                        "approvals.cron_mode: approve in config.yaml."
+                    ),
+                }
         return {"approved": True, "message": None}
 
     if is_gateway or os.getenv("HERMES_EXEC_ASK"):
@@ -712,6 +738,22 @@ def check_all_command_guards(command: str, env_type: str,
     # Preserve the existing non-interactive behavior: outside CLI/gateway/ask
     # flows, we do not block on approvals and we skip external guard work.
     if not is_cli and not is_gateway and not is_ask:
+        # Cron sessions: respect cron_mode config
+        if os.getenv("HERMES_CRON_SESSION"):
+            if _get_cron_approval_mode() == "deny":
+                # Run detection to get a description for the block message
+                is_dangerous, _pk, description = detect_dangerous_command(command)
+                if is_dangerous:
+                    return {
+                        "approved": False,
+                        "message": (
+                            f"BLOCKED: Command flagged as dangerous ({description}) "
+                            "but cron jobs run without a user present to approve it. "
+                            "Find an alternative approach that avoids this command. "
+                            "To allow dangerous commands in cron jobs, set "
+                            "approvals.cron_mode: approve in config.yaml."
+                        ),
+                    }
         return {"approved": True, "message": None}
 
     # --- Phase 1: Gather findings from both checks ---
diff --git a/tools/browser_cdp_tool.py b/tools/browser_cdp_tool.py
new file mode 100644
index 00000000000..7817b9c35a5
--- /dev/null
+++ b/tools/browser_cdp_tool.py
@@ -0,0 +1,416 @@
+#!/usr/bin/env python3
+"""
+Raw Chrome DevTools Protocol (CDP) passthrough tool.
+
+Exposes a single tool, ``browser_cdp``, that sends arbitrary CDP commands to
+the browser's DevTools WebSocket endpoint.  Works when a CDP URL is
+configured — either via ``/browser connect`` (sets ``BROWSER_CDP_URL``) or
+``browser.cdp_url`` in ``config.yaml`` — or when a CDP-backed cloud provider
+session is active.
+
+This is the escape hatch for browser operations not covered by the main
+browser tool surface (``browser_navigate``, ``browser_click``,
+``browser_console``, etc.) — handling native dialogs, iframe-scoped
+evaluation, cookie/network control, low-level tab management, etc.
+
+Method reference: https://chromedevtools.github.io/devtools-protocol/
+"""
+from __future__ import annotations
+
+import asyncio
+import json
+import logging
+import os
+from typing import Any, Dict, Optional
+
+from tools.registry import registry, tool_error
+
+logger = logging.getLogger(__name__)
+
+CDP_DOCS_URL = "https://chromedevtools.github.io/devtools-protocol/"
+
+# ``websockets`` is a transitive dependency of hermes-agent (via fal_client
+# and firecrawl-py) and is already imported by gateway/platforms/feishu.py.
+# Wrap the import so a clean error surfaces if the package is ever absent.
+try:
+    import websockets
+    from websockets.exceptions import WebSocketException
+
+    _WS_AVAILABLE = True
+except ImportError:
+    websockets = None  # type: ignore[assignment]
+    WebSocketException = Exception  # type: ignore[assignment,misc]
+    _WS_AVAILABLE = False
+
+
+# ---------------------------------------------------------------------------
+# Async-from-sync bridge (matches the pattern in homeassistant_tool.py)
+# ---------------------------------------------------------------------------
+
+
+def _run_async(coro):
+    """Run *coro* to completion from sync code, with or without a running loop.
+
+    Under a running loop, ``asyncio.run`` executes on a one-off worker thread.
+    """
+    try:
+        active = asyncio.get_running_loop()
+    except RuntimeError:
+        active = None
+    if active is None or not active.is_running():
+        return asyncio.run(coro)
+    import concurrent.futures
+    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
+        return pool.submit(asyncio.run, coro).result()
+
+
+# ---------------------------------------------------------------------------
+# Endpoint resolution
+# ---------------------------------------------------------------------------
+
+
+def _resolve_cdp_endpoint() -> str:
+    """Return the configured CDP endpoint, stripped, or ``""`` if none resolves.
+
+    The lookup itself is delegated to ``tools.browser_tool._get_cdp_override``
+    so precedence stays consistent with the rest of the browser tool surface:
+
+    1. ``BROWSER_CDP_URL`` env var (live override from ``/browser connect``)
+    2. ``browser.cdp_url`` in ``config.yaml``
+    """
+    try:
+        from tools.browser_tool import _get_cdp_override  # type: ignore[import-not-found]
+        endpoint = _get_cdp_override() or ""
+        return endpoint.strip()
+    except Exception as err:  # pragma: no cover — defensive
+        logger.debug("browser_cdp: failed to resolve CDP endpoint: %s", err)
+        return ""
+
+
+# ---------------------------------------------------------------------------
+# Core CDP call
+# ---------------------------------------------------------------------------
+
+
+async def _cdp_call(
+    ws_url: str,
+    method: str,
+    params: Dict[str, Any],
+    target_id: Optional[str],
+    timeout: float,
+) -> Dict[str, Any]:
+    """Make a single CDP call, optionally attaching to a target first.
+
+    When ``target_id`` is provided, we call ``Target.attachToTarget`` with
+    ``flatten=True`` to multiplex a page-level session over the same
+    browser-level WebSocket, then send ``method`` with that ``sessionId``.
+    When ``target_id`` is None, ``method`` is sent at browser level — which
+    works for ``Target.*``, ``Browser.*``, ``Storage.*`` and a few other
+    globally-scoped domains.
+
+    Raises ``TimeoutError`` when a wait exceeds ``timeout`` and ``RuntimeError``
+    when the attach step fails or the method returns a CDP ``error``.
+    """
+    assert websockets is not None  # guarded by _WS_AVAILABLE at call-site
+
+    async with websockets.connect(
+        ws_url,
+        max_size=None,  # CDP responses (e.g. DOM.getDocument) can be large
+        open_timeout=timeout,
+        close_timeout=5,
+        ping_interval=None,  # CDP server doesn't expect pings
+    ) as ws:
+        next_id = 1
+        session_id: Optional[str] = None
+
+        # --- Step 1: attach to target if requested ---
+        if target_id:
+            attach_id = next_id
+            next_id += 1
+            attach_req = {
+                "id": attach_id,
+                "method": "Target.attachToTarget",
+                "params": {"targetId": target_id, "flatten": True},
+            }
+            await ws.send(json.dumps(attach_req))
+            deadline = asyncio.get_running_loop().time() + timeout
+            while True:
+                remaining = deadline - asyncio.get_running_loop().time()
+                if remaining <= 0:
+                    raise TimeoutError(
+                        f"Timed out attaching to target {target_id}"
+                    )
+                raw = await asyncio.wait_for(ws.recv(), timeout=remaining)
+                msg = json.loads(raw)
+                if msg.get("id") == attach_id:
+                    if "error" in msg:
+                        raise RuntimeError(
+                            f"Target.attachToTarget failed: {msg['error']}"
+                        )
+                    session_id = msg.get("result", {}).get("sessionId")
+                    if not session_id:
+                        raise RuntimeError(
+                            "Target.attachToTarget did not return a sessionId"
+                        )
+                    break
+                # Ignore events (messages without "id") while waiting
+
+        # --- Step 2: dispatch the real method ---
+        call_id = next_id
+        next_id += 1
+        req: Dict[str, Any] = {
+            "id": call_id,
+            "method": method,
+            "params": params or {},
+        }
+        if session_id:
+            req["sessionId"] = session_id
+        await ws.send(json.dumps(req))
+
+        deadline = asyncio.get_running_loop().time() + timeout  # fresh deadline: timeout applies per step
+        while True:
+            remaining = deadline - asyncio.get_running_loop().time()
+            if remaining <= 0:
+                raise TimeoutError(
+                    f"Timed out waiting for response to {method}"
+                )
+            raw = await asyncio.wait_for(ws.recv(), timeout=remaining)
+            msg = json.loads(raw)
+            if msg.get("id") == call_id:
+                if "error" in msg:
+                    raise RuntimeError(f"CDP error: {msg['error']}")
+                return msg.get("result", {})
+            # Ignore events / out-of-order responses
+
+
+# ---------------------------------------------------------------------------
+# Public tool function
+# ---------------------------------------------------------------------------
+
+
+def browser_cdp(
+    method: str,
+    params: Optional[Dict[str, Any]] = None,
+    target_id: Optional[str] = None,
+    timeout: float = 30.0,
+    task_id: Optional[str] = None,
+) -> str:
+    """Send a raw CDP command.  See ``CDP_DOCS_URL`` for method documentation.
+
+    Args:
+        method: CDP method name, e.g. ``"Target.getTargets"``.
+        params: Method-specific parameters (a dict); ``None`` means ``{}``.
+        target_id: Optional target/tab ID for page-level methods.  When set,
+            we first attach to the target (``flatten=True``) and send
+            ``method`` with the resulting ``sessionId``.
+        timeout: Seconds to wait for the call; clamped to the range 1–300.
+        task_id: Unused (tool is stateless) — accepted for uniformity with
+            other browser tools.
+
+    Returns:
+        JSON string ``{"success": True, "method": ..., "result": {...}}`` on
+        success, or ``{"error": "..."}`` on failure.
+    """
+    del task_id  # unused — stateless
+
+    if not method or not isinstance(method, str):
+        return tool_error(
+            "'method' is required (e.g. 'Target.getTargets')",
+            cdp_docs=CDP_DOCS_URL,
+        )
+
+    if not _WS_AVAILABLE:
+        return tool_error(
+            "The 'websockets' Python package is required but not installed. "
+            "Install it with: pip install websockets"
+        )
+
+    endpoint = _resolve_cdp_endpoint()
+    if not endpoint:
+        return tool_error(
+            "No CDP endpoint is available. Run '/browser connect' to attach "
+            "to a running Chrome, or set 'browser.cdp_url' in config.yaml. "
+            "The Camofox backend is REST-only and does not expose CDP.",
+            cdp_docs=CDP_DOCS_URL,
+        )
+
+    if not endpoint.startswith(("ws://", "wss://")):
+        return tool_error(
+            f"CDP endpoint is not a WebSocket URL: {endpoint!r}. "
+            "Expected ws://... or wss://... — the /browser connect "
+            "resolver should have rewritten this. Check that Chrome is "
+            "actually listening on the debug port."
+        )
+
+    # Only ``None`` means "no params".  ``params or {}`` would also turn other
+    # falsy values ([] / "" / 0) into {} and let them skip the type check.
+    call_params: Dict[str, Any] = {} if params is None else params
+    if not isinstance(call_params, dict):
+        return tool_error(
+            f"'params' must be an object/dict, got {type(call_params).__name__}"
+        )
+
+    try:
+        safe_timeout = float(timeout) if timeout else 30.0
+    except (TypeError, ValueError):
+        safe_timeout = 30.0
+    safe_timeout = max(1.0, min(safe_timeout, 300.0))
+
+    try:
+        result = _run_async(
+            _cdp_call(endpoint, method, call_params, target_id, safe_timeout)
+        )
+    except asyncio.TimeoutError as exc:
+        return tool_error(
+            f"CDP call timed out after {safe_timeout}s: {exc}",
+            method=method,
+        )
+    except (TimeoutError, RuntimeError) as exc:
+        return tool_error(str(exc), method=method)
+    except WebSocketException as exc:
+        return tool_error(
+            f"WebSocket error talking to CDP at {endpoint}: {exc}. The "
+            "browser may have disconnected — try '/browser connect' again.",
+            method=method,
+        )
+    except Exception as exc:  # pragma: no cover — unexpected
+        logger.exception("browser_cdp unexpected error")
+        return tool_error(
+            f"Unexpected error: {type(exc).__name__}: {exc}",
+            method=method,
+        )
+
+    payload: Dict[str, Any] = {
+        "success": True,
+        "method": method,
+        "result": result,
+    }
+    if target_id:
+        payload["target_id"] = target_id
+    return json.dumps(payload, ensure_ascii=False)
+
+
+# ---------------------------------------------------------------------------
+# Registry
+# ---------------------------------------------------------------------------
+
+
+BROWSER_CDP_SCHEMA: Dict[str, Any] = {
+    "name": "browser_cdp",
+    "description": (
+        "Send a raw Chrome DevTools Protocol (CDP) command. Escape hatch for "
+        "browser operations not covered by browser_navigate, browser_click, "
+        "browser_console, etc.\n\n"
+        "**Requires a reachable CDP endpoint.** Available when the user has "
+        "run '/browser connect' to attach to a running Chrome, or when "
+        "'browser.cdp_url' is set in config.yaml. Not currently wired up for "
+        "cloud backends (Browserbase, Browser Use, Firecrawl) — those expose "
+        "CDP per session but live-session routing is a follow-up. Camofox is "
+        "REST-only and will never support CDP. If the tool is in your toolset "
+        "at all, a CDP endpoint is already reachable.\n\n"
+        f"**CDP method reference:** {CDP_DOCS_URL} — use web_extract on a "
+        "method's URL (e.g. '/tot/Page/#method-handleJavaScriptDialog') "
+        "to look up parameters and return shape.\n\n"
+        "**Common patterns:**\n"
+        "- List tabs: method='Target.getTargets', params={}\n"
+        "- Handle a native JS dialog: method='Page.handleJavaScriptDialog', "
+        "params={'accept': true, 'promptText': ''}, target_id=<tabId>\n"
+        "- Get all cookies: method='Network.getAllCookies', params={}\n"
+        "- Eval in a specific tab: method='Runtime.evaluate', "
+        "params={'expression': '...', 'returnByValue': true}, "
+        "target_id=<tabId>\n"
+        "- Set viewport for a tab: method='Emulation.setDeviceMetricsOverride', "
+        "params={'width': 1280, 'height': 720, 'deviceScaleFactor': 1, "
+        "'mobile': false}, target_id=<tabId>\n\n"
+        "**Usage rules:**\n"
+        "- Browser-level methods (Target.*, Browser.*, Storage.*): omit "
+        "target_id.\n"
+        "- Page-level methods (Page.*, Runtime.*, DOM.*, Emulation.*, "
+        "Network.* scoped to a tab): pass target_id from Target.getTargets.\n"
+        "- Each call is independent — sessions and event subscriptions do "
+        "not persist between calls. For stateful workflows, prefer the "
+        "dedicated browser tools."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "method": {
+                "type": "string",
+                "description": (
+                    "CDP method name, e.g. 'Target.getTargets', "
+                    "'Runtime.evaluate', 'Page.handleJavaScriptDialog'."
+                ),
+            },
+            "params": {
+                "type": "object",
+                "description": (
+                    "Method-specific parameters as a JSON object. Omit or "
+                    "pass {} for methods that take no parameters."
+                ),
+                "additionalProperties": True,
+            },
+            "target_id": {
+                "type": "string",
+                "description": (
+                    "Optional. Target/tab ID from Target.getTargets result "
+                    "(each entry's 'targetId'). Required for page-level "
+                    "methods; must be omitted for browser-level methods."
+                ),
+            },
+            "timeout": {
+                "type": "number",
+                "description": (
+                    "Timeout in seconds (default 30, max 300)."
+                ),
+                "default": 30,
+            },
+        },
+        "required": ["method"],
+    },
+}
+
+
+def _browser_cdp_check() -> bool:
+    """Report whether ``browser_cdp`` should be offered to the model.
+
+    Returns True only when the browser requirements are met *and* a static
+    CDP URL override is present — set via ``/browser connect``
+    (``BROWSER_CDP_URL``) or ``browser.cdp_url`` in ``config.yaml``.
+
+    Backends without a CDP endpoint we can reach today are gated out so the
+    model never sees a tool that would reliably fail: Camofox (REST-only),
+    the default local agent-browser mode (Playwright hides its internal CDP
+    port), and cloud providers whose per-session ``cdp_url`` is not yet
+    surfaced.  Cloud-provider CDP routing is a follow-up.
+
+    This stays a thin wrapper so the registration statement can remain at
+    module top level (the tool-discovery AST scan only picks up top-level
+    ``registry.register(...)`` calls).
+    """
+    try:
+        from tools.browser_tool import (  # type: ignore[import-not-found]
+            _get_cdp_override,
+            check_browser_requirements,
+        )
+    except ImportError as exc:  # pragma: no cover — defensive
+        logger.debug("browser_cdp check: browser_tool import failed: %s", exc)
+        return False
+    # Base browser requirements first; only then look for a CDP URL override.
+    requirements_met = check_browser_requirements()
+    return bool(_get_cdp_override()) if requirements_met else False
+
+
+registry.register(
+    name="browser_cdp",
+    toolset="browser",
+    schema=BROWSER_CDP_SCHEMA,
+    handler=lambda args, **kw: browser_cdp(
+        method=args.get("method", ""),
+        params=args.get("params"),
+        target_id=args.get("target_id"),
+        timeout=args.get("timeout", 30.0),
+        task_id=kw.get("task_id"),
+    ),
+    check_fn=_browser_cdp_check,
+    emoji="🧪",
+)
diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py
index 3e7e3f925b9..c5a89488a08 100644
--- a/tools/code_execution_tool.py
+++ b/tools/code_execution_tool.py
@@ -29,6 +29,7 @@ Remote execution additionally requires Python 3 in the terminal backend.
 """
 
 import base64
+import functools
 import json
 import logging
 import os
@@ -1022,10 +1023,15 @@ def execute_code(
         child_env["HERMES_RPC_SOCKET"] = sock_path
         child_env["PYTHONDONTWRITEBYTECODE"] = "1"
         # Ensure the hermes-agent root is importable in the sandbox so
-        # repo-root modules are available to child scripts.
+        # repo-root modules are available to child scripts.  We also prepend
+        # the staging tmpdir so ``from hermes_tools import ...`` resolves even
+        # when the subprocess CWD is not tmpdir (project mode).
         _hermes_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
         _existing_pp = child_env.get("PYTHONPATH", "")
-        child_env["PYTHONPATH"] = _hermes_root + (os.pathsep + _existing_pp if _existing_pp else "")
+        _pp_parts = [tmpdir, _hermes_root]
+        if _existing_pp:
+            _pp_parts.append(_existing_pp)
+        child_env["PYTHONPATH"] = os.pathsep.join(_pp_parts)
         # Inject user's configured timezone so datetime.now() in sandboxed
         # code reflects the correct wall-clock time.  Only TZ is set —
         # HERMES_TIMEZONE is an internal Hermes setting and must not leak
@@ -1042,9 +1048,19 @@ def execute_code(
         if _profile_home:
             child_env["HOME"] = _profile_home
 
+        # Resolve interpreter + CWD based on execute_code mode.
+        #   - strict : today's behavior (sys.executable + tmpdir CWD).
+        #   - project: user's venv python + session's working directory, so
+        #              project deps like pandas and user files resolve.
+        # Env scrubbing and tool whitelist apply identically in both modes.
+        _mode = _get_execution_mode()
+        _child_python = _resolve_child_python(_mode)
+        _child_cwd = _resolve_child_cwd(_mode, tmpdir)
+        _script_path = os.path.join(tmpdir, "script.py")
+
         proc = subprocess.Popen(
-            [sys.executable, "script.py"],
-            cwd=tmpdir,
+            [_child_python, _script_path],
+            cwd=_child_cwd,
             env=child_env,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
@@ -1299,6 +1315,127 @@ def _load_config() -> dict:
         return {}
 
 
+# ---------------------------------------------------------------------------
+# Execution mode resolution (strict vs project)
+# ---------------------------------------------------------------------------
+
+# Valid values for code_execution.mode. Kept as a module constant so tests
+# and the config layer can reference the canonical set.
+EXECUTION_MODES = ("project", "strict")
+DEFAULT_EXECUTION_MODE = "project"
+
+
+def _get_execution_mode() -> str:
+    """Return the configured execute_code mode: 'project' or 'strict'.
+
+    The value is read from ``code_execution.mode`` in config.yaml.  Anything
+    outside ``EXECUTION_MODES`` is logged as a warning and replaced by
+    ``DEFAULT_EXECUTION_MODE`` ('project').  Env scrubbing and the tool
+    whitelist apply identically in both modes.
+
+    Modes:
+      - ``project`` (default): run in the session's working directory with
+        the active virtual environment's python, so project dependencies
+        (pandas, torch, project packages) and files resolve naturally.
+      - ``strict``: run in an isolated temp directory with ``sys.executable``
+        (hermes-agent's python); reproducible, but project deps and relative
+        paths won't resolve.
+    """
+    raw_mode = _load_config().get("mode", DEFAULT_EXECUTION_MODE)
+    normalized = str(raw_mode).strip().lower()
+    if normalized not in EXECUTION_MODES:
+        logger.warning(
+            "Ignoring code_execution.mode=%r (expected one of %s), falling back to %r",
+            normalized, EXECUTION_MODES, DEFAULT_EXECUTION_MODE,
+        )
+        return DEFAULT_EXECUTION_MODE
+    return normalized
+
+
+@functools.lru_cache(maxsize=32)
+def _is_usable_python(python_path: str) -> bool:
+    """Return True when ``python_path`` runs and reports Python 3.8 or newer.
+
+    The probe spawns the interpreter once with a 5-second timeout; results
+    are memoized (``lru_cache``) so repeated execute_code calls skip the fork.
+    """
+    probe = "import sys; sys.exit(0 if sys.version_info >= (3, 8) else 1)"
+    try:
+        completed = subprocess.run(
+            [python_path, "-c", probe],
+            timeout=5,
+            capture_output=True,
+        )
+    except (OSError, subprocess.SubprocessError):
+        return False
+    return completed.returncode == 0
+
+
+def _resolve_child_python(mode: str) -> str:
+    """Choose the interpreter that will run the execute_code child process.
+
+    ``strict`` mode always returns ``sys.executable`` so runs stay
+    reproducible across sessions.
+
+    ``project`` mode looks for a python binary under ``VIRTUAL_ENV`` and then
+    ``CONDA_PREFIX`` so project dependencies (``import pandas`` etc.) resolve.
+    ``sys.executable`` is returned when neither variable is set, no
+    executable candidate exists, or the first candidate found fails the
+    Python 3.8+ probe.
+    """
+    if mode != "project":
+        return sys.executable
+
+    bin_dir = "Scripts" if _IS_WINDOWS else "bin"
+    if _IS_WINDOWS:
+        interpreter_names = ("python.exe", "python3.exe")
+    else:
+        interpreter_names = ("python", "python3")
+
+    for env_var in ("VIRTUAL_ENV", "CONDA_PREFIX"):
+        env_root = os.environ.get(env_var, "").strip()
+        if not env_root:
+            continue
+        for name in interpreter_names:
+            candidate = os.path.join(env_root, bin_dir, name)
+            if not os.path.isfile(candidate) or not os.access(candidate, os.X_OK):
+                continue
+            if not _is_usable_python(candidate):
+                # The first executable candidate decides the outcome: a
+                # failed probe is logged and we use sys.executable without
+                # trying the remaining names or environment variables.
+                logger.info(
+                    "execute_code: skipping %s=%s (Python version < 3.8 or broken). "
+                    "Using sys.executable instead.", env_var, candidate,
+                )
+                return sys.executable
+            return candidate
+
+    return sys.executable
+
+
+def _resolve_child_cwd(mode: str, staging_dir: str) -> str:
+    """Resolve the working directory for the execute_code subprocess.
+
+    - ``strict``: the staging tmpdir (today's behavior).
+    - ``project``: the session's TERMINAL_CWD (same as the terminal tool), or
+      ``os.getcwd()`` if TERMINAL_CWD is unset or doesn't point at a real dir.
+      Falls back to the staging tmpdir as a last resort (also when the CWD was
+      deleted and ``os.getcwd()`` raises) so Popen never gets a missing cwd.
+    """
+    if mode != "project":
+        return staging_dir
+    raw = os.environ.get("TERMINAL_CWD", "").strip()
+    expanded = os.path.expanduser(raw) if raw else ""
+    if expanded and os.path.isdir(expanded):
+        return expanded
+    try:
+        here = os.getcwd()
+    except OSError:  # the process CWD was removed out from under us
+        return staging_dir
+    return here if os.path.isdir(here) else staging_dir
+
+
 # ---------------------------------------------------------------------------
 # OpenAI Function-Calling Schema
 # ---------------------------------------------------------------------------
@@ -1330,15 +1467,24 @@ _TOOL_DOC_LINES = [
 ]
 
 
-def build_execute_code_schema(enabled_sandbox_tools: set = None) -> dict:
+def build_execute_code_schema(enabled_sandbox_tools: set = None,
+                              mode: str = None) -> dict:
     """Build the execute_code schema with description listing only enabled tools.
 
     When tools are disabled via ``hermes tools`` (e.g. web is turned off),
     the schema description should NOT mention web_search / web_extract —
     otherwise the model thinks they are available and keeps trying to use them.
+
+    ``mode`` controls the working-directory sentence in the description:
+      - ``'strict'``: scripts run in a temp dir (not the session's CWD)
+      - ``'project'`` (default): scripts run in the session's CWD with the
+        active venv's python
+    If ``mode`` is None, the current ``code_execution.mode`` config is read.
     """
     if enabled_sandbox_tools is None:
         enabled_sandbox_tools = SANDBOX_ALLOWED_TOOLS
+    if mode is None:
+        mode = _get_execution_mode()
 
     # Build tool documentation lines for only the enabled tools
     tool_lines = "\n".join(
@@ -1354,6 +1500,20 @@ def build_execute_code_schema(enabled_sandbox_tools: set = None) -> dict:
     else:
         import_str = "..."
 
+    # Mode-specific CWD guidance. Project mode is the default and matches
+    # terminal()'s filesystem/interpreter; strict mode retains the isolated
+    # temp-dir staging and hermes-agent's own python.
+    if mode == "strict":
+        cwd_note = (
+            "Scripts run in their own temp dir, not the session's CWD — use absolute paths "
+            "(os.path.expanduser('~/.hermes/.env')) or terminal()/read_file() for user files."
+        )
+    else:
+        cwd_note = (
+            "Scripts run in the session's working directory with the active venv's python, "
+            "so project deps (pandas, etc.) and relative paths work like in terminal()."
+        )
+
     description = (
         "Run a Python script that can call Hermes tools programmatically. "
         "Use this when you need 3+ tool calls with processing logic between them, "
@@ -1367,6 +1527,7 @@ def build_execute_code_schema(enabled_sandbox_tools: set = None) -> dict:
         f"{tool_lines}\n\n"
         "Limits: 5-minute timeout, 50KB stdout cap, max 50 tool calls per script. "
         "terminal() is foreground-only (no background or pty).\n\n"
+        f"{cwd_note}\n\n"
         "Print your final result to stdout. Use Python stdlib (json, re, math, csv, "
         "datetime, collections, etc.) for processing between tool calls.\n\n"
         "Also available (no import needed — built into hermes_tools):\n"
@@ -1395,7 +1556,8 @@ def build_execute_code_schema(enabled_sandbox_tools: set = None) -> dict:
     }
 
 
-# Default schema used at registration time (all sandbox tools listed)
+# Default schema used at registration time (all sandbox tools listed,
+# current configured mode).  model_tools.py rebuilds per-session anyway.
 EXECUTE_CODE_SCHEMA = build_execute_code_schema()
 
 
diff --git a/tools/environments/base.py b/tools/environments/base.py
index 8e990792369..1bc08449e49 100644
--- a/tools/environments/base.py
+++ b/tools/environments/base.py
@@ -23,6 +23,19 @@ from tools.interrupt import is_interrupted
 
 logger = logging.getLogger(__name__)
 
+# Opt-in debug tracing for the interrupt/activity/poll machinery.  Set
+# HERMES_DEBUG_INTERRUPT=1 to log loop entry/exit, periodic heartbeats, and
+# every is_interrupted() state change from _wait_for_process.  Off by default
+# to avoid flooding production gateway logs.
+_DEBUG_INTERRUPT = bool(os.getenv("HERMES_DEBUG_INTERRUPT"))
+
+if _DEBUG_INTERRUPT:
+    # AIAgent's quiet_mode path (run_agent.py) forces the `tools` logger to
+    # ERROR on CLI startup, which would silently swallow every trace we emit.
+    # Force this module's own logger back to INFO so the trace is visible in
+    # agent.log regardless of quiet-mode.  Scoped to the opt-in case only.
+    logger.setLevel(logging.INFO)
+
 # Thread-local activity callback.  The agent sets this before a tool call so
 # long-running _wait_for_process loops can report liveness to the gateway.
 _activity_callback_local = threading.local()
@@ -413,6 +426,13 @@ class BaseEnvironment(ABC):
         Fires the ``activity_callback`` (if set on this instance) every 10s
         while the process is running so the gateway's inactivity timeout
         doesn't kill long-running commands.
+
+        Also wraps the poll loop in a ``try/except`` that calls
+        ``self._kill_process(proc)`` (best-effort) before re-raising when we
+        exit via ``KeyboardInterrupt`` or ``SystemExit``.  Without this, the
+        local backend (which spawns subprocesses with ``os.setsid`` into their
+        own process group) leaves an orphan with ``PPID=1`` when python is shut
+        down mid-tool — the ``sleep 300``-survives-30-min bug Physikal and I both hit.
         """
         output_chunks: list[str] = []
 
@@ -437,28 +457,101 @@ class BaseEnvironment(ABC):
             "start": _now,
         }
 
-        while proc.poll() is None:
-            if is_interrupted():
+        # --- Debug tracing (opt-in via HERMES_DEBUG_INTERRUPT=1) -------------
+        # Captures loop entry/exit, interrupt state changes, and periodic
+        # heartbeats so we can diagnose "agent never sees the interrupt"
+        # reports without reproducing locally.
+        _tid = threading.current_thread().ident
+        _pid = getattr(proc, "pid", None)
+        _iter_count = 0
+        _last_heartbeat = _now
+        _last_interrupt_state = False
+        _cb_was_none = _get_activity_callback() is None
+        if _DEBUG_INTERRUPT:
+            logger.info(
+                "[interrupt-debug] _wait_for_process ENTER tid=%s pid=%s "
+                "timeout=%ss activity_cb=%s initial_interrupt=%s",
+                _tid, _pid, timeout,
+                "set" if not _cb_was_none else "MISSING",
+                is_interrupted(),
+            )
+
+        try:
+            while proc.poll() is None:
+                _iter_count += 1
+                if is_interrupted():
+                    if _DEBUG_INTERRUPT:
+                        logger.info(
+                            "[interrupt-debug] _wait_for_process INTERRUPT DETECTED "
+                            "tid=%s pid=%s iter=%d elapsed=%.1fs — killing process group",
+                            _tid, _pid, _iter_count, time.monotonic() - _activity_state["start"],
+                        )
+                    self._kill_process(proc)
+                    drain_thread.join(timeout=2)
+                    return {
+                        "output": "".join(output_chunks) + "\n[Command interrupted]",
+                        "returncode": 130,
+                    }
+                if time.monotonic() > deadline:
+                    if _DEBUG_INTERRUPT:
+                        logger.info(
+                            "[interrupt-debug] _wait_for_process TIMEOUT "
+                            "tid=%s pid=%s iter=%d timeout=%ss",
+                            _tid, _pid, _iter_count, timeout,
+                        )
+                    self._kill_process(proc)
+                    drain_thread.join(timeout=2)
+                    partial = "".join(output_chunks)
+                    timeout_msg = f"\n[Command timed out after {timeout}s]"
+                    return {
+                        "output": partial + timeout_msg
+                        if partial
+                        else timeout_msg.lstrip(),
+                        "returncode": 124,
+                    }
+                # Periodic activity touch so the gateway knows we're alive
+                touch_activity_if_due(_activity_state, "terminal command running")
+
+                # Heartbeat every ~30s: proves the loop is alive and reports
+                # the activity-callback state (thread-local, can get clobbered
+                # by nested tool calls or executor thread reuse).
+                if _DEBUG_INTERRUPT and time.monotonic() - _last_heartbeat >= 30.0:
+                    _cb_now_none = _get_activity_callback() is None
+                    logger.info(
+                        "[interrupt-debug] _wait_for_process HEARTBEAT "
+                        "tid=%s pid=%s iter=%d elapsed=%.0fs "
+                        "interrupt=%s activity_cb=%s%s",
+                        _tid, _pid, _iter_count,
+                        time.monotonic() - _activity_state["start"],
+                        is_interrupted(),
+                        "set" if not _cb_now_none else "MISSING",
+                        " (LOST during run)" if _cb_now_none and not _cb_was_none else "",
+                    )
+                    _last_heartbeat = time.monotonic()
+                    _cb_was_none = _cb_now_none
+
+                time.sleep(0.2)
+        except (KeyboardInterrupt, SystemExit):
+            # Signal arrived (SIGTERM/SIGHUP/SIGINT) or sys.exit() was called
+            # while we were polling.  The local backend spawns subprocesses
+            # with os.setsid, which puts them in their own process group — so
+            # if we let the interrupt propagate without killing the child,
+            # python exits and the child is reparented to init (PPID=1) and
+            # keeps running as an orphan.  Killing the process group here
+            # guarantees the tool's side effects stop when the agent stops.
+            if _DEBUG_INTERRUPT:
+                logger.info(
+                    "[interrupt-debug] _wait_for_process EXCEPTION_EXIT "
+                    "tid=%s pid=%s iter=%d elapsed=%.1fs — killing subprocess group before re-raise",
+                    _tid, _pid, _iter_count,
+                    time.monotonic() - _activity_state["start"],
+                )
+            try:
                 self._kill_process(proc)
                 drain_thread.join(timeout=2)
-                return {
-                    "output": "".join(output_chunks) + "\n[Command interrupted]",
-                    "returncode": 130,
-                }
-            if time.monotonic() > deadline:
-                self._kill_process(proc)
-                drain_thread.join(timeout=2)
-                partial = "".join(output_chunks)
-                timeout_msg = f"\n[Command timed out after {timeout}s]"
-                return {
-                    "output": partial + timeout_msg
-                    if partial
-                    else timeout_msg.lstrip(),
-                    "returncode": 124,
-                }
-            # Periodic activity touch so the gateway knows we're alive
-            touch_activity_if_due(_activity_state, "terminal command running")
-            time.sleep(0.2)
+            except Exception:
+                pass  # cleanup is best-effort
+            raise
 
         drain_thread.join(timeout=5)
 
@@ -467,6 +560,15 @@ class BaseEnvironment(ABC):
         except Exception:
             pass
 
+        if _DEBUG_INTERRUPT:
+            logger.info(
+                "[interrupt-debug] _wait_for_process EXIT (natural) "
+                "tid=%s pid=%s iter=%d elapsed=%.1fs returncode=%s",
+                _tid, _pid, _iter_count,
+                time.monotonic() - _activity_state["start"],
+                proc.returncode,
+            )
+
         return {"output": "".join(output_chunks), "returncode": proc.returncode}
 
     def _kill_process(self, proc: ProcessHandle):
diff --git a/tools/interrupt.py b/tools/interrupt.py
index 9bc8b83ae4f..ac784332f91 100644
--- a/tools/interrupt.py
+++ b/tools/interrupt.py
@@ -14,8 +14,23 @@ Usage in tools:
         return {"output": "[interrupted]", "returncode": 130}
 """
 
+import logging
+import os
 import threading
 
+logger = logging.getLogger(__name__)
+
+# Opt-in debug tracing — pairs with HERMES_DEBUG_INTERRUPT in
+# tools/environments/base.py.  Enables per-call logging of set/check so the
+# caller thread, target thread, and current state are visible when
+# diagnosing "interrupt signaled but tool never saw it" reports.
+_DEBUG_INTERRUPT = bool(os.getenv("HERMES_DEBUG_INTERRUPT"))
+
+if _DEBUG_INTERRUPT:
+    # AIAgent's quiet_mode path forces `tools` logger to ERROR on CLI startup.
+    # Force our own logger back to INFO so the trace is visible in agent.log.
+    logger.setLevel(logging.INFO)
+
 # Set of thread idents that have been interrupted.
 _interrupted_threads: set[int] = set()
 _lock = threading.Lock()
@@ -35,6 +50,13 @@ def set_interrupt(active: bool, thread_id: int | None = None) -> None:
             _interrupted_threads.add(tid)
         else:
             _interrupted_threads.discard(tid)
+        _snapshot = set(_interrupted_threads) if _DEBUG_INTERRUPT else None
+    if _DEBUG_INTERRUPT:
+        logger.info(
+            "[interrupt-debug] set_interrupt(active=%s, target_tid=%s) "
+            "called_from_tid=%s current_set=%s",
+            active, tid, threading.current_thread().ident, _snapshot,
+        )
 
 
 def is_interrupted() -> bool:
diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py
index 69832cc1c7a..1182207b84c 100644
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -1126,7 +1126,7 @@ def terminal_tool(
         workdir: Working directory for this command (optional, uses session cwd if not set)
         pty: If True, use pseudo-terminal for interactive CLI tools (local backend only)
         notify_on_complete: If True and background=True, auto-notify the agent when the process exits
-        watch_patterns: List of strings to watch for in background output; triggers notification on match
+        watch_patterns: List of strings to watch for in background output; fires a notification on first match per pattern. Use ONLY for mid-process signals (errors, readiness markers) that appear before exit. For end-of-run markers use notify_on_complete instead — stacking both produces duplicate, delayed notifications.
 
     Returns:
         str: JSON string with output, exit_code, and error fields
@@ -1724,7 +1724,7 @@ TERMINAL_SCHEMA = {
             "watch_patterns": {
                 "type": "array",
                 "items": {"type": "string"},
-                "description": "List of strings to watch for in background process output. When any pattern matches a line of output, you'll be notified with the matching text — like notify_on_complete but triggers mid-process on specific output. Use for monitoring logs, watching for errors, or waiting for specific events (e.g. [\"ERROR\", \"FAIL\", \"listening on port\"])."
+                "description": "Strings to watch for in background process output. Fires a notification the first time each pattern matches a line of output. **Use ONLY for mid-process signals** you want to react to before the process exits — errors, readiness markers, intermediate step markers (e.g. [\"ERROR\", \"Traceback\", \"listening on port\"]). Do NOT use for end-of-run markers (summary headers, 'DONE', 'PASS' printed right before exit) — use `notify_on_complete` for that instead. Stacking end-of-run patterns on top of `notify_on_complete` produces duplicate, delayed notifications that arrive after you've already moved on, since delivery is asynchronous and continues after the process exits."
             }
         },
         "required": ["command"]
diff --git a/tools/voice_mode.py b/tools/voice_mode.py
index 50515fc6903..66ecb242c67 100644
--- a/tools/voice_mode.py
+++ b/tools/voice_mode.py
@@ -15,6 +15,7 @@ import platform
 import re
 import shutil
 import subprocess
+import sys
 import tempfile
 import threading
 import time
@@ -582,8 +583,7 @@ class AudioRecorder:
         except (ImportError, OSError) as e:
             raise RuntimeError(
                 "Voice mode requires sounddevice and numpy.\n"
-                "Install with: pip install sounddevice numpy\n"
-                "Or: pip install hermes-agent[voice]"
+                f"Install with: {sys.executable} -m pip install sounddevice numpy"
             ) from e
 
         with self._lock:
diff --git a/toolsets.py b/toolsets.py
index 6ac8d0782d6..d9f353e1f20 100644
--- a/toolsets.py
+++ b/toolsets.py
@@ -43,7 +43,7 @@ _HERMES_CORE_TOOLS = [
     "browser_navigate", "browser_snapshot", "browser_click",
     "browser_type", "browser_scroll", "browser_back",
     "browser_press", "browser_get_images",
-    "browser_vision", "browser_console",
+    "browser_vision", "browser_console", "browser_cdp",
     # Text-to-speech
     "text_to_speech",
     # Planning & memory
@@ -115,7 +115,7 @@ TOOLSETS = {
             "browser_navigate", "browser_snapshot", "browser_click",
             "browser_type", "browser_scroll", "browser_back",
             "browser_press", "browser_get_images",
-            "browser_vision", "browser_console", "web_search"
+            "browser_vision", "browser_console", "browser_cdp", "web_search"
         ],
         "includes": []
     },
@@ -249,7 +249,7 @@ TOOLSETS = {
             "browser_navigate", "browser_snapshot", "browser_click",
             "browser_type", "browser_scroll", "browser_back",
             "browser_press", "browser_get_images",
-            "browser_vision", "browser_console",
+            "browser_vision", "browser_console", "browser_cdp",
             "todo", "memory",
             "session_search",
             "execute_code", "delegate_task",
@@ -274,7 +274,7 @@ TOOLSETS = {
             "browser_navigate", "browser_snapshot", "browser_click",
             "browser_type", "browser_scroll", "browser_back",
             "browser_press", "browser_get_images",
-            "browser_vision", "browser_console",
+            "browser_vision", "browser_console", "browser_cdp",
             # Planning & memory
             "todo", "memory",
             # Session history search
diff --git a/tui_gateway/entry.py b/tui_gateway/entry.py
index a9667528de4..d2b82b9dab2 100644
--- a/tui_gateway/entry.py
+++ b/tui_gateway/entry.py
@@ -2,7 +2,7 @@ import json
 import signal
 import sys
 
-from tui_gateway.server import handle_request, resolve_skin, write_json
+from tui_gateway.server import dispatch, resolve_skin, write_json
 
 signal.signal(signal.SIGPIPE, signal.SIG_DFL)
 signal.signal(signal.SIGINT, signal.SIG_IGN)
@@ -28,7 +28,7 @@ def main():
                 sys.exit(0)
             continue
 
-        resp = handle_request(req)
+        resp = dispatch(req)
         if resp is not None:
             if not write_json(resp):
                 sys.exit(0)
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 3ef76a0f02e..3a48e381e8c 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -1,4 +1,5 @@
 import atexit
+import concurrent.futures
 import copy
 import json
 import os
@@ -27,7 +28,7 @@ from tui_gateway.render import make_stream_renderer, render_diff, render_message
 
 _sessions: dict[str, dict] = {}
 _methods: dict[str, callable] = {}
-_pending: dict[str, threading.Event] = {}
+_pending: dict[str, tuple[str, threading.Event]] = {}
 _answers: dict[str, str] = {}
 _db = None
 _stdout_lock = threading.Lock()
@@ -36,6 +37,23 @@ _cfg_cache: dict | None = None
 _cfg_mtime: float | None = None
 _SLASH_WORKER_TIMEOUT_S = max(5.0, float(os.environ.get("HERMES_TUI_SLASH_TIMEOUT_S", "45") or 45))
 
+# ── Async RPC dispatch (#12546) ──────────────────────────────────────
+# A handful of handlers block the dispatcher loop in entry.py for seconds
+# to minutes (slash.exec, cli.exec, shell.exec, session.resume,
+# session.branch). While they're running, inbound RPCs — notably
+# approval.respond and session.interrupt — sit unread in the stdin pipe.
+# We route only those slow handlers onto a small thread pool; everything
+# else stays on the main thread so ordering stays sane for the fast path.
+# write_json is already _stdout_lock-guarded, so concurrent response
+# writes are safe.
+_LONG_HANDLERS = frozenset({"cli.exec", "session.branch", "session.resume", "shell.exec", "slash.exec"})
+
+_pool = concurrent.futures.ThreadPoolExecutor(
+    max_workers=max(2, int(os.environ.get("HERMES_TUI_RPC_POOL_WORKERS", "4") or 4)),
+    thread_name_prefix="tui-rpc",
+)
+atexit.register(lambda: _pool.shutdown(wait=False, cancel_futures=True))
+
 # Reserve real stdout for JSON-RPC only; redirect Python's stdout to stderr
 # so stray print() from libraries/tools becomes harmless gateway.stderr instead
 # of corrupting the JSON protocol.
@@ -200,6 +218,29 @@ def handle_request(req: dict) -> dict | None:
     return fn(req.get("id"), req.get("params", {}))
 
 
+def dispatch(req: dict) -> dict | None:
+    """Route inbound RPCs — long handlers to the pool, everything else inline.
+
+    Inline: returns whatever handle_request returns.  Pooled (method in
+    _LONG_HANDLERS): returns None at once; the worker writes the response
+    via write_json itself, reporting any handler exception as error -32000.
+    """
+    if req.get("method") not in _LONG_HANDLERS:
+        return handle_request(req)
+
+    def run():
+        try:
+            resp = handle_request(req)
+        except Exception as exc:
+            resp = _err(req.get("id"), -32000, f"handler error: {exc}")
+        if resp is not None:
+            write_json(resp)
+
+    _pool.submit(run)
+
+    return None
+
+
 def _wait_agent(session: dict, rid: str, timeout: float = 30.0) -> dict | None:
     ready = session.get("agent_ready")
     if ready is not None and not ready.wait(timeout=timeout):
@@ -296,7 +337,7 @@ def _enable_gateway_prompts() -> None:
 def _block(event: str, sid: str, payload: dict, timeout: int = 300) -> str:
     rid = uuid.uuid4().hex[:8]
     ev = threading.Event()
-    _pending[rid] = ev
+    _pending[rid] = (sid, ev)
     payload["request_id"] = rid
     _emit(event, sid, payload)
     ev.wait(timeout=timeout)
@@ -304,10 +345,19 @@ def _block(event: str, sid: str, payload: dict, timeout: int = 300) -> str:
     return _answers.pop(rid, "")
 
 
-def _clear_pending():
-    for rid, ev in list(_pending.items()):
-        _answers[rid] = ""
-        ev.set()
+def _clear_pending(sid: str | None = None) -> None:
+    """Release pending prompts with an empty answer.
+
+    When *sid* is provided, only prompts owned by that session are
+    released — critical for session.interrupt, which must not
+    collaterally cancel clarify/sudo/secret prompts on unrelated
+    sessions sharing the same tui_gateway process.  When *sid* is
+    None, every pending prompt is released (used during shutdown).
+    """
+    for rid, (owner_sid, ev) in list(_pending.items()):  # iterate a snapshot
+        if sid is None or owner_sid == sid:
+            _answers[rid] = ""  # store the answer before set() wakes _block()
+            ev.set()
 
 
 # ── Agent factory ────────────────────────────────────────────────────
@@ -588,6 +638,11 @@ def _session_info(agent) -> dict:
         info["skills"] = get_available_skills()
     except Exception:
         pass
+    try:
+        from tools.mcp_tool import get_mcp_status
+        info["mcp_servers"] = get_mcp_status()
+    except Exception:
+        info["mcp_servers"] = []
     try:
         from hermes_cli.banner import get_update_result
         from hermes_cli.config import recommended_update_command
@@ -1074,7 +1129,23 @@ def _(rid, params: dict) -> dict:
     }
 
     def _build() -> None:
-        session = _sessions[sid]
+        session = _sessions.get(sid)
+        if session is None:
+            # session.close ran before the build thread got scheduled.
+            ready.set()
+            return
+
+        # Track what we allocate so we can clean up if session.close
+        # races us to the finish line.  session.close pops _sessions[sid]
+        # unconditionally and tries to close the slash_worker it finds;
+        # if _build is still mid-construction when close runs, close
+        # finds slash_worker=None / notify unregistered and returns
+        # cleanly — leaving us, the build thread, to later install the
+        # worker + notify on an orphaned session dict.  The finally
+        # block below detects the orphan and cleans up instead of
+        # leaking a subprocess and a global notify registration.
+        worker = None
+        notify_registered = False
         try:
             tokens = _set_session_context(key)
             try:
@@ -1086,13 +1157,15 @@ def _(rid, params: dict) -> dict:
             session["agent"] = agent
 
             try:
-                session["slash_worker"] = _SlashWorker(key, getattr(agent, "model", _resolve_model()))
+                worker = _SlashWorker(key, getattr(agent, "model", _resolve_model()))
+                session["slash_worker"] = worker
             except Exception:
                 pass
 
             try:
                 from tools.approval import register_gateway_notify, load_permanent_allowlist
                 register_gateway_notify(key, lambda data: _emit("approval.request", sid, data))
+                notify_registered = True
                 load_permanent_allowlist()
             except Exception:
                 pass
@@ -1108,6 +1181,23 @@ def _(rid, params: dict) -> dict:
             session["agent_error"] = str(e)
             _emit("error", sid, {"message": f"agent init failed: {e}"})
         finally:
+            # Orphan check: if session.close raced us and popped
+            # _sessions[sid] while we were building, the dict we just
+            # populated is unreachable.  Clean up the subprocess and
+            # the global notify registration ourselves — session.close
+            # couldn't see them at the time it ran.
+            if _sessions.get(sid) is not session:
+                if worker is not None:
+                    try:
+                        worker.close()
+                    except Exception:
+                        pass
+                if notify_registered:
+                    try:
+                        from tools.approval import unregister_gateway_notify
+                        unregister_gateway_notify(key)
+                    except Exception:
+                        pass
             ready.set()
 
     threading.Thread(target=_build, daemon=True).start()
@@ -1219,6 +1309,13 @@ def _(rid, params: dict) -> dict:
     session, err = _sess(params, rid)
     if err:
         return err
+    # Reject during an in-flight turn.  If we mutated history while
+    # the agent thread is running, prompt.submit's post-run history
+    # write would either clobber the undo (version matches) or
+    # silently drop the agent's output (version mismatch, see below).
+    # Neither is what the user wants — make them /interrupt first.
+    if session.get("running"):
+        return _err(rid, 4009, "session busy — /interrupt the current turn before /undo")
     removed = 0
     with session["history_lock"]:
         history = session.get("history", [])
@@ -1238,6 +1335,8 @@ def _(rid, params: dict) -> dict:
     session, err = _sess(params, rid)
     if err:
         return err
+    if session.get("running"):
+        return _err(rid, 4009, "session busy — /interrupt the current turn before /compress")
     try:
         with session["history_lock"]:
             removed, usage = _compress_session_history(session, str(params.get("focus_topic", "") or "").strip())
@@ -1331,7 +1430,11 @@ def _(rid, params: dict) -> dict:
         return err
     if hasattr(session["agent"], "interrupt"):
         session["agent"].interrupt()
-    _clear_pending()
+    # Scope the pending-prompt release to THIS session.  A global
+    # _clear_pending() would collaterally cancel clarify/sudo/secret
+    # prompts on unrelated sessions sharing the same tui_gateway
+    # process, silently resolving them to empty strings.
+    _clear_pending(params.get("session_id", ""))
     try:
         from tools.approval import resolve_gateway_approval
         resolve_gateway_approval(session["session_key"], "deny", resolve_all=True)
@@ -1340,6 +1443,31 @@ def _(rid, params: dict) -> dict:
     return _ok(rid, {"status": "interrupted"})
 
 
+@method("session.steer")
+def _(rid, params: dict) -> dict:
+    """Inject a user message into the next tool result without interrupting.
+
+    Mirrors AIAgent.steer(). Safe to call while a turn is running — the text
+    lands on the last tool result of the next tool batch and the model sees
+    it on its next iteration. No interrupt, no new user turn, no role
+    alternation violation.
+    """
+    text = (params.get("text") or "").strip()  # tolerate a missing or None "text"
+    if not text:
+        return _err(rid, 4002, "text is required")
+    session, err = _sess_nowait(params, rid)
+    if err:
+        return err
+    agent = session.get("agent")  # NOTE(review): presumably None while the agent is still building — confirm
+    if agent is None or not hasattr(agent, "steer"):
+        return _err(rid, 4010, "agent does not support steer")
+    try:
+        accepted = agent.steer(text)
+    except Exception as exc:
+        return _err(rid, 5000, f"steer failed: {exc}")
+    return _ok(rid, {"status": "queued" if accepted else "rejected", "text": text})
+
+
 @method("terminal.resize")
 def _(rid, params: dict) -> dict:
     session, err = _sess_nowait(params, rid)
@@ -1413,12 +1541,33 @@ def _(rid, params: dict) -> dict:
             )
 
             last_reasoning = None
+            status_note = None
             if isinstance(result, dict):
                 if isinstance(result.get("messages"), list):
                     with session["history_lock"]:
-                        if int(session.get("history_version", 0)) == history_version:
+                        current_version = int(session.get("history_version", 0))
+                        if current_version == history_version:
                             session["history"] = result["messages"]
                             session["history_version"] = history_version + 1
+                        else:
+                            # History mutated externally during the turn
+                            # (undo/compress/retry/rollback now guard on
+                            # session.running, but this is the defensive
+                            # backstop for any path that slips past).
+                            # Surface the desync rather than silently
+                            # dropping the agent's output — the UI can
+                            # show the response and warn that it was
+                            # not persisted.
+                            print(
+                                f"[tui_gateway] prompt.submit: history_version mismatch "
+                                f"(expected={history_version} current={current_version}) — "
+                                f"agent output NOT written to session history",
+                                file=sys.stderr,
+                            )
+                            status_note = (
+                                "History changed during this turn — the response above is visible "
+                                "but was not saved to session history."
+                            )
                 raw = result.get("final_response", "")
                 status = "interrupted" if result.get("interrupted") else "error" if result.get("error") else "complete"
                 lr = result.get("last_reasoning")
@@ -1431,6 +1580,8 @@ def _(rid, params: dict) -> dict:
             payload = {"text": raw, "usage": _get_usage(agent), "status": status}
             if last_reasoning:
                 payload["reasoning"] = last_reasoning
+            if status_note:
+                payload["warning"] = status_note
             rendered = render_message(raw, cols)
             if rendered:
                 payload["rendered"] = rendered
@@ -1622,9 +1773,10 @@ def _(rid, params: dict) -> dict:
 
 def _respond(rid, params, key):
     r = params.get("request_id", "")
-    ev = _pending.get(r)
-    if not ev:
+    entry = _pending.get(r)
+    if not entry:
         return _err(rid, 4009, f"no pending {key} request")
+    _, ev = entry  # entry is (owner_sid, event); the owner sid is not checked here
     _answers[r] = params.get(key, "")
     ev.set()
     return _ok(rid, {"status": "ok"})
@@ -1667,6 +1819,19 @@ def _(rid, params: dict) -> dict:
             if not value:
                 return _err(rid, 4002, "model value required")
             if session:
+                # Reject during an in-flight turn.  agent.switch_model()
+                # mutates self.model / self.provider / self.base_url /
+                # self.client in place; the worker thread running
+                # agent.run_conversation is reading those on every
+                # iteration.  A mid-turn swap can send an HTTP request
+                # with the new base_url but old model (or vice versa),
+                # producing 400/404s the user never asked for.  Parity
+                # with the gateway's running-agent /model guard.
+                if session.get("running"):
+                    return _err(
+                        rid, 4009,
+                        "session busy — /interrupt the current turn before switching models",
+                    )
                 result = _apply_model_switch(params.get("session_id", ""), session, value)
             else:
                 result = _apply_model_switch("", {"agent": None}, value)
@@ -1924,6 +2089,13 @@ _TUI_EXTRA: list[tuple[str, str, str]] = [
     ("/logs", "Show recent gateway log lines", "TUI"),
 ]
 
+# Commands that queue messages onto _pending_input in the CLI.
+# In the TUI the slash worker subprocess has no reader for that queue,
+# so slash.exec rejects them → TUI falls through to command.dispatch.
+_PENDING_INPUT_COMMANDS: frozenset[str] = frozenset({
+    "retry", "queue", "q", "steer", "plan",
+})
+
 
 @method("commands.catalog")
 def _(rid, params: dict) -> dict:
@@ -1962,8 +2134,35 @@ def _(rid, params: dict) -> dict:
                 cat_order.append(cat)
             cat_map[cat].append([name, desc])
 
-        skill_count = 0
         warning = ""
+        try:
+            qcmds = _load_cfg().get("quick_commands", {}) or {}
+            if isinstance(qcmds, dict) and qcmds:
+                bucket = "User commands"
+                if bucket not in cat_map:
+                    cat_map[bucket] = []
+                    cat_order.append(bucket)
+                for qname, qc in sorted(qcmds.items()):
+                    if not isinstance(qc, dict):
+                        continue
+                    key = f"/{qname}"
+                    canon[key.lower()] = key
+                    qtype = qc.get("type", "")
+                    if qtype == "exec":
+                        default_desc = f"exec: {qc.get('command', '')}"
+                    elif qtype == "alias":
+                        default_desc = f"alias → {qc.get('target', '')}"
+                    else:
+                        default_desc = qtype or "quick command"
+                    qdesc = str(qc.get("description") or default_desc)
+                    qdesc = qdesc[:120] + ("…" if len(qdesc) > 120 else "")
+                    all_pairs.append([key, qdesc])
+                    cat_map[bucket].append([key, qdesc])
+        except Exception as e:
+            if not warning:
+                warning = f"quick_commands discovery unavailable: {e}"
+
+        skill_count = 0
         try:
             from agent.skill_commands import scan_skill_commands
             for k, info in sorted(scan_skill_commands().items()):
@@ -2092,6 +2291,77 @@ def _(rid, params: dict) -> dict:
     except Exception:
         pass
 
+    # ── Commands that queue messages onto _pending_input in the CLI ───
+    # In the TUI the slash worker subprocess has no reader for that queue,
+    # so we handle them here and return a structured payload.
+
+    if name in ("queue", "q"):
+        if not arg:
+            return _err(rid, 4004, "usage: /queue <prompt>")
+        return _ok(rid, {"type": "send", "message": arg})
+
+    if name == "retry":
+        if not session:
+            return _err(rid, 4001, "no active session to retry")
+        if session.get("running"):
+            return _err(rid, 4009, "session busy — /interrupt the current turn before /retry")
+        history = session.get("history", [])
+        if not history:
+            return _err(rid, 4018, "no previous user message to retry")
+        # Walk backwards to find the last user message
+        last_user_idx = None
+        for i in range(len(history) - 1, -1, -1):
+            if history[i].get("role") == "user":
+                last_user_idx = i
+                break
+        if last_user_idx is None:
+            return _err(rid, 4018, "no previous user message to retry")
+        content = history[last_user_idx].get("content", "")
+        if isinstance(content, list):
+            content = " ".join(
+                p.get("text", "") for p in content if isinstance(p, dict) and p.get("type") == "text"
+            )
+        if not content:
+            return _err(rid, 4018, "last user message is empty")
+        # Truncate history: remove everything from the last user message onward
+        # (mirrors CLI retry_last() which strips the failed exchange)
+        with session["history_lock"]:
+            session["history"] = history[:last_user_idx]
+            session["history_version"] = int(session.get("history_version", 0)) + 1
+        return _ok(rid, {"type": "send", "message": content})
+
+    if name == "steer":
+        if not arg:
+            return _err(rid, 4004, "usage: /steer <prompt>")
+        agent = session.get("agent") if session else None
+        if agent and hasattr(agent, "steer"):
+            try:
+                accepted = agent.steer(arg)
+                if accepted:
+                    return _ok(rid, {"type": "exec", "output": f"⏩ Steer queued — arrives after the next tool call: {arg[:80]}{'...' if len(arg) > 80 else ''}"})
+            except Exception:
+                pass
+        # Fallback: no active run, treat as next-turn message
+        return _ok(rid, {"type": "send", "message": arg})
+
+    if name == "plan":
+        try:
+            from agent.skill_commands import build_skill_invocation_message as _bsim, build_plan_path
+            user_instruction = arg or ""
+            plan_path = build_plan_path(user_instruction)
+            msg = _bsim(
+                "/plan", user_instruction,
+                task_id=session.get("session_key", "") if session else "",
+                runtime_note=(
+                    "Save the markdown plan with write_file to this exact relative path "
+                    f"inside the active workspace/backend cwd: {plan_path}"
+                ),
+            )
+            if msg:
+                return _ok(rid, {"type": "send", "message": msg})
+        except Exception as e:
+            return _err(rid, 5030, f"plan skill failed: {e}")
+
     return _err(rid, 4018, f"not a quick/plugin/skill command: {name}")
 
 
@@ -2265,6 +2535,17 @@ def _mirror_slash_side_effects(sid: str, session: dict, command: str) -> str:
         return ""
     name, arg, agent = parts[0], (parts[1].strip() if len(parts) > 1 else ""), session.get("agent")
 
+    # Reject agent-mutating commands during an in-flight turn.  These
+    # all do read-then-mutate on live agent/session state that the
+    # worker thread running agent.run_conversation is using.  Parity
+    # with the session.compress / session.undo guards and the gateway
+    # runner's running-agent /model guard.
+    _MUTATES_WHILE_RUNNING = {"model", "personality", "prompt", "compress"}
+    if name in _MUTATES_WHILE_RUNNING and session.get("running"):
+        return (
+            f"session busy — /interrupt the current turn before running /{name}"
+        )
+
     try:
         if name == "model" and arg and agent:
             result = _apply_model_switch(sid, session, arg)
@@ -2308,6 +2589,26 @@ def _(rid, params: dict) -> dict:
     if not cmd:
         return _err(rid, 4004, "empty command")
 
+    # Skill slash commands and _pending_input commands must NOT go through the
+    # slash worker — see _PENDING_INPUT_COMMANDS definition above.
+    # (/browser connect/disconnect also uses _pending_input for context
+    # notes, but the actual browser operations need the slash worker's
+    # env-var side effects, so they stay in slash.exec — only the context
+    # note to the model is lost, which is low-severity.)
+    _cmd_parts = cmd.split() if not cmd.startswith("/") else cmd.lstrip("/").split()
+    _cmd_base = _cmd_parts[0] if _cmd_parts else ""
+
+    if _cmd_base in _PENDING_INPUT_COMMANDS:
+        return _err(rid, 4018, f"pending-input command: use command.dispatch for /{_cmd_base}")
+
+    try:
+        from agent.skill_commands import get_skill_commands
+        _cmd_key = f"/{_cmd_base}"
+        if _cmd_key in get_skill_commands():
+            return _err(rid, 4018, f"skill command: use command.dispatch for {_cmd_key}")
+    except Exception:
+        pass
+
     worker = session.get("slash_worker")
     if not worker:
         try:
@@ -2425,6 +2726,13 @@ def _(rid, params: dict) -> dict:
     file_path = params.get("file_path", "")
     if not target:
         return _err(rid, 4014, "hash required")
+    # Full-history rollback mutates session history.  Rejecting during
+    # an in-flight turn prevents prompt.submit from silently dropping
+    # the agent's output (version mismatch path) or clobbering the
+    # rollback (version-matches path).  A file-scoped rollback only
+    # touches disk, so we allow it.
+    if not file_path and session.get("running"):
+        return _err(rid, 4009, "session busy — /interrupt the current turn before full rollback.restore")
     try:
         def go(mgr, cwd):
             resolved = _resolve_checkpoint_hash(mgr, cwd, target)
diff --git a/ui-tui/src/__tests__/asCommandDispatch.test.ts b/ui-tui/src/__tests__/asCommandDispatch.test.ts
index 49ea56936c5..dfa7595174e 100644
--- a/ui-tui/src/__tests__/asCommandDispatch.test.ts
+++ b/ui-tui/src/__tests__/asCommandDispatch.test.ts
@@ -3,7 +3,7 @@ import { describe, expect, it } from 'vitest'
 import { asCommandDispatch } from '../lib/rpc.js'
 
 describe('asCommandDispatch', () => {
-  it('parses exec, alias, and skill', () => {
+  it('parses exec, alias, skill, and send', () => {
     expect(asCommandDispatch({ type: 'exec', output: 'hi' })).toEqual({ type: 'exec', output: 'hi' })
     expect(asCommandDispatch({ type: 'alias', target: 'help' })).toEqual({ type: 'alias', target: 'help' })
     expect(asCommandDispatch({ type: 'skill', name: 'x', message: 'do' })).toEqual({
@@ -11,11 +11,17 @@ describe('asCommandDispatch', () => {
       name: 'x',
       message: 'do'
     })
+    expect(asCommandDispatch({ type: 'send', message: 'hello world' })).toEqual({
+      type: 'send',
+      message: 'hello world'
+    })
   })
 
   it('rejects malformed payloads', () => {
     expect(asCommandDispatch(null)).toBeNull()
     expect(asCommandDispatch({ type: 'alias' })).toBeNull()
     expect(asCommandDispatch({ type: 'skill', name: 1 })).toBeNull()
+    expect(asCommandDispatch({ type: 'send' })).toBeNull()
+    expect(asCommandDispatch({ type: 'send', message: 42 })).toBeNull()
   })
 })
diff --git a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts
index e546ce640e4..f1f0c306bcd 100644
--- a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts
+++ b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts
@@ -4,7 +4,7 @@ import { createGatewayEventHandler } from '../app/createGatewayEventHandler.js'
 import { resetOverlayState } from '../app/overlayStore.js'
 import { turnController } from '../app/turnController.js'
 import { resetTurnState } from '../app/turnStore.js'
-import { resetUiState } from '../app/uiStore.js'
+import { patchUiState, resetUiState } from '../app/uiStore.js'
 import { estimateTokensRough } from '../lib/text.js'
 import type { Msg } from '../types.js'
 
@@ -47,6 +47,7 @@ describe('createGatewayEventHandler', () => {
     resetUiState()
     resetTurnState()
     turnController.fullReset()
+    patchUiState({ showReasoning: true })
   })
 
   it('persists completed tool rows when message.complete lands immediately after tool.complete', () => {
diff --git a/ui-tui/src/__tests__/createSlashHandler.test.ts b/ui-tui/src/__tests__/createSlashHandler.test.ts
index 9e1db994634..1f2f938a936 100644
--- a/ui-tui/src/__tests__/createSlashHandler.test.ts
+++ b/ui-tui/src/__tests__/createSlashHandler.test.ts
@@ -17,6 +17,64 @@ describe('createSlashHandler', () => {
     expect(getOverlayState().picker).toBe(true)
   })
 
+  it('opens the skills hub locally for bare /skills', () => {
+    const ctx = buildCtx()
+
+    expect(createSlashHandler(ctx)('/skills')).toBe(true)
+    expect(getOverlayState().skillsHub).toBe(true)
+    expect(ctx.gateway.rpc).not.toHaveBeenCalled()
+    expect(ctx.gateway.gw.request).not.toHaveBeenCalled()
+  })
+
+  it('routes /skills install <name> to skills.manage without opening overlay', () => {
+    const ctx = buildCtx()
+
+    expect(createSlashHandler(ctx)('/skills install foo')).toBe(true)
+    expect(getOverlayState().skillsHub).toBe(false)
+    expect(ctx.gateway.rpc).toHaveBeenCalledWith('skills.manage', {
+      action: 'install',
+      query: 'foo'
+    })
+  })
+
+  it('routes /skills inspect <name> to skills.manage', () => {
+    const ctx = buildCtx()
+
+    createSlashHandler(ctx)('/skills inspect my-skill')
+    expect(ctx.gateway.rpc).toHaveBeenCalledWith('skills.manage', {
+      action: 'inspect',
+      query: 'my-skill'
+    })
+  })
+
+  it('routes /skills search <query> to skills.manage', () => {
+    const ctx = buildCtx()
+
+    createSlashHandler(ctx)('/skills search vibe')
+    expect(ctx.gateway.rpc).toHaveBeenCalledWith('skills.manage', {
+      action: 'search',
+      query: 'vibe'
+    })
+  })
+
+  it('routes /skills browse [page] to skills.manage with a numeric page', () => {
+    const ctx = buildCtx()
+
+    createSlashHandler(ctx)('/skills browse 3')
+    expect(ctx.gateway.rpc).toHaveBeenCalledWith('skills.manage', {
+      action: 'browse',
+      page: 3
+    })
+  })
+
+  it('shows usage for an unknown /skills subcommand', () => {
+    const ctx = buildCtx()
+
+    createSlashHandler(ctx)('/skills zzz')
+    expect(ctx.gateway.rpc).not.toHaveBeenCalled()
+    expect(ctx.transcript.sys).toHaveBeenCalledWith(expect.stringContaining('usage: /skills'))
+  })
+
   it('cycles details mode and persists it', async () => {
     const ctx = buildCtx()
 
@@ -121,6 +179,67 @@ describe('createSlashHandler', () => {
     expect(createSlashHandler(ctx)('/h')).toBe(true)
     expect(ctx.transcript.panel).toHaveBeenCalledWith(expect.any(String), expect.any(Array))
   })
+
+  it('falls through to command.dispatch for skill commands and sends the message', async () => {
+    const skillMessage = 'Use this skill to do X.\n\n## Steps\n1. First step'
+
+    const ctx = buildCtx({
+      gateway: {
+        gw: {
+          getLogTail: vi.fn(() => ''),
+          request: vi.fn((method: string) => {
+            if (method === 'slash.exec') {
+              return Promise.reject(new Error('skill command: use command.dispatch')) // forces the fallback path
+            }
+
+            if (method === 'command.dispatch') {
+              return Promise.resolve({ type: 'skill', message: skillMessage, name: 'hermes-agent-dev' })
+            }
+
+            return Promise.resolve({})
+          })
+        },
+        rpc: vi.fn(() => Promise.resolve({}))
+      }
+    })
+
+    const h = createSlashHandler(ctx)
+    expect(h('/hermes-agent-dev')).toBe(true) // returns synchronously; the dispatch result lands later
+    await vi.waitFor(() => {
+      expect(ctx.transcript.sys).toHaveBeenCalledWith('⚡ loading skill: hermes-agent-dev')
+    })
+    expect(ctx.transcript.send).toHaveBeenCalledWith(skillMessage)
+  })
+
+  it('handles send-type dispatch for /plan command', async () => {
+    const planMessage = 'Plan skill content loaded'
+
+    const ctx = buildCtx({
+      gateway: {
+        gw: {
+          getLogTail: vi.fn(() => ''),
+          request: vi.fn((method: string) => {
+            if (method === 'slash.exec') {
+              return Promise.reject(new Error('pending-input command')) // again pushes the handler onto command.dispatch
+            }
+
+            if (method === 'command.dispatch') {
+              return Promise.resolve({ type: 'send', message: planMessage }) // 'send' payloads go to transcript.send
+            }
+
+            return Promise.resolve({})
+          })
+        },
+        rpc: vi.fn(() => Promise.resolve({}))
+      }
+    })
+
+    const h = createSlashHandler(ctx)
+    expect(h('/plan create a REST API')).toBe(true)
+    await vi.waitFor(() => {
+      expect(ctx.transcript.send).toHaveBeenCalledWith(planMessage)
+    })
+  })
 })
 
 const buildCtx = (overrides: Partial<Ctx> = {}): Ctx => ({
diff --git a/ui-tui/src/__tests__/paths.test.ts b/ui-tui/src/__tests__/paths.test.ts
new file mode 100644
index 00000000000..ef3c31ff36e
--- /dev/null
+++ b/ui-tui/src/__tests__/paths.test.ts
@@ -0,0 +1,84 @@
+import { afterEach, beforeEach, describe, expect, it } from 'vitest'
+
+import { fmtCwdBranch, shortCwd } from '../domain/paths.js'
+
+// Both suites pin HOME to this value so `~` collapsing does not depend on the host.
+const FAKE_HOME = '/Users/bb'
+
+// Restores HOME to the value captured before a suite overrode it. Assigning
+// `undefined` to a process.env key stores the string "undefined", so an
+// originally-unset HOME must be deleted rather than assigned.
+const restoreHome = (orig: string | undefined) => {
+  if (orig === undefined) {
+    delete process.env.HOME
+  } else {
+    process.env.HOME = orig
+  }
+}
+
+describe('shortCwd', () => {
+  const origHome = process.env.HOME
+
+  beforeEach(() => {
+    process.env.HOME = FAKE_HOME
+  })
+
+  afterEach(() => {
+    restoreHome(origHome)
+  })
+
+  it('collapses HOME to ~', () => {
+    expect(shortCwd('/Users/bb/proj/repo')).toBe('~/proj/repo')
+  })
+
+  it('leaves non-HOME paths alone', () => {
+    expect(shortCwd('/tmp/work')).toBe('/tmp/work')
+  })
+
+  it('truncates long paths from the left with ellipsis', () => {
+    const out = shortCwd('/var/long/deeply/nested/workspace/here', 10)
+    expect(out.startsWith('…')).toBe(true)
+    expect(out.length).toBe(10)
+    expect('/var/long/deeply/nested/workspace/here'.endsWith(out.slice(1))).toBe(true)
+  })
+
+  it('keeps paths shorter than max intact', () => {
+    expect(shortCwd('/a/b', 10)).toBe('/a/b')
+  })
+})
+
+describe('fmtCwdBranch', () => {
+  const origHome = process.env.HOME
+
+  beforeEach(() => {
+    process.env.HOME = FAKE_HOME
+  })
+
+  afterEach(() => {
+    restoreHome(origHome)
+  })
+
+  it('returns bare cwd when branch is null', () => {
+    expect(fmtCwdBranch('/Users/bb/proj', null)).toBe('~/proj')
+  })
+
+  it('returns bare cwd when branch is empty', () => {
+    expect(fmtCwdBranch('/Users/bb/proj', '')).toBe('~/proj')
+  })
+
+  it('appends branch in parens', () => {
+    expect(fmtCwdBranch('/Users/bb/proj', 'main')).toBe('~/proj (main)')
+  })
+
+  it('truncates the path to keep the branch tag readable', () => {
+    const out = fmtCwdBranch('/Users/bb/very/deeply/nested/project/folder', 'feature-branch', 30)
+    expect(out).toMatch(/ \(feature-branch\)$/)
+    expect(out.length).toBeLessThanOrEqual(30)
+  })
+
+  it('truncates very long branch names from the right', () => {
+    const out = fmtCwdBranch('/Users/bb/p', 'a-very-long-feature-branch-name')
+    expect(out).toMatch(/^~\/p \(…/)
+    expect(out).toContain(')')
+  })
+})
diff --git a/ui-tui/src/__tests__/providers.test.ts b/ui-tui/src/__tests__/providers.test.ts
new file mode 100644
index 00000000000..2dfd76d0220
--- /dev/null
+++ b/ui-tui/src/__tests__/providers.test.ts
@@ -0,0 +1,65 @@
+import { describe, expect, it } from 'vitest'
+
+import { providerDisplayNames } from '../domain/providers.js'
+
+describe('providerDisplayNames', () => { // maps { name, slug }[] to one display label per entry
+  it('returns bare names when all are unique', () => {
+    expect(
+      providerDisplayNames([
+        { name: 'Anthropic', slug: 'anthropic' },
+        { name: 'OpenAI', slug: 'openai' }
+      ])
+    ).toEqual(['Anthropic', 'OpenAI'])
+  })
+
+  it('appends slug to every collision so the disambiguation is symmetric', () => {
+    expect(
+      providerDisplayNames([
+        { name: 'Kimi For Coding', slug: 'kimi-coding' },
+        { name: 'Kimi For Coding', slug: 'kimi-coding-cn' }
+      ])
+    ).toEqual(['Kimi For Coding (kimi-coding)', 'Kimi For Coding (kimi-coding-cn)']) // both get a suffix, not just the second
+  })
+
+  it('only disambiguates the colliding group', () => {
+    expect(
+      providerDisplayNames([
+        { name: 'Anthropic', slug: 'anthropic' },
+        { name: 'Foo', slug: 'foo-a' },
+        { name: 'Foo', slug: 'foo-b' }
+      ])
+    ).toEqual(['Anthropic', 'Foo (foo-a)', 'Foo (foo-b)'])
+  })
+
+  it('falls back to plain name if slug is empty', () => {
+    expect(
+      providerDisplayNames([
+        { name: 'Foo', slug: '' },
+        { name: 'Foo', slug: '' }
+      ])
+    ).toEqual(['Foo', 'Foo']) // no slug to append, so the duplicates stay as-is
+  })
+
+  it('skips disambiguation when slug equals name', () => {
+    expect(
+      providerDisplayNames([
+        { name: 'foo', slug: 'foo' },
+        { name: 'foo', slug: 'foo' }
+      ])
+    ).toEqual(['foo', 'foo']) // i.e. never "foo (foo)"
+  })
+
+  it('handles empty input', () => {
+    expect(providerDisplayNames([])).toEqual([])
+  })
+
+  it('preserves order', () => {
+    const input = [
+      { name: 'Z', slug: 'z' },
+      { name: 'A', slug: 'a1' },
+      { name: 'A', slug: 'a2' }
+    ]
+
+    expect(providerDisplayNames(input)).toEqual(['Z', 'A (a1)', 'A (a2)']) // output index i corresponds to input index i
+  })
+})
diff --git a/ui-tui/src/__tests__/reasoning.test.ts b/ui-tui/src/__tests__/reasoning.test.ts
new file mode 100644
index 00000000000..c961ea7a0c2
--- /dev/null
+++ b/ui-tui/src/__tests__/reasoning.test.ts
@@ -0,0 +1,50 @@
+import { describe, expect, it } from 'vitest'
+
+import { hasReasoningTag, splitReasoning } from '../lib/reasoning.js'
+
+describe('splitReasoning', () => { // splits raw model output into { reasoning, text }
+  it('extracts <think>…</think> and strips it from text', () => {
+    const { reasoning, text } = splitReasoning('<think>plotting</think>\n\nhere is the answer')
+
+    expect(reasoning).toBe('plotting')
+    expect(text).toBe('here is the answer') // whitespace left behind by the removed block is trimmed too
+  })
+
+  it('handles multiple tag shapes', () => {
+    const input = '<reasoning>a</reasoning> <THINKING>b</THINKING> <thought>c</thought> body' // mixed tag names and casing
+    const { reasoning, text } = splitReasoning(input)
+
+    expect(reasoning).toContain('a')
+    expect(reasoning).toContain('b')
+    expect(reasoning).toContain('c')
+    expect(text).toBe('body')
+  })
+
+  it('treats unclosed trailing <think>… as reasoning', () => {
+    const { reasoning, text } = splitReasoning('answer start <think>still deciding') // no closing tag
+
+    expect(reasoning).toBe('still deciding')
+    expect(text).toBe('answer start')
+  })
+
+  it('returns empty reasoning and untouched text when no tags present', () => {
+    const { reasoning, text } = splitReasoning('plain body with no tags')
+
+    expect(reasoning).toBe('')
+    expect(text).toBe('plain body with no tags')
+  })
+
+  it('preserves text when reasoning block is empty', () => {
+    const { reasoning, text } = splitReasoning('<think></think>only body')
+
+    expect(reasoning).toBe('')
+    expect(text).toBe('only body') // the empty tag pair is still removed from the visible text
+  })
+
+  it('detects presence of any supported tag', () => {
+    expect(hasReasoningTag('pre <think>x</think> post')).toBe(true)
+    expect(hasReasoningTag('pre <reasoning>x</reasoning>')).toBe(true)
+    expect(hasReasoningTag('<REASONING_SCRATCHPAD>x</REASONING_SCRATCHPAD>')).toBe(true)
+    expect(hasReasoningTag('no tags at all')).toBe(false)
+  })
+})
diff --git a/ui-tui/src/__tests__/syntax.test.ts b/ui-tui/src/__tests__/syntax.test.ts
new file mode 100644
index 00000000000..505988b2abf
--- /dev/null
+++ b/ui-tui/src/__tests__/syntax.test.ts
@@ -0,0 +1,45 @@
+import { describe, expect, it } from 'vitest'
+
+import { highlightLine, isHighlightable } from '../lib/syntax.js'
+import { DEFAULT_THEME } from '../theme.js'
+
+const t = DEFAULT_THEME
+
+describe('syntax highlighter', () => { // highlightLine(line, lang, theme) yields [color, text] tokens
+  it('recognizes supported langs and aliases', () => {
+    expect(isHighlightable('ts')).toBe(true)
+    expect(isHighlightable('js')).toBe(true)
+    expect(isHighlightable('python')).toBe(true)
+    expect(isHighlightable('rs')).toBe(true)
+    expect(isHighlightable('bash')).toBe(true)
+    expect(isHighlightable('whatever')).toBe(false)
+    expect(isHighlightable('')).toBe(false)
+  })
+
+  it('paints a whole-line comment dim', () => {
+    const tokens = highlightLine('// hello', 'ts', t)
+
+    expect(tokens).toEqual([[t.color.dim, '// hello']]) // the whole line is one token
+  })
+
+  it('paints keywords, strings, and numbers in a ts line', () => {
+    const tokens = highlightLine(`const x = 'hi' + 42`, 'ts', t)
+    const colors = tokens.map(tok => tok[0])
+
+    expect(colors).toContain(t.color.bronze) // const
+    expect(colors).toContain(t.color.amber) // 'hi'
+    expect(colors).toContain(t.color.cornsilk) // 42
+  })
+
+  it('falls through unchanged for unknown langs', () => {
+    const tokens = highlightLine(`const x = 1`, 'zzz', t)
+
+    expect(tokens).toEqual([['', 'const x = 1']]) // one token with an empty color
+  })
+
+  it('treats `#` as a python comment, not a selector', () => {
+    const tokens = highlightLine('# comment', 'py', t)
+
+    expect(tokens).toEqual([[t.color.dim, '# comment']])
+  })
+})
diff --git a/ui-tui/src/__tests__/theme.test.ts b/ui-tui/src/__tests__/theme.test.ts
index 86a9768b0fd..4fe165c8d56 100644
--- a/ui-tui/src/__tests__/theme.test.ts
+++ b/ui-tui/src/__tests__/theme.test.ts
@@ -1,6 +1,6 @@
 import { describe, expect, it } from 'vitest'
 
-import { DEFAULT_THEME, fromSkin } from '../theme.js'
+import { DARK_THEME, DEFAULT_THEME, fromSkin, LIGHT_THEME } from '../theme.js'
 
 describe('DEFAULT_THEME', () => {
   it('has brand defaults', () => {
@@ -15,6 +15,26 @@ describe('DEFAULT_THEME', () => {
   })
 })
 
+describe('LIGHT_THEME', () => {
+  it('avoids bright-yellow accents unreadable on white backgrounds (#11300)', () => {
+    expect(LIGHT_THEME.color.gold).not.toBe('#FFD700') // hex values are presumably the dark-theme defaults — confirm
+    expect(LIGHT_THEME.color.amber).not.toBe('#FFBF00')
+    expect(LIGHT_THEME.color.dim).not.toBe('#B8860B')
+    expect(LIGHT_THEME.color.statusWarn).not.toBe('#FFD700')
+  })
+
+  it('keeps the same shape as DARK_THEME', () => {
+    expect(Object.keys(LIGHT_THEME.color).sort()).toEqual(Object.keys(DARK_THEME.color).sort()) // same color keys, order ignored
+    expect(LIGHT_THEME.brand).toEqual(DARK_THEME.brand) // brand block is identical across themes
+  })
+})
+
+describe('DEFAULT_THEME aliasing', () => {
+  it('defaults to DARK_THEME when HERMES_TUI_LIGHT is unset', () => {
+    expect(DEFAULT_THEME).toBe(DARK_THEME) // NOTE(review): relies on the runner's env leaving HERMES_TUI_LIGHT unset; nothing here enforces it
+  })
+})
+
 describe('fromSkin', () => {
   it('overrides banner colors', () => {
     expect(fromSkin({ banner_title: '#FF0000' }, {}).color.gold).toBe('#FF0000')
diff --git a/ui-tui/src/__tests__/useConfigSync.test.ts b/ui-tui/src/__tests__/useConfigSync.test.ts
new file mode 100644
index 00000000000..c14ecff3aa7
--- /dev/null
+++ b/ui-tui/src/__tests__/useConfigSync.test.ts
@@ -0,0 +1,67 @@
+import { beforeEach, describe, expect, it, vi } from 'vitest'
+
+import { $uiState, resetUiState } from '../app/uiStore.js'
+import { applyDisplay } from '../app/useConfigSync.js'
+
+describe('applyDisplay', () => { // applyDisplay(configResponse, setBell) copies display flags into $uiState
+  beforeEach(() => {
+    resetUiState() // presumably restores the store defaults so tests stay independent — confirm
+  })
+
+  it('fans every display flag out to $uiState and the bell callback', () => {
+    const setBell = vi.fn()
+
+    applyDisplay(
+      {
+        config: {
+          display: {
+            bell_on_complete: true,
+            details_mode: 'expanded',
+            inline_diffs: false,
+            show_cost: true,
+            show_reasoning: true,
+            streaming: false,
+            tui_compact: true,
+            tui_statusbar: false
+          }
+        }
+      },
+      setBell
+    )
+
+    const s = $uiState.get()
+    expect(setBell).toHaveBeenCalledWith(true) // bell_on_complete goes to the callback, not the store
+    expect(s.compact).toBe(true)
+    expect(s.detailsMode).toBe('expanded')
+    expect(s.inlineDiffs).toBe(false)
+    expect(s.showCost).toBe(true)
+    expect(s.showReasoning).toBe(true)
+    expect(s.statusBar).toBe(false)
+    expect(s.streaming).toBe(false)
+  })
+
+  it('applies v1 parity defaults when display fields are missing', () => {
+    const setBell = vi.fn()
+
+    applyDisplay({ config: { display: {} } }, setBell) // empty display block: every flag takes its default
+
+    const s = $uiState.get()
+    expect(setBell).toHaveBeenCalledWith(false)
+    expect(s.inlineDiffs).toBe(true)
+    expect(s.showCost).toBe(false)
+    expect(s.showReasoning).toBe(false)
+    expect(s.statusBar).toBe(true)
+    expect(s.streaming).toBe(true)
+  })
+
+  it('treats a null config like an empty display block', () => {
+    const setBell = vi.fn()
+
+    applyDisplay(null, setBell)
+
+    const s = $uiState.get()
+    expect(setBell).toHaveBeenCalledWith(false)
+    expect(s.inlineDiffs).toBe(true)
+    expect(s.streaming).toBe(true)
+  })
+})
diff --git a/ui-tui/src/app/createGatewayEventHandler.ts b/ui-tui/src/app/createGatewayEventHandler.ts
index e728f8bbd01..8f45bb3d7eb 100644
--- a/ui-tui/src/app/createGatewayEventHandler.ts
+++ b/ui-tui/src/app/createGatewayEventHandler.ts
@@ -46,7 +46,6 @@ const pushNote = pushUnique(6)
 const pushTool = pushUnique(8)
 
 export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: GatewayEvent) => void {
-  const { dequeue, queueEditRef, sendQueued } = ctx.composer
   const { rpc } = ctx.gateway
   const { STARTUP_RESUME_ID, newSession, resumeById, setCatalog } = ctx.session
   const { bellOnComplete, stdout, sys } = ctx.system
@@ -266,7 +265,7 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
       case 'tool.complete':
         turnController.recordToolComplete(ev.payload.tool_id, ev.payload.name, ev.payload.error, ev.payload.summary)
 
-        if (ev.payload.inline_diff) {
+        if (ev.payload.inline_diff && getUiState().inlineDiffs) {
           sys(ev.payload.inline_diff)
         }
 
@@ -394,16 +393,6 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
           patchUiState(state => ({ ...state, usage: { ...state.usage, ...ev.payload!.usage } }))
         }
 
-        if (queueEditRef.current !== null) {
-          return
-        }
-
-        const next = dequeue()
-
-        if (next) {
-          sendQueued(next)
-        }
-
         return
       }
 
diff --git a/ui-tui/src/app/createSlashHandler.ts b/ui-tui/src/app/createSlashHandler.ts
index 87475341aea..425e778ef3d 100644
--- a/ui-tui/src/app/createSlashHandler.ts
+++ b/ui-tui/src/app/createSlashHandler.ts
@@ -105,6 +105,10 @@ export function createSlashHandler(ctx: SlashHandlerContext): (cmd: string) => b
 
               return d.message?.trim() ? send(d.message) : sys(`/${parsed.name}: skill payload missing message`)
             }
+
+            if (d.type === 'send') {
+              return d.message?.trim() ? send(d.message) : sys(`/${parsed.name}: empty message`)
+            }
           })
           .catch(guardedErr)
       })
diff --git a/ui-tui/src/app/inputSelectionStore.ts b/ui-tui/src/app/inputSelectionStore.ts
new file mode 100644
index 00000000000..25b67c4283e
--- /dev/null
+++ b/ui-tui/src/app/inputSelectionStore.ts
@@ -0,0 +1,14 @@
+import { atom } from 'nanostores'
+
+export interface InputSelection { // shape of the value held by $inputSelection
+  clear: () => void // supplied by the publisher; presumably drops the selection — confirm
+  end: number // presumably the end offset into `value` — confirm
+  start: number // presumably the start offset into `value` — confirm
+  value: string
+}
+
+export const $inputSelection = atom<InputSelection | null>(null) // starts out null
+
+export const setInputSelection = (next: InputSelection | null) => $inputSelection.set(next) // replaces the stored value
+
+export const getInputSelection = () => $inputSelection.get() // synchronous read of the current value
diff --git a/ui-tui/src/app/interfaces.ts b/ui-tui/src/app/interfaces.ts
index 998afe2a198..af13e047c70 100644
--- a/ui-tui/src/app/interfaces.ts
+++ b/ui-tui/src/app/interfaces.ts
@@ -10,6 +10,7 @@ import type {
   ActivityItem,
   ApprovalReq,
   ClarifyReq,
+  ConfirmReq,
   DetailsMode,
   Msg,
   PanelSection,
@@ -53,10 +54,12 @@ export interface GatewayProviderProps {
 export interface OverlayState {
   approval: ApprovalReq | null
   clarify: ClarifyReq | null
+  confirm: ConfirmReq | null
   modelPicker: boolean
   pager: null | PagerState
   picker: boolean
   secret: null | SecretReq
+  skillsHub: boolean
   sudo: null | SudoReq
 }
 
@@ -78,9 +81,13 @@ export interface UiState {
   compact: boolean
   detailsMode: DetailsMode
   info: null | SessionInfo
+  inlineDiffs: boolean
+  showCost: boolean
+  showReasoning: boolean
   sid: null | string
   status: string
   statusBar: boolean
+  streaming: boolean
   theme: Theme
   usage: Usage
 }
@@ -186,11 +193,6 @@ export interface InputHandlerResult {
 }
 
 export interface GatewayEventHandlerContext {
-  composer: {
-    dequeue: () => string | undefined
-    queueEditRef: MutableRefObject<null | number>
-    sendQueued: (text: string) => void
-  }
   gateway: GatewayServices
   session: {
     STARTUP_RESUME_ID: string
@@ -335,5 +337,6 @@ export interface AppOverlaysProps {
 
 export interface PasteSnippet {
   label: string
+  path?: string
   text: string
 }
diff --git a/ui-tui/src/app/overlayStore.ts b/ui-tui/src/app/overlayStore.ts
index 4b24f0daab9..06dbd27a789 100644
--- a/ui-tui/src/app/overlayStore.ts
+++ b/ui-tui/src/app/overlayStore.ts
@@ -5,17 +5,21 @@ import type { OverlayState } from './interfaces.js'
 const buildOverlayState = (): OverlayState => ({
   approval: null,
   clarify: null,
+  confirm: null,
   modelPicker: false,
   pager: null,
   picker: false,
   secret: null,
+  skillsHub: false,
   sudo: null
 })
 
 export const $overlayState = atom<OverlayState>(buildOverlayState())
 
-export const $isBlocked = computed($overlayState, ({ approval, clarify, modelPicker, pager, picker, secret, sudo }) =>
-  Boolean(approval || clarify || modelPicker || pager || picker || secret || sudo)
+export const $isBlocked = computed(
+  $overlayState,
+  ({ approval, clarify, confirm, modelPicker, pager, picker, secret, skillsHub, sudo }) =>
+    Boolean(approval || clarify || confirm || modelPicker || pager || picker || secret || skillsHub || sudo)
 )
 
 export const getOverlayState = () => $overlayState.get()
diff --git a/ui-tui/src/app/slash/commands/core.ts b/ui-tui/src/app/slash/commands/core.ts
index e0832c7a694..0f8916c5cb6 100644
--- a/ui-tui/src/app/slash/commands/core.ts
+++ b/ui-tui/src/app/slash/commands/core.ts
@@ -1,7 +1,13 @@
+import { NO_CONFIRM_DESTRUCTIVE } from '../../../config/env.js'
 import { dailyFortune, randomFortune } from '../../../content/fortunes.js'
 import { HOTKEYS } from '../../../content/hotkeys.js'
 import { nextDetailsMode, parseDetailsMode } from '../../../domain/details.js'
-import type { ConfigGetValueResponse, ConfigSetResponse, SessionUndoResponse } from '../../../gatewayTypes.js'
+import type {
+  ConfigGetValueResponse,
+  ConfigSetResponse,
+  SessionSteerResponse,
+  SessionUndoResponse
+} from '../../../gatewayTypes.js'
 import { writeOsc52Clipboard } from '../../../lib/osc52.js'
 import type { DetailsMode, Msg, PanelSection } from '../../../types.js'
 import { patchOverlayState } from '../../overlayStore.js'
@@ -77,8 +83,27 @@ export const coreCommands: SlashCommand[] = [
         return
       }
 
-      patchUiState({ status: 'forging session…' })
-      ctx.session.newSession(cmd.startsWith('/new') ? 'new session started' : undefined)
+      const isNew = cmd.startsWith('/new')
+
+      const commit = () => {
+        patchUiState({ status: 'forging session…' })
+        ctx.session.newSession(isNew ? 'new session started' : undefined)
+      }
+
+      if (NO_CONFIRM_DESTRUCTIVE) {
+        return commit()
+      }
+
+      patchOverlayState({
+        confirm: {
+          cancelLabel: 'No, keep going',
+          confirmLabel: isNew ? 'Yes, start a new session' : 'Yes, clear the session',
+          danger: true,
+          detail: 'This ends the current conversation and clears the transcript.',
+          onConfirm: commit,
+          title: isNew ? 'Start a new session?' : 'Clear the current session?'
+        }
+      })
     }
   },
 
@@ -245,6 +270,44 @@ export const coreCommands: SlashCommand[] = [
     }
   },
 
+  {
+    help: 'inject a message after the next tool call (no interrupt)',
+    name: 'steer',
+    run: (arg, ctx) => {
+      // With a turn in flight the text goes to the gateway via `session.steer`;
+      // otherwise it is parked on the composer queue.
+      const text = arg?.trim() ?? ''
+
+      if (!text) {
+        return ctx.transcript.sys('usage: /steer <prompt>')
+      }
+
+      // Quoted echo of the prompt for status lines: first 50 chars, then an ellipsis.
+      const preview = `"${text.slice(0, 50)}${text.length > 50 ? '…' : ''}"`
+
+      // Nothing to steer without a busy agent and a session id. Queueing keeps the
+      // message from being lost — identical semantics to the gateway handler.
+      if (!(ctx.ui.busy && ctx.sid)) {
+        ctx.composer.enqueue(text)
+        ctx.transcript.sys(`no active turn — queued for next: ${preview}`)
+
+        return
+      }
+
+      // Any reply status other than 'queued' is reported as a rejection.
+      ctx.gateway
+        .rpc<SessionSteerResponse>('session.steer', { session_id: ctx.sid, text })
+        .then(
+          ctx.guarded<SessionSteerResponse>(r => {
+            const queuedNote = `⏩ steer queued — arrives after next tool call: ${preview}`
+
+            ctx.transcript.sys(r?.status === 'queued' ? queuedNote : 'steer rejected')
+          })
+        )
+        .catch(ctx.guardedErr)
+    }
+  },
+
   {
     help: 'undo last exchange',
     name: 'undo',
diff --git a/ui-tui/src/app/slash/commands/ops.ts b/ui-tui/src/app/slash/commands/ops.ts
index 979e1f470aa..26318b3fb06 100644
--- a/ui-tui/src/app/slash/commands/ops.ts
+++ b/ui-tui/src/app/slash/commands/ops.ts
@@ -1,7 +1,209 @@
 import type { ToolsConfigureResponse } from '../../../gatewayTypes.js'
+import type { PanelSection } from '../../../types.js'
+import { patchOverlayState } from '../../overlayStore.js'
 import type { SlashCommand } from '../types.js'
 
+interface SkillInfo { // rendered by `/skills inspect` as Name / Category / Path rows plus a description
+  category?: string
+  description?: string
+  name?: string
+  path?: string
+}
+
+interface SkillsListResponse {
+  skills?: Record<string, string[]> // section title -> items, as rendered by `/skills list`
+}
+
+interface SkillsInspectResponse {
+  info?: SkillInfo // absent info, or info without a name, is reported as "unknown skill"
+}
+
+interface SkillsSearchResponse {
+  results?: { description?: string; name: string }[] // rendered as [name, description] rows
+}
+
+interface SkillsInstallResponse {
+  installed?: boolean // falsy is reported as 'install failed'
+  name?: string
+}
+
+interface SkillsBrowseItem {
+  description?: string
+  name: string
+  source?: string
+  trust?: string // when set, shown next to the name as "name · trust"
+}
+
+interface SkillsBrowseResponse {
+  items?: SkillsBrowseItem[]
+  page?: number
+  total?: number
+  total_pages?: number // together with `page`, drives the "page X of Y" footer
+}
+
 export const opsCommands: SlashCommand[] = [
+  {
+    // Bare `/skills` opens the hub overlay locally; every recognised subcommand
+    // issues one `skills.manage` RPC and reports the reply through panel()/sys().
+    help: 'browse, inspect, install skills',
+    name: 'skills',
+    run: (arg, ctx) => {
+      const text = arg.trim()
+
+      if (!text) {
+        return patchOverlayState({ skillsHub: true })
+      }
+
+      // First word selects the action; the rest, re-joined, is its argument.
+      const [sub, ...rest] = text.split(/\s+/)
+      const query = rest.join(' ').trim()
+      const { rpc } = ctx.gateway
+      const { panel, sys } = ctx.transcript
+
+      if (sub === 'list') {
+        rpc<SkillsListResponse>('skills.manage', { action: 'list' })
+          .then(
+            ctx.guarded<SkillsListResponse>(r => {
+              const cats = Object.entries(r.skills ?? {}).sort()
+
+              if (!cats.length) {
+                return sys('no skills available')
+              }
+
+              panel(
+                'Skills',
+                cats.map<PanelSection>(([title, items]) => ({ items, title }))
+              )
+            })
+          )
+          .catch(ctx.guardedErr)
+
+        return
+      }
+
+      if (sub === 'inspect') {
+        if (!query) {
+          return sys('usage: /skills inspect <name>')
+        }
+
+        rpc<SkillsInspectResponse>('skills.manage', { action: 'inspect', query })
+          .then(
+            ctx.guarded<SkillsInspectResponse>(r => {
+              const info = r.info ?? {}
+
+              if (!info.name) {
+                return sys(`unknown skill: ${query}`)
+              }
+
+              const rows: [string, string][] = [
+                ['Name', String(info.name)],
+                ['Category', String(info.category ?? '')],
+                ['Path', String(info.path ?? '')]
+              ]
+
+              // The description section is appended only when the reply carries one.
+              panel('Skill', [{ rows }, ...(info.description ? [{ text: String(info.description) }] : [])])
+            })
+          )
+          .catch(ctx.guardedErr)
+
+        return
+      }
+
+      if (sub === 'search') {
+        if (!query) {
+          return sys('usage: /skills search <query>')
+        }
+
+        rpc<SkillsSearchResponse>('skills.manage', { action: 'search', query })
+          .then(
+            ctx.guarded<SkillsSearchResponse>(r => {
+              const results = r.results ?? []
+
+              if (!results.length) {
+                return sys(`no results for: ${query}`)
+              }
+
+              panel(`Search: ${query}`, [{ rows: results.map(s => [s.name, s.description ?? '']) }])
+            })
+          )
+          .catch(ctx.guardedErr)
+
+        return
+      }
+
+      if (sub === 'install') {
+        if (!query) {
+          return sys('usage: /skills install <name or url>')
+        }
+
+        sys(`installing ${query}…`)
+
+        rpc<SkillsInstallResponse>('skills.manage', { action: 'install', query })
+          .then(
+            ctx.guarded<SkillsInstallResponse>(r =>
+              sys(r.installed ? `installed ${r.name ?? query}` : 'install failed')
+            )
+          )
+          .catch(ctx.guardedErr)
+
+        return
+      }
+
+      if (sub === 'browse') {
+        // parseInt alone would read '2x' or '3 foo' as page 2 / 3. Require bare
+        // digits so anything else lands on the usage message; no argument means page 1.
+        const pageNum = !query ? 1 : /^\d+$/.test(query) ? parseInt(query, 10) : NaN
+
+        if (!Number.isSafeInteger(pageNum) || pageNum < 1) {
+          return sys('usage: /skills browse [page]  (page must be a positive number)')
+        }
+
+        sys('fetching community skills (scans 6 sources, may take ~15s)…')
+
+        rpc<SkillsBrowseResponse>('skills.manage', { action: 'browse', page: pageNum })
+          .then(
+            ctx.guarded<SkillsBrowseResponse>(r => {
+              const items = r.items ?? []
+
+              if (!items.length) {
+                return sys(`no skills on page ${pageNum}${r.total ? ` (total ${r.total})` : ''}`)
+              }
+
+              const rows: [string, string][] = items.map(s => [
+                s.trust ? `${s.name} · ${s.trust}` : s.name,
+                String(s.description ?? '').slice(0, 160)
+              ])
+
+              const footer: string[] = []
+
+              if (r.page && r.total_pages) {
+                footer.push(`page ${r.page} of ${r.total_pages}`)
+              }
+
+              if (r.total) {
+                footer.push(`${r.total} skills total`)
+              }
+
+              if (r.page && r.total_pages && r.page < r.total_pages) {
+                footer.push(`/skills browse ${r.page + 1} for more`)
+              }
+
+              panel(`Browse Skills${pageNum > 1 ? ` — p${pageNum}` : ''}`, [
+                { rows },
+                ...(footer.length ? [{ text: footer.join(' · ') }] : [])
+              ])
+            })
+          )
+          .catch(ctx.guardedErr)
+
+        return
+      }
+
+      sys('usage: /skills [list | inspect <n> | install <n> | search <q> | browse [page]]')
+    }
+  },
+
   {
     help: 'enable or disable tools (client-side history reset on change)',
     name: 'tools',
diff --git a/ui-tui/src/app/slash/commands/setup.ts b/ui-tui/src/app/slash/commands/setup.ts
index c6d5cc8637b..d9a948e5419 100644
--- a/ui-tui/src/app/slash/commands/setup.ts
+++ b/ui-tui/src/app/slash/commands/setup.ts
@@ -6,9 +6,8 @@ import type { SlashCommand } from '../types.js'
 
 export const setupCommands: SlashCommand[] = [
   {
-    aliases: ['provider'],
-    help: 'configure LLM provider and model (launches `hermes model`)',
-    name: 'model',
+    help: 'configure LLM provider + model (launches `hermes model`)',
+    name: 'provider',
     run: (_arg, ctx) =>
       void runExternalSetup({
         args: ['model'],
diff --git a/ui-tui/src/app/turnController.ts b/ui-tui/src/app/turnController.ts
index 73d0571734e..236324ffb98 100644
--- a/ui-tui/src/app/turnController.ts
+++ b/ui-tui/src/app/turnController.ts
@@ -1,5 +1,6 @@
 import { REASONING_PULSE_MS, STREAM_BATCH_MS } from '../config/timing.js'
 import type { SessionInterruptResponse, SubagentEventPayload } from '../gatewayTypes.js'
+import { hasReasoningTag, splitReasoning } from '../lib/reasoning.js'
 import {
   buildToolTrailLine,
   estimateTokensRough,
@@ -11,7 +12,7 @@ import type { ActiveTool, ActivityItem, Msg, SubagentProgress } from '../types.j
 
 import { resetOverlayState } from './overlayStore.js'
 import { patchTurnState, resetTurnState } from './turnStore.js'
-import { patchUiState } from './uiStore.js'
+import { getUiState, patchUiState } from './uiStore.js'
 
 const INTERRUPT_COOLDOWN_MS = 1500
 const ACTIVITY_LIMIT = 8
@@ -121,18 +122,31 @@ class TurnController {
   }
 
   flushStreamingSegment() {
-    const text = this.bufRef.trimStart()
+    const raw = this.bufRef.trimStart()
 
-    if (!text) {
+    if (!raw) {
       return
     }
 
-    const tools = this.pendingSegmentTools
+    const split = hasReasoningTag(raw) ? splitReasoning(raw) : { reasoning: '', text: raw }
+
+    if (split.reasoning && !this.reasoningText.trim()) {
+      this.reasoningText = split.reasoning
+      patchTurnState({ reasoning: this.reasoningText, reasoningTokens: estimateTokensRough(this.reasoningText) })
+    }
+
+    const text = split.text
 
     this.streamTimer = clear(this.streamTimer)
-    this.segmentMessages = [...this.segmentMessages, { role: 'assistant', text, ...(tools.length && { tools }) }]
+
+    if (text) {
+      const tools = this.pendingSegmentTools
+
+      this.segmentMessages = [...this.segmentMessages, { role: 'assistant', text, ...(tools.length && { tools }) }]
+      this.pendingSegmentTools = []
+    }
+
     this.bufRef = ''
-    this.pendingSegmentTools = []
     patchTurnState({ streamPendingTools: [], streamSegments: this.segmentMessages, streaming: '' })
   }
 
@@ -187,8 +201,11 @@ class TurnController {
   }
 
   recordMessageComplete(payload: { rendered?: string; reasoning?: string; text?: string }) {
-    const finalText = (payload.rendered ?? payload.text ?? this.bufRef).trimStart()
-    const savedReasoning = this.reasoningText.trim() || String(payload.reasoning ?? '').trim()
+    const rawText = (payload.rendered ?? payload.text ?? this.bufRef).trimStart()
+    const split = splitReasoning(rawText)
+    const finalText = split.text
+    const existingReasoning = this.reasoningText.trim() || String(payload.reasoning ?? '').trim()
+    const savedReasoning = [existingReasoning, existingReasoning ? '' : split.reasoning].filter(Boolean).join('\n\n')
     const savedReasoningTokens = savedReasoning ? estimateTokensRough(savedReasoning) : 0
     const savedToolTokens = this.toolTokenAcc
     const tools = this.pendingSegmentTools
@@ -226,10 +243,17 @@ class TurnController {
     }
 
     this.bufRef = rendered ?? this.bufRef + text
-    this.scheduleStreaming()
+
+    if (getUiState().streaming) {
+      this.scheduleStreaming()
+    }
   }
 
   recordReasoningAvailable(text: string) {
+    if (!getUiState().showReasoning) {
+      return
+    }
+
     const incoming = text.trim()
 
     if (!incoming || this.reasoningText.trim()) {
@@ -242,6 +266,10 @@ class TurnController {
   }
 
   recordReasoningDelta(text: string) {
+    if (!getUiState().showReasoning) {
+      return
+    }
+
     this.reasoningText += text
     this.scheduleReasoning()
     this.pulseReasoningStreaming()
@@ -344,7 +372,9 @@ class TurnController {
 
     this.streamTimer = setTimeout(() => {
       this.streamTimer = null
-      patchTurnState({ streaming: this.bufRef.trimStart() })
+      const raw = this.bufRef.trimStart()
+      const visible = hasReasoningTag(raw) ? splitReasoning(raw).text : raw
+      patchTurnState({ streaming: visible })
     }, STREAM_BATCH_MS)
   }
 
diff --git a/ui-tui/src/app/uiStore.ts b/ui-tui/src/app/uiStore.ts
index b7f5c20f4df..81089f1795a 100644
--- a/ui-tui/src/app/uiStore.ts
+++ b/ui-tui/src/app/uiStore.ts
@@ -11,9 +11,13 @@ const buildUiState = (): UiState => ({
   compact: false,
   detailsMode: 'collapsed',
   info: null,
+  inlineDiffs: true,
+  showCost: false,
+  showReasoning: false,
   sid: null,
   status: 'summoning hermes…',
   statusBar: true,
+  streaming: true,
   theme: DEFAULT_THEME,
   usage: ZERO
 })
diff --git a/ui-tui/src/app/useComposerState.ts b/ui-tui/src/app/useComposerState.ts
index 14a40412c99..bebda273d9f 100644
--- a/ui-tui/src/app/useComposerState.ts
+++ b/ui-tui/src/app/useComposerState.ts
@@ -70,12 +70,25 @@ export function useComposerState({ gw, onClipboardPaste, submitRef }: UseCompose
 
       setPasteSnips(prev => [...prev, { label, text: cleanedText }].slice(-32))
 
+      void gw
+        .request<{ path?: string }>('paste.collapse', { text: cleanedText })
+        .then(r => {
+          const path = r?.path
+
+          if (!path) {
+            return
+          }
+
+          setPasteSnips(prev => prev.map(s => (s.label === label ? { ...s, path } : s)))
+        })
+        .catch(() => {})
+
       return {
         cursor: cursor + insert.length,
         value: value.slice(0, cursor) + insert + value.slice(cursor)
       }
     },
-    [onClipboardPaste]
+    [gw, onClipboardPaste]
   )
 
   const openEditor = useCallback(() => {
diff --git a/ui-tui/src/app/useConfigSync.ts b/ui-tui/src/app/useConfigSync.ts
index fe3cec57378..8a3756342ba 100644
--- a/ui-tui/src/app/useConfigSync.ts
+++ b/ui-tui/src/app/useConfigSync.ts
@@ -27,14 +27,18 @@ const quietRpc = async <T extends Record<string, any> = Record<string, any>>(
   }
 }
 
-const applyDisplay = (cfg: ConfigFullResponse | null, setBell: (v: boolean) => void) => {
+export const applyDisplay = (cfg: ConfigFullResponse | null, setBell: (v: boolean) => void) => { // copies config.display into UI state; a null cfg is read as {}
   const d = cfg?.config?.display ?? {}
 
   setBell(!!d.bell_on_complete)
   patchUiState({
     compact: !!d.tui_compact,
     detailsMode: resolveDetailsMode(d),
-    statusBar: d.tui_statusbar !== false
+    inlineDiffs: d.inline_diffs !== false, // on unless explicitly set to false
+    showCost: !!d.show_cost, // off unless truthy
+    showReasoning: !!d.show_reasoning, // off unless truthy
+    statusBar: d.tui_statusbar !== false,
+    streaming: d.streaming !== false // on unless explicitly set to false
   })
 }
 
diff --git a/ui-tui/src/app/useInputHandlers.ts b/ui-tui/src/app/useInputHandlers.ts
index 70000b73c8c..258cf7cee3e 100644
--- a/ui-tui/src/app/useInputHandlers.ts
+++ b/ui-tui/src/app/useInputHandlers.ts
@@ -7,7 +7,9 @@ import type {
   SudoRespondResponse,
   VoiceRecordResponse
 } from '../gatewayTypes.js'
+import { writeOsc52Clipboard } from '../lib/osc52.js'
 
+import { getInputSelection } from './inputSelectionStore.js'
 import type { InputHandlerContext, InputHandlerResult } from './interfaces.js'
 import { $isBlocked, $overlayState, patchOverlayState } from './overlayStore.js'
 import { turnController } from './turnController.js'
@@ -63,6 +65,10 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
       return patchOverlayState({ modelPicker: false })
     }
 
+    if (overlay.skillsHub) {
+      return patchOverlayState({ skillsHub: false })
+    }
+
     if (overlay.picker) {
       return patchOverlayState({ picker: false })
     }
@@ -243,6 +249,15 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
         return copySelection()
       }
 
+      const inputSel = getInputSelection()
+
+      if (inputSel && inputSel.end > inputSel.start) {
+        writeOsc52Clipboard(inputSel.value.slice(inputSel.start, inputSel.end))
+        inputSel.clear()
+
+        return
+      }
+
       if (live.busy && live.sid) {
         return turnController.interruptTurn({
           appendMessage: actions.appendMessage,
diff --git a/ui-tui/src/app/useMainApp.ts b/ui-tui/src/app/useMainApp.ts
index 73ea9febdac..e0c18dec64f 100644
--- a/ui-tui/src/app/useMainApp.ts
+++ b/ui-tui/src/app/useMainApp.ts
@@ -1,11 +1,11 @@
-import { type ScrollBoxHandle, useApp, useHasSelection, useSelection, useStdout } from '@hermes/ink'
+import { type ScrollBoxHandle, useApp, useHasSelection, useSelection, useStdout, useTerminalTitle } from '@hermes/ink'
 import { useStore } from '@nanostores/react'
 import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
 
 import { STARTUP_RESUME_ID } from '../config/env.js'
 import { MAX_HISTORY, WHEEL_SCROLL_STEP } from '../config/limits.js'
 import { imageTokenMeta } from '../domain/messages.js'
-import { shortCwd } from '../domain/paths.js'
+import { fmtCwdBranch } from '../domain/paths.js'
 import { type GatewayClient } from '../gatewayClient.js'
 import type {
   ClarifyRespondResponse,
@@ -13,6 +13,7 @@ import type {
   GatewayEvent,
   TerminalResizeResponse
 } from '../gatewayTypes.js'
+import { useGitBranch } from '../hooks/useGitBranch.js'
 import { useVirtualHistory } from '../hooks/useVirtualHistory.js'
 import { asRpcResult, rpcErrorMessage } from '../lib/rpc.js'
 import { buildToolTrailLine, sameToolTrailGroup, toolTrailLabel } from '../lib/text.js'
@@ -284,6 +285,13 @@ export function useMainApp(gw: GatewayClient) {
 
   useConfigSync({ gw, setBellOnComplete, setVoiceEnabled, sid: ui.sid })
 
+  // ── Terminal tab title ─────────────────────────────────────────────
+  // Show model name + status so users can identify the Hermes tab.
+  const shortModel = ui.info?.model?.replace(/^.*\//, '') ?? ''
+  const titleStatus = ui.busy ? '⏳' : '✓'
+  const terminalTitle = shortModel ? `${titleStatus} ${shortModel} — Hermes` : 'Hermes'
+  useTerminalTitle(terminalTitle)
+
   useEffect(() => {
     if (!ui.sid || !stdout) {
       return
@@ -372,12 +380,13 @@ export function useMainApp(gw: GatewayClient) {
     sys
   })
 
-  const prevSidRef = useRef<null | string>(null)
+  // Drain one queued message whenever the session settles (busy → false):
+  // agent turn ends, interrupt, shell.exec finishes, error recovered, or the
+  // session first comes up with pre-queued messages. Without this, shell.exec
+  // and error paths never emit message.complete, so anything enqueued while
+  // `!sleep` / a failed turn was running would stay stuck forever.
   useEffect(() => {
-    const prev = prevSidRef.current
-    prevSidRef.current = ui.sid
-
-    if (prev !== null || !ui.sid || ui.busy || composerRefs.queueEditRef.current !== null) {
+    if (!ui.sid || ui.busy || composerRefs.queueEditRef.current !== null) {
       return
     }
 
@@ -408,7 +417,6 @@ export function useMainApp(gw: GatewayClient) {
   const onEvent = useMemo(
     () =>
       createGatewayEventHandler({
-        composer: { dequeue: composerActions.dequeue, queueEditRef: composerRefs.queueEditRef, sendQueued },
         gateway,
         session: {
           STARTUP_RESUME_ID,
@@ -424,11 +432,8 @@ export function useMainApp(gw: GatewayClient) {
     [
       appendMessage,
       bellOnComplete,
-      composerActions,
-      composerRefs,
       gateway,
       panel,
-      sendQueued,
       session.newSession,
       session.resetSession,
       session.resumeById,
@@ -613,9 +618,12 @@ export function useMainApp(gw: GatewayClient) {
     [turn, showProgressArea]
   )
 
+  const cwd = ui.info?.cwd || process.env.HERMES_CWD || process.cwd()
+  const gitBranch = useGitBranch(cwd)
+
   const appStatus = useMemo(
     () => ({
-      cwdLabel: shortCwd(ui.info?.cwd || process.env.HERMES_CWD || process.cwd()),
+      cwdLabel: fmtCwdBranch(cwd, gitBranch),
       goodVibesTick,
       sessionStartedAt: ui.sid ? sessionStartedAt : null,
       showStickyPrompt: !!stickyPrompt,
@@ -623,7 +631,7 @@ export function useMainApp(gw: GatewayClient) {
       stickyPrompt,
       voiceLabel: voiceRecording ? 'REC' : voiceProcessing ? 'STT' : `voice ${voiceEnabled ? 'on' : 'off'}`
     }),
-    [goodVibesTick, sessionStartedAt, stickyPrompt, ui, voiceEnabled, voiceProcessing, voiceRecording]
+    [cwd, gitBranch, goodVibesTick, sessionStartedAt, stickyPrompt, ui, voiceEnabled, voiceProcessing, voiceRecording]
   )
 
   const appTranscript = useMemo(
diff --git a/ui-tui/src/components/appChrome.tsx b/ui-tui/src/components/appChrome.tsx
index ed6f914c96b..2f5f807dec7 100644
--- a/ui-tui/src/components/appChrome.tsx
+++ b/ui-tui/src/components/appChrome.tsx
@@ -99,6 +99,7 @@ export function StatusRule({
   usage,
   bgCount,
   sessionStartedAt,
+  showCost,
   voiceLabel,
   t
 }: StatusRuleProps) {
@@ -136,6 +137,9 @@ export function StatusRule({
           ) : null}
           {voiceLabel ? <Text color={t.color.dim}> │ {voiceLabel}</Text> : null}
           {bgCount > 0 ? <Text color={t.color.dim}> │ {bgCount} bg</Text> : null}
+          {showCost && typeof usage.cost_usd === 'number' ? (
+            <Text color={t.color.dim}> │ ${usage.cost_usd.toFixed(4)}</Text>
+          ) : null}
         </Text>
       </Box>
 
@@ -285,6 +289,7 @@ interface StatusRuleProps {
   cwdLabel: string
   model: string
   sessionStartedAt?: number | null
+  showCost: boolean
   status: string
   statusColor: string
   t: Theme
diff --git a/ui-tui/src/components/appLayout.tsx b/ui-tui/src/components/appLayout.tsx
index 26d8e4b0a99..f13adf1bbd0 100644
--- a/ui-tui/src/components/appLayout.tsx
+++ b/ui-tui/src/components/appLayout.tsx
@@ -190,6 +190,7 @@ const ComposerPane = memo(function ComposerPane({
             cwdLabel={status.cwdLabel}
             model={ui.info?.model?.split('/').pop() ?? ''}
             sessionStartedAt={status.sessionStartedAt}
+            showCost={ui.showCost}
             status={ui.status}
             statusColor={status.statusColor}
             t={ui.theme}
diff --git a/ui-tui/src/components/appOverlays.tsx b/ui-tui/src/components/appOverlays.tsx
index 23187cf3f92..844996af3f9 100644
--- a/ui-tui/src/components/appOverlays.tsx
+++ b/ui-tui/src/components/appOverlays.tsx
@@ -9,8 +9,9 @@ import { $uiState } from '../app/uiStore.js'
 import { FloatBox } from './appChrome.js'
 import { MaskedPrompt } from './maskedPrompt.js'
 import { ModelPicker } from './modelPicker.js'
-import { ApprovalPrompt, ClarifyPrompt } from './prompts.js'
+import { ApprovalPrompt, ClarifyPrompt, ConfirmPrompt } from './prompts.js'
 import { SessionPicker } from './sessionPicker.js'
+import { SkillsHub } from './skillsHub.js'
 
 export function PromptZone({
   cols,
@@ -30,6 +31,23 @@ export function PromptZone({
     )
   }
 
+  if (overlay.confirm) {
+    const req = overlay.confirm
+
+    const onConfirm = () => {
+      patchOverlayState({ confirm: null })
+      req.onConfirm()
+    }
+
+    const onCancel = () => patchOverlayState({ confirm: null })
+
+    return (
+      <Box flexDirection="column" flexShrink={0} paddingX={1} paddingY={1}>
+        <ConfirmPrompt onCancel={onCancel} onConfirm={onConfirm} req={req} t={ui.theme} />
+      </Box>
+    )
+  }
+
   if (overlay.clarify) {
     return (
       <Box flexDirection="column" flexShrink={0} paddingX={1} paddingY={1}>
@@ -82,7 +100,7 @@ export function FloatingOverlays({
   const overlay = useStore($overlayState)
   const ui = useStore($uiState)
 
-  const hasAny = overlay.modelPicker || overlay.pager || overlay.picker || completions.length
+  const hasAny = overlay.modelPicker || overlay.pager || overlay.picker || overlay.skillsHub || completions.length
 
   if (!hasAny) {
     return null
@@ -115,6 +133,12 @@ export function FloatingOverlays({
         </FloatBox>
       )}
 
+      {overlay.skillsHub && (
+        <FloatBox color={ui.theme.color.bronze}>
+          <SkillsHub gw={gw} onClose={() => patchOverlayState({ skillsHub: false })} t={ui.theme} />
+        </FloatBox>
+      )}
+
       {overlay.pager && (
         <FloatBox color={ui.theme.color.bronze}>
           <Box flexDirection="column" paddingX={1} paddingY={1}>
diff --git a/ui-tui/src/components/branding.tsx b/ui-tui/src/components/branding.tsx
index fc019ac86f0..919c34b612f 100644
--- a/ui-tui/src/components/branding.tsx
+++ b/ui-tui/src/components/branding.tsx
@@ -126,11 +126,36 @@ export function SessionPanel({ info, sid, t }: SessionPanelProps) {
 
         {section('Tools', info.tools, 8, 'more toolsets…')}
         {section('Skills', info.skills)}
+
+        {info.mcp_servers && info.mcp_servers.length > 0 && (
+          <Box flexDirection="column" marginTop={1}>
+            <Text bold color={t.color.amber}>
+              MCP Servers
+            </Text>
+
+            {info.mcp_servers.map(s => (
+              <Text key={s.name} wrap="truncate">
+                <Text color={t.color.dim}>{`  ${s.name} `}</Text>
+                <Text color={t.color.dim}>{`[${s.transport}]`}</Text>
+                <Text color={t.color.dim}>: </Text>
+                {s.connected ? (
+                  <Text color={t.color.cornsilk}>
+                    {s.tools} tool{s.tools === 1 ? '' : 's'}
+                  </Text>
+                ) : (
+                  <Text color={t.color.error}>failed</Text>
+                )}
+              </Text>
+            ))}
+          </Box>
+        )}
+
         <Text />
 
         <Text color={t.color.cornsilk}>
           {flat(info.tools).length} tools{' · '}
           {flat(info.skills).length} skills
+          {info.mcp_servers?.length ? ` · ${info.mcp_servers.length} MCP` : ''}
           {' · '}
           <Text color={t.color.dim}>/help for commands</Text>
         </Text>
diff --git a/ui-tui/src/components/markdown.tsx b/ui-tui/src/components/markdown.tsx
index 865ab857960..5e1063837b9 100644
--- a/ui-tui/src/components/markdown.tsx
+++ b/ui-tui/src/components/markdown.tsx
@@ -1,6 +1,7 @@
-import { Box, Text } from '@hermes/ink'
+import { Box, Link, Text } from '@hermes/ink'
 import { memo, type ReactNode, useMemo } from 'react'
 
+import { highlightLine, isHighlightable } from '../lib/syntax.js'
 import type { Theme } from '../theme.js'
 
 const FENCE_RE = /^\s*(`{3,}|~{3,})(.*)$/
@@ -22,10 +23,12 @@ type Fence = {
   len: number
 }
 
-const renderLink = (key: number, t: Theme, label: string) => (
-  <Text color={t.color.amber} key={key} underline>
-    {label}
-  </Text>
+const renderLink = (key: number, t: Theme, label: string, url: string) => ( // amber underlined label wrapped in a Link to `url`
+  <Link key={key} url={url}>
+    <Text color={t.color.amber} underline>
+      {label}
+    </Text>
+  </Link>
+)
 
 const trimBareUrl = (value: string) => {
@@ -37,11 +40,17 @@ const trimBareUrl = (value: string) => {
   }
 }
 
-const renderAutolink = (key: number, t: Theme, raw: string) => (
-  <Text color={t.color.amber} key={key} underline>
-    {raw.replace(/^mailto:/, '')}
-  </Text>
-)
+const renderAutolink = (key: number, t: Theme, raw: string) => { // values with a URI scheme link as-is; bare e-mail addresses get mailto:
+  const url = /^[a-z][a-z\d+.-]*:/i.test(raw) ? raw : raw.includes('@') ? `mailto:${raw}` : raw
+
+  return (
+    <Link key={key} url={url}>
+      <Text color={t.color.amber} underline>
+        {raw.replace(/^mailto:/, '')}
+      </Text>
+    </Link>
+  )
+}
 
 const indentDepth = (indent: string) => Math.floor(indent.replace(/\t/g, '  ').length / 2)
 
@@ -141,7 +150,7 @@ function MdInline({ t, text }: { t: Theme; text: string }) {
         </Text>
       )
     } else if (m[4] && m[5]) {
-      parts.push(renderLink(parts.length, t, m[4]))
+      parts.push(renderLink(parts.length, t, m[4], m[5]))
     } else if (m[6]) {
       parts.push(renderAutolink(parts.length, t, m[6]))
     } else if (m[7]) {
@@ -282,11 +291,28 @@ function MdImpl({ compact, t, text }: MdProps) {
         start('code')
 
         const isDiff = lang === 'diff'
+        const highlighted = !isDiff && isHighlightable(lang)
 
         nodes.push(
           <Box flexDirection="column" key={key} paddingLeft={2}>
             {lang && !isDiff && <Text color={t.color.dim}>{'─ ' + lang}</Text>}
             {block.map((l, j) => {
+              if (highlighted) {
+                return (
+                  <Text key={j}>
+                    {highlightLine(l, lang, t).map(([color, text], k) =>
+                      color ? (
+                        <Text color={color} key={k}>
+                          {text}
+                        </Text>
+                      ) : (
+                        <Text key={k}>{text}</Text>
+                      )
+                    )}
+                  </Text>
+                )
+              }
+
               const add = isDiff && l.startsWith('+')
               const del = isDiff && l.startsWith('-')
               const hunk = isDiff && l.startsWith('@@')
diff --git a/ui-tui/src/components/messageLine.tsx b/ui-tui/src/components/messageLine.tsx
index 59db604e4bd..8d77a49e573 100644
--- a/ui-tui/src/components/messageLine.tsx
+++ b/ui-tui/src/components/messageLine.tsx
@@ -28,12 +28,21 @@ export const MessageLine = memo(function MessageLine({
   }
 
   if (msg.role === 'tool') {
+    const maxChars = Math.max(24, cols - 14)
+    const stripped = hasAnsi(msg.text) ? stripAnsi(msg.text) : msg.text
+    const preview = compactPreview(stripped, maxChars) || '(empty tool result)'
+
     return (
       <Box alignSelf="flex-start" borderColor={t.color.dim} borderStyle="round" marginLeft={3} paddingX={1}>
-        <Text color={t.color.dim} wrap="truncate-end">
-          {compactPreview(hasAnsi(msg.text) ? stripAnsi(msg.text) : msg.text, Math.max(24, cols - 14)) ||
-            '(empty tool result)'}
-        </Text>
+        {hasAnsi(msg.text) ? (
+          <Text wrap="truncate-end">
+            <Ansi>{msg.text}</Ansi>
+          </Text>
+        ) : (
+          <Text color={t.color.dim} wrap="truncate-end">
+            {preview}
+          </Text>
+        )}
       </Box>
     )
   }
diff --git a/ui-tui/src/components/modelPicker.tsx b/ui-tui/src/components/modelPicker.tsx
index 10a00cdf19e..5ee19e407c7 100644
--- a/ui-tui/src/components/modelPicker.tsx
+++ b/ui-tui/src/components/modelPicker.tsx
@@ -1,6 +1,7 @@
 import { Box, Text, useInput } from '@hermes/ink'
-import { useEffect, useState } from 'react'
+import { useEffect, useMemo, useState } from 'react'
 
+import { providerDisplayNames } from '../domain/providers.js'
 import type { GatewayClient } from '../gatewayClient.js'
 import type { ModelOptionProvider, ModelOptionsResponse } from '../gatewayTypes.js'
 import { asRpcResult, rpcErrorMessage } from '../lib/rpc.js'
@@ -59,6 +60,7 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
 
   const provider = providers[providerIdx]
   const models = provider?.models ?? []
+  const names = useMemo(() => providerDisplayNames(providers), [providers])
 
   useInput((ch, key) => {
     if (key.escape) {
@@ -160,7 +162,7 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
 
   if (stage === 'provider') {
     const rows = providers.map(
-      p => `${p.is_current ? '*' : ' '} ${p.name} · ${p.total_models ?? p.models?.length ?? 0} models`
+      (p, i) => `${p.is_current ? '*' : ' '} ${names[i]} · ${p.total_models ?? p.models?.length ?? 0} models`
     )
 
     const { items, off } = visibleItems(rows, providerIdx)
@@ -179,7 +181,10 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
           const idx = off + i
 
           return (
-            <Text color={providerIdx === idx ? t.color.cornsilk : t.color.dim} key={row}>
+            <Text
+              color={providerIdx === idx ? t.color.cornsilk : t.color.dim}
+              key={providers[idx]?.slug ?? `row-${idx}`}
+            >
               {providerIdx === idx ? '▸ ' : '  '}
               {i + 1}. {row}
             </Text>
@@ -201,7 +206,7 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
         Select Model
       </Text>
 
-      <Text color={t.color.dim}>{provider?.name || '(unknown provider)'}</Text>
+      <Text color={t.color.dim}>{names[providerIdx] || '(unknown provider)'}</Text>
       {!models.length ? <Text color={t.color.dim}>no models listed for this provider</Text> : null}
       {provider?.warning ? <Text color={t.color.label}>warning: {provider.warning}</Text> : null}
       {off > 0 && <Text color={t.color.dim}> ↑ {off} more</Text>}
@@ -210,7 +215,10 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
         const idx = off + i
 
         return (
-          <Text color={modelIdx === idx ? t.color.cornsilk : t.color.dim} key={row}>
+          <Text
+            color={modelIdx === idx ? t.color.cornsilk : t.color.dim}
+            key={`${provider?.slug ?? 'prov'}:${idx}:${row}`}
+          >
             {modelIdx === idx ? '▸ ' : '  '}
             {i + 1}. {row}
           </Text>
diff --git a/ui-tui/src/components/prompts.tsx b/ui-tui/src/components/prompts.tsx
index 98aba0789b5..f9d00dbfe31 100644
--- a/ui-tui/src/components/prompts.tsx
+++ b/ui-tui/src/components/prompts.tsx
@@ -2,12 +2,13 @@ import { Box, Text, useInput } from '@hermes/ink'
 import { useState } from 'react'
 
 import type { Theme } from '../theme.js'
-import type { ApprovalReq, ClarifyReq } from '../types.js'
+import type { ApprovalReq, ClarifyReq, ConfirmReq } from '../types.js'
 
 import { TextInput } from './textInput.js'
 
 const OPTS = ['once', 'session', 'always', 'deny'] as const
 const LABELS = { always: 'Always allow', deny: 'Deny', once: 'Allow once', session: 'Allow this session' } as const
+const CMD_PREVIEW_LINES = 10
 
 export function ApprovalPrompt({ onChoice, req, t }: ApprovalPromptProps) {
   const [sel, setSel] = useState(0)
@@ -34,13 +35,30 @@ export function ApprovalPrompt({ onChoice, req, t }: ApprovalPromptProps) {
     }
   })
 
+  const rawLines = req.command.split('\n')
+  const shown = rawLines.slice(0, CMD_PREVIEW_LINES)
+  const overflow = rawLines.length - shown.length
+
   return (
     <Box borderColor={t.color.warn} borderStyle="double" flexDirection="column" paddingX={1}>
       <Text bold color={t.color.warn}>
         ⚠ approval required · {req.description}
       </Text>
 
-      <Text color={t.color.cornsilk}> {req.command}</Text>
+      <Box flexDirection="column" paddingLeft={1}>
+        {shown.map((line, i) => (
+          <Text color={t.color.cornsilk} key={i} wrap="truncate-end">
+            {line || ' '}
+          </Text>
+        ))}
+
+        {overflow > 0 ? (
+          <Text color={t.color.dim}>
+            … +{overflow} more line{overflow === 1 ? '' : 's'} (full text above)
+          </Text>
+        ) : null}
+      </Box>
+
       <Text />
 
       {OPTS.map((o, i) => (
@@ -133,6 +151,68 @@ export function ClarifyPrompt({ cols = 80, onAnswer, onCancel, req, t }: Clarify
   )
 }
 
+export function ConfirmPrompt({ onCancel, onConfirm, req, t }: ConfirmPromptProps) { // two-row cancel/confirm box driven by arrows + Enter or Y/N
+  const [sel, setSel] = useState(0) // highlighted row: 0 = cancel (initial), 1 = confirm
+
+  useInput((ch, key) => {
+    const lower = ch.toLowerCase()
+
+    if (key.escape || (key.ctrl && lower === 'c') || lower === 'n') {
+      return onCancel()
+    }
+
+    if (lower === 'y') {
+      return onConfirm()
+    }
+
+    if (key.upArrow) {
+      setSel(0)
+    }
+
+    if (key.downArrow) {
+      setSel(1)
+    }
+
+    if (key.return) {
+      sel === 0 ? onCancel() : onConfirm() // Enter activates whichever row is highlighted
+    }
+  })
+
+  const accent = req.danger ? t.color.error : t.color.warn // used for the border, the title and the row cursor
+
+  const rows = [ // order matters: index 0 is cancel, index 1 is confirm (matches `sel`)
+    { color: t.color.cornsilk, label: req.cancelLabel ?? 'No' },
+    { color: req.danger ? t.color.error : t.color.cornsilk, label: req.confirmLabel ?? 'Yes' }
+  ]
+
+  return (
+    <Box borderColor={accent} borderStyle="double" flexDirection="column" paddingX={1}>
+      <Text bold color={accent}>
+        {req.danger ? '⚠' : '?'} {req.title}
+      </Text>
+
+      {req.detail ? (
+        <Box paddingLeft={1}>
+          <Text color={t.color.cornsilk} wrap="truncate-end">
+            {req.detail}
+          </Text>
+        </Box>
+      ) : null}
+
+      <Text />
+
+      {rows.map((row, i) => (
+        <Text key={row.label}>
+          <Text color={sel === i ? accent : t.color.dim}>{sel === i ? '▸ ' : '  '}</Text>
+          <Text color={sel === i ? row.color : t.color.dim}>{row.label}</Text>
+        </Text>
+      ))}
+
+      <Text color={t.color.dim}>↑/↓ select · Enter confirm · Y/N quick · Esc cancel</Text>
+    </Box>
+  )
+}
+
 interface ApprovalPromptProps {
   onChoice: (s: string) => void
   req: ApprovalReq
@@ -146,3 +226,10 @@ interface ClarifyPromptProps {
   req: ClarifyReq
   t: Theme
 }
+
+interface ConfirmPromptProps {
+  onCancel: () => void // called on Esc, Ctrl+C, N, or Enter while the cancel row is highlighted
+  onConfirm: () => void // called on Y, or Enter while the confirm row is highlighted
+  req: ConfirmReq // supplies title, optional detail, optional labels and the danger flag
+  t: Theme
+}
diff --git a/ui-tui/src/components/skillsHub.tsx b/ui-tui/src/components/skillsHub.tsx
new file mode 100644
index 00000000000..877bb0ef384
--- /dev/null
+++ b/ui-tui/src/components/skillsHub.tsx
@@ -0,0 +1,296 @@
+import { Box, Text, useInput } from '@hermes/ink'
+import { useEffect, useState } from 'react'
+
+import type { GatewayClient } from '../gatewayClient.js'
+import { rpcErrorMessage } from '../lib/rpc.js'
+import type { Theme } from '../theme.js'
+
+const VISIBLE = 12
+
+const pageOffset = (count: number, sel: number) => Math.max(Math.min(count - VISIBLE, sel - Math.trunc(VISIBLE / 2)), 0) // first visible row: puts `sel` mid-window, clamped to [0, count - VISIBLE]
+
+const visibleItems = (items: string[], sel: number) => {
+  const start = pageOffset(items.length, sel) // index of the first row in the window around `sel`
+
+  return { items: items.slice(start, start + VISIBLE), off: start }
+}
+
+export function SkillsHub({ gw, onClose, t }: SkillsHubProps) { // three-stage browser: category list → skill list → actions for one skill
+  const [skillsByCat, setSkillsByCat] = useState<Record<string, string[]>>({})
+  const [selectedCat, setSelectedCat] = useState('')
+  const [catIdx, setCatIdx] = useState(0)
+  const [skillIdx, setSkillIdx] = useState(0)
+  const [stage, setStage] = useState<'actions' | 'category' | 'skill'>('category')
+  const [info, setInfo] = useState<null | SkillInfo>(null) // null while an inspect request is pending
+  const [installing, setInstalling] = useState(false)
+  const [err, setErr] = useState('') // shared by the list, inspect and install requests
+  const [loading, setLoading] = useState(true)
+
+  useEffect(() => { // fetch the category → skill-names map; re-runs only if `gw` changes
+    gw.request<{ skills?: Record<string, string[]> }>('skills.manage', { action: 'list' })
+      .then(r => {
+        setSkillsByCat(r?.skills ?? {})
+        setErr('')
+        setLoading(false)
+      })
+      .catch((e: unknown) => {
+        setErr(rpcErrorMessage(e))
+        setLoading(false)
+      })
+  }, [gw])
+
+  const cats = Object.keys(skillsByCat).sort()
+  const skills = selectedCat ? (skillsByCat[selectedCat] ?? []) : []
+  const skillName = skills[skillIdx] ?? ''
+
+  const inspect = (name: string) => {
+    setInfo(null) // clearing makes the actions view show "loading…" until the reply lands
+    setErr('')
+
+    gw.request<{ info?: SkillInfo }>('skills.manage', { action: 'inspect', query: name })
+      .then(r => setInfo(r?.info ?? { name })) // reply without `info` falls back to a name-only record
+      .catch((e: unknown) => setErr(rpcErrorMessage(e)))
+  }
+
+  const install = (name: string) => {
+    setInstalling(true)
+    setErr('')
+
+    gw.request<{ installed?: boolean; name?: string }>('skills.manage', { action: 'install', query: name })
+      .then(() => onClose()) // a resolved request closes the hub; the reply body is not inspected
+      .catch((e: unknown) => setErr(rpcErrorMessage(e)))
+      .finally(() => setInstalling(false))
+  }
+
+  useInput((ch, key) => {
+    if (installing) { // ignore every key while an install request is in flight
+      return
+    }
+
+    if (key.escape) { // Esc steps back one stage and closes the hub from the category stage
+      if (stage === 'actions') {
+        setStage('skill')
+        setInfo(null)
+        setErr('')
+
+        return
+      }
+
+      if (stage === 'skill') {
+        setStage('category')
+        setSkillIdx(0)
+
+        return
+      }
+
+      onClose()
+
+      return
+    }
+
+    if (stage === 'actions') { // actions stage: Enter goes back, x installs, i re-inspects; other keys are dropped
+      if (key.return) {
+        setStage('skill')
+        setInfo(null)
+        setErr('')
+
+        return
+      }
+
+      if (ch.toLowerCase() === 'x' && skillName) {
+        install(skillName)
+
+        return
+      }
+
+      if (ch.toLowerCase() === 'i' && skillName) {
+        inspect(skillName)
+      }
+
+      return
+    }
+
+    const count = stage === 'category' ? cats.length : skills.length // list stages share the navigation code below
+    const sel = stage === 'category' ? catIdx : skillIdx
+    const setSel = stage === 'category' ? setCatIdx : setSkillIdx
+
+    if (key.upArrow && sel > 0) {
+      setSel(v => v - 1)
+
+      return
+    }
+
+    if (key.downArrow && sel < count - 1) {
+      setSel(v => v + 1)
+
+      return
+    }
+
+    if (key.return) {
+      if (stage === 'category') {
+        const cat = cats[catIdx]
+
+        if (!cat) {
+          return
+        }
+
+        setSelectedCat(cat)
+        setSkillIdx(0)
+        setStage('skill')
+
+        return
+      }
+
+      const name = skills[skillIdx]
+
+      if (name) {
+        setStage('actions')
+        inspect(name)
+      }
+
+      return
+    }
+
+    const n = ch === '0' ? 10 : parseInt(ch, 10) // digit shortcuts: '1'..'9' pick rows 1-9, '0' picks row 10
+
+    if (!Number.isNaN(n) && n >= 1 && n <= Math.min(10, count)) {
+      const off = pageOffset(count, sel)
+      const next = off + n - 1 // digits address rows of the visible window, so add the scroll offset
+
+      if (stage === 'category') {
+        const cat = cats[next]
+
+        if (cat) {
+          setSelectedCat(cat)
+          setCatIdx(next)
+          setSkillIdx(0)
+          setStage('skill')
+        }
+
+        return
+      }
+
+      const name = skills[next]
+
+      if (name) {
+        setSkillIdx(next)
+        setStage('actions')
+        inspect(name)
+      }
+    }
+  })
+
+  if (loading) {
+    return <Text color={t.color.dim}>loading skills…</Text>
+  }
+
+  if (err && stage === 'category') { // on the category stage an error replaces the list; the actions view shows it inline
+    return (
+      <Box flexDirection="column">
+        <Text color={t.color.label}>error: {err}</Text>
+        <Text color={t.color.dim}>Esc to cancel</Text>
+      </Box>
+    )
+  }
+
+  if (!cats.length) {
+    return (
+      <Box flexDirection="column">
+        <Text color={t.color.dim}>no skills available</Text>
+        <Text color={t.color.dim}>Esc to cancel</Text>
+      </Box>
+    )
+  }
+
+  if (stage === 'category') {
+    const rows = cats.map(c => `${c} · ${skillsByCat[c]?.length ?? 0} skills`)
+    const { items, off } = visibleItems(rows, catIdx)
+
+    return (
+      <Box flexDirection="column">
+        <Text bold color={t.color.amber}>
+          Skills Hub
+        </Text>
+
+        <Text color={t.color.dim}>select a category</Text>
+        {off > 0 && <Text color={t.color.dim}> ↑ {off} more</Text>}
+
+        {items.map((row, i) => {
+          const idx = off + i // absolute index of this row within `cats`
+
+          return (
+            <Text color={catIdx === idx ? t.color.cornsilk : t.color.dim} key={row}>
+              {catIdx === idx ? '▸ ' : '  '}
+              {i + 1}. {row}
+            </Text>
+          )
+        })}
+
+        {off + VISIBLE < rows.length && <Text color={t.color.dim}> ↓ {rows.length - off - VISIBLE} more</Text>}
+        <Text color={t.color.dim}>↑/↓ select · Enter open · 1-9,0 quick · Esc cancel</Text>
+      </Box>
+    )
+  }
+
+  if (stage === 'skill') {
+    const { items, off } = visibleItems(skills, skillIdx)
+
+    return (
+      <Box flexDirection="column">
+        <Text bold color={t.color.amber}>
+          {selectedCat}
+        </Text>
+
+        <Text color={t.color.dim}>{skills.length} skill(s)</Text>
+        {!skills.length ? <Text color={t.color.dim}>no skills in this category</Text> : null}
+        {off > 0 && <Text color={t.color.dim}> ↑ {off} more</Text>}
+
+        {items.map((row, i) => {
+          const idx = off + i // absolute index of this row within `skills`
+
+          return (
+            <Text color={skillIdx === idx ? t.color.cornsilk : t.color.dim} key={row}>
+              {skillIdx === idx ? '▸ ' : '  '}
+              {i + 1}. {row}
+            </Text>
+          )
+        })}
+
+        {off + VISIBLE < skills.length && <Text color={t.color.dim}> ↓ {skills.length - off - VISIBLE} more</Text>}
+        <Text color={t.color.dim}>
+          {skills.length ? '↑/↓ select · Enter open · 1-9,0 quick · Esc back' : 'Esc back'}
+        </Text>
+      </Box>
+    )
+  }
+
+  return ( // remaining case, stage === 'actions': detail view for the selected skill
+    <Box flexDirection="column">
+      <Text bold color={t.color.amber}>
+        {info?.name ?? skillName}
+      </Text>
+
+      <Text color={t.color.dim}>{info?.category ?? selectedCat}</Text>
+      {info?.description ? <Text color={t.color.cornsilk}>{info.description}</Text> : null}
+      {info?.path ? <Text color={t.color.dim}>path: {info.path}</Text> : null}
+      {!info && !err ? <Text color={t.color.dim}>loading…</Text> : null}
+      {err ? <Text color={t.color.label}>error: {err}</Text> : null}
+      {installing ? <Text color={t.color.amber}>installing…</Text> : null}
+
+      <Text color={t.color.dim}>i reinspect · x reinstall · Enter/Esc back</Text>
+    </Box>
+  )
+}
+
+interface SkillInfo { // shape of `info` in a skills.manage "inspect" reply; every field is optional
+  category?: string // shown under the title; the selected category is used when absent
+  description?: string
+  name?: string // shown as the title; the selected skill name is used when absent
+  path?: string
+}
+
+interface SkillsHubProps {
+  gw: GatewayClient // issues the skills.manage list / inspect / install requests
+  onClose: () => void // called on Esc at the category stage and after an install request resolves
+  t: Theme
+}
diff --git a/ui-tui/src/components/textInput.tsx b/ui-tui/src/components/textInput.tsx
index f2bbee63cf2..dff8121b5e9 100644
--- a/ui-tui/src/components/textInput.tsx
+++ b/ui-tui/src/components/textInput.tsx
@@ -2,6 +2,8 @@ import type { InputEvent, Key } from '@hermes/ink'
 import * as Ink from '@hermes/ink'
 import { useEffect, useMemo, useRef, useState } from 'react'
 
+import { setInputSelection } from '../app/inputSelectionStore.js'
+
 type InkExt = typeof Ink & {
   stringWidth: (s: string) => number
   useDeclaredCursor: (a: { line: number; column: number; active: boolean }) => (el: any) => void
@@ -351,6 +353,28 @@ export function TextInput({
     }
   }, [value])
 
+  useEffect(() => {
+    if (!focus) {
+      return
+    }
+
+    if (selected) {
+      setInputSelection({
+        clear: () => {
+          selRef.current = null
+          setSel(null)
+        },
+        end: selected.end,
+        start: selected.start,
+        value: vRef.current
+      })
+    } else {
+      setInputSelection(null)
+    }
+
+    return () => setInputSelection(null)
+  }, [focus, selected])
+
   useEffect(
     () => () => {
       if (pasteTimer.current) {
@@ -464,7 +488,7 @@ export function TextInput({
     (inp: string, k: Key, event: InputEvent) => {
       const eventRaw = event.keypress.raw
 
-      if (eventRaw === '\x1bv' || eventRaw === '\x1bV') {
+      if (eventRaw === '\x1bv' || eventRaw === '\x1bV' || eventRaw === '\x16') {
         return void emitPaste({ cursor: curRef.current, hotkey: true, text: '', value: vRef.current })
       }
 
diff --git a/ui-tui/src/config/env.ts b/ui-tui/src/config/env.ts
index 3a476d6bc5f..60f1e80c539 100644
--- a/ui-tui/src/config/env.ts
+++ b/ui-tui/src/config/env.ts
@@ -1,2 +1,3 @@
 export const STARTUP_RESUME_ID = (process.env.HERMES_TUI_RESUME ?? '').trim()
 export const MOUSE_TRACKING = !/^(?:1|true|yes|on)$/i.test((process.env.HERMES_TUI_DISABLE_MOUSE ?? '').trim())
+// True when HERMES_TUI_NO_CONFIRM is 1/true/yes/on (trimmed, case-insensitive); unset or
+// any other value is false. Presumably lets destructive actions skip their confirmation
+// prompt — consumers are outside this file, verify there.
+export const NO_CONFIRM_DESTRUCTIVE = /^(?:1|true|yes|on)$/i.test((process.env.HERMES_TUI_NO_CONFIRM ?? '').trim())
diff --git a/ui-tui/src/domain/paths.ts b/ui-tui/src/domain/paths.ts
index 78daff170a8..43c023b6ba9 100644
--- a/ui-tui/src/domain/paths.ts
+++ b/ui-tui/src/domain/paths.ts
@@ -4,3 +4,13 @@ export const shortCwd = (cwd: string, max = 28) => {
 
   return p.length <= max ? p : `…${p.slice(-(max - 1))}`
 }
+
+// Formats `cwd` followed by ` (branch)`, aiming for about `max` characters.
+// Branch names longer than 16 chars keep only their last 15, prefixed with `…`.
+// The path is always given a budget of at least 8 chars, so the result can
+// exceed `max` when `max` is small. Without a branch this is `shortCwd(cwd, max)`.
+export const fmtCwdBranch = (cwd: string, branch: null | string, max = 40) => {
+  if (!branch) {
+    return shortCwd(cwd, max)
+  }
+
+  const label = branch.length <= 16 ? branch : `…${branch.slice(-15)}`
+  const suffix = ` (${label})`
+  const pathBudget = Math.max(8, max - suffix.length)
+
+  return shortCwd(cwd, pathBudget) + suffix
+}
diff --git a/ui-tui/src/domain/providers.ts b/ui-tui/src/domain/providers.ts
new file mode 100644
index 00000000000..83ac016ff19
--- /dev/null
+++ b/ui-tui/src/domain/providers.ts
@@ -0,0 +1,11 @@
+// Returns one display label per provider, in input order. A provider whose
+// `name` is shared with another entry is labelled `name (slug)` so the two can
+// be told apart — unless its slug is empty or identical to the name, in which
+// case the plain name is kept.
+export const providerDisplayNames = (providers: readonly { name: string; slug: string }[]): string[] => {
+  // First pass: how many providers carry each name.
+  const tally = providers.reduce(
+    (acc, { name }) => acc.set(name, (acc.get(name) ?? 0) + 1),
+    new Map<string, number>()
+  )
+
+  return providers.map(({ name, slug }) => {
+    const ambiguous = (tally.get(name) ?? 0) > 1
+
+    return ambiguous && slug && slug !== name ? `${name} (${slug})` : name
+  })
+}
diff --git a/ui-tui/src/gatewayTypes.ts b/ui-tui/src/gatewayTypes.ts
index 9e21b9bc587..6fa1ad92e5b 100644
--- a/ui-tui/src/gatewayTypes.ts
+++ b/ui-tui/src/gatewayTypes.ts
@@ -47,12 +47,17 @@ export type CommandDispatchResponse =
   | { output?: string; type: 'exec' | 'plugin' }
   | { target: string; type: 'alias' }
   | { message?: string; name: string; type: 'skill' }
+  | { message: string; type: 'send' }
 
 // ── Config ───────────────────────────────────────────────────────────
 
 export interface ConfigDisplayConfig {
   bell_on_complete?: boolean
   details_mode?: string
+  inline_diffs?: boolean
+  show_cost?: boolean
+  show_reasoning?: boolean
+  streaming?: boolean
   thinking_mode?: string
   tui_compact?: boolean
   tui_statusbar?: boolean
@@ -152,6 +157,11 @@ export interface SessionInterruptResponse {
   ok?: boolean
 }
 
+export interface SessionSteerResponse {
+  status?: 'queued' | 'rejected'
+  text?: string
+}
+
 // ── Prompt / submission ──────────────────────────────────────────────
 
 export interface PromptSubmitResponse {
diff --git a/ui-tui/src/hooks/useGitBranch.ts b/ui-tui/src/hooks/useGitBranch.ts
new file mode 100644
index 00000000000..7eb4880177a
--- /dev/null
+++ b/ui-tui/src/hooks/useGitBranch.ts
@@ -0,0 +1,72 @@
+import { execFile } from 'node:child_process'
+import { promisify } from 'node:util'
+
+import { useEffect, useState } from 'react'
+
+const TTL_MS = 15_000
+const TIMEOUT_MS = 500
+
+const pexec = promisify(execFile)
+const cache = new Map<string, { at: number; branch: null | string }>()
+const inflight = new Map<string, Promise<null | string>>()
+
+// Asks git for the branch checked out in `cwd`. Resolves to `null` when git
+// prints nothing or the literal `HEAD` (what `--abbrev-ref` prints when HEAD
+// is detached), and also on any failure: non-zero exit, missing binary, or
+// exceeding TIMEOUT_MS. Never rejects.
+const resolveBranch = async (cwd: string): Promise<null | string> => {
+  const args = ['-C', cwd, 'rev-parse', '--abbrev-ref', 'HEAD']
+
+  try {
+    const result = await pexec('git', args, { timeout: TIMEOUT_MS })
+    const name = result.stdout.trim()
+
+    if (name === '' || name === 'HEAD') {
+      return null
+    }
+
+    return name
+  } catch {
+    return null
+  }
+}
+
+// De-duplicates concurrent lookups: while a lookup for `cwd` is pending every
+// caller gets the same promise. The entry is removed from `inflight` once the
+// lookup settles, so a later call starts a fresh one.
+const fetchBranch = (cwd: string): Promise<null | string> => {
+  let task = inflight.get(cwd)
+
+  if (!task) {
+    task = resolveBranch(cwd).finally(() => inflight.delete(cwd))
+    inflight.set(cwd, task)
+  }
+
+  return task
+}
+
+// React hook returning the git branch for `cwd`, or `null` when none could be
+// resolved. The initial value comes from the module-level `cache` (if any);
+// afterwards the branch is re-read on mount, whenever `cwd` changes, and on a
+// timer until unmount.
+//
+// Cache entries are fresh for TTL_MS. The timer deliberately runs slower than
+// the TTL and entries are stamped with the time the lookup *started*: polling
+// at exactly TTL_MS with completion-time stamps made every other tick land a
+// few ms inside the TTL, so the branch was only refreshed every ~2×TTL.
+export function useGitBranch(cwd: string): null | string {
+  const [branch, setBranch] = useState<null | string>(() => cache.get(cwd)?.branch ?? null)
+
+  useEffect(() => {
+    // Set by the cleanup so a lookup that settles after unmount / cwd change
+    // does not write state for a stale `cwd`.
+    let cancelled = false
+
+    const tick = async () => {
+      const startedAt = Date.now()
+      const hit = cache.get(cwd)
+
+      if (hit && startedAt - hit.at < TTL_MS) {
+        if (!cancelled) {
+          setBranch(hit.branch)
+        }
+
+        return
+      }
+
+      const b = await fetchBranch(cwd)
+
+      // Stamp with the start time (not completion) so the entry's age does not
+      // depend on how long git took to answer.
+      cache.set(cwd, { at: startedAt, branch: b })
+
+      if (!cancelled) {
+        setBranch(b)
+      }
+    }
+
+    void tick()
+
+    // TTL plus the maximum lookup duration: the entry written by the previous
+    // tick is guaranteed to have expired when the next tick runs.
+    const id = setInterval(() => void tick(), TTL_MS + TIMEOUT_MS)
+
+    return () => {
+      cancelled = true
+      clearInterval(id)
+    }
+  }, [cwd])
+
+  return branch
+}
diff --git a/ui-tui/src/lib/reasoning.ts b/ui-tui/src/lib/reasoning.ts
new file mode 100644
index 00000000000..eba63918c41
--- /dev/null
+++ b/ui-tui/src/lib/reasoning.ts
@@ -0,0 +1,50 @@
+const TAGS = ['think', 'reasoning', 'thinking', 'thought', 'REASONING_SCRATCHPAD'] as const
+
+export interface SplitReasoning {
+  reasoning: string
+  text: string
+}
+
+// Separates model "reasoning" markup from the visible reply.
+//
+// For each tag in TAGS (matched case-insensitively), first every closed
+// `<tag>…</tag>` pair — plus the whitespace that follows it — is removed, then
+// a remaining unclosed `<tag>…` running to the end of the input. Non-empty,
+// trimmed inner texts are collected in that order (tag by tag, not by position
+// in the input) and joined with blank lines.
+export function splitReasoning(input: string): SplitReasoning {
+  const chunks: string[] = []
+
+  // Replacer shared by both patterns: keep the inner text, drop the match.
+  const stash = (_whole: string, inner: string) => {
+    const body = inner.trim()
+
+    if (body) {
+      chunks.push(body)
+    }
+
+    return ''
+  }
+
+  const remaining = TAGS.reduce(
+    (acc, tag) =>
+      acc
+        .replace(new RegExp(`<${tag}>([\\s\\S]*?)</${tag}>\\s*`, 'gi'), stash)
+        .replace(new RegExp(`<${tag}>([\\s\\S]*)$`, 'i'), stash),
+    input
+  )
+
+  return {
+    reasoning: chunks.join('\n\n').trim(),
+    text: remaining.trim()
+  }
+}
+
+// Matches the opening form of any tag in TAGS. Case-insensitive so detection
+// agrees with splitReasoning, whose patterns use the `i` flag (`<THINK>` is
+// split there, so it must be detected here). No `g` flag, so `.test()` keeps
+// no `lastIndex` state between calls.
+const OPEN_TAG_RE = new RegExp(`<(?:${TAGS.join('|')})>`, 'i')
+
+// True when `input` contains an opening reasoning tag such as `<think>`.
+export const hasReasoningTag = (input: string) => OPEN_TAG_RE.test(input)
diff --git a/ui-tui/src/lib/rpc.ts b/ui-tui/src/lib/rpc.ts
index 1697d142bbf..70faa4bbbe1 100644
--- a/ui-tui/src/lib/rpc.ts
+++ b/ui-tui/src/lib/rpc.ts
@@ -26,6 +26,10 @@ export const asCommandDispatch = (value: unknown): CommandDispatchResponse | nul
     return { type: 'skill', name: o.name, message: typeof o.message === 'string' ? o.message : undefined }
   }
 
+  if (t === 'send' && typeof o.message === 'string') {
+    return { type: 'send', message: o.message }
+  }
+
   return null
 }
 
diff --git a/ui-tui/src/lib/syntax.ts b/ui-tui/src/lib/syntax.ts
new file mode 100644
index 00000000000..06173b63e9f
--- /dev/null
+++ b/ui-tui/src/lib/syntax.ts
@@ -0,0 +1,117 @@
+import type { Theme } from '../theme.js'
+
+// A rendered fragment: `[color, text]`. The color is a theme color string, or
+// '' for fragments that highlightLine leaves uncolored.
+export type Token = [string, string]
+
+// Per-language highlighting data.
+interface LangSpec {
+  // Line-comment marker (e.g. '//' or '#'), or null when the language has none.
+  comment: null | string
+  // Words highlighted as keywords; looked up with an exact, case-sensitive match.
+  keywords: Set<string>
+}
+
+// Builds a keyword set from a whitespace-separated (possibly multi-line) word list.
+const KW = (s: string) => new Set(s.match(/\S+/g) ?? [])
+
+// Keyword tables, one per supported language family. Lookups in highlightLine
+// are exact and case-sensitive.
+
+// TypeScript / JavaScript (also used for the js, jsx and tsx aliases).
+const TS = KW(`
+  abstract as async await break case catch class const continue debugger default delete do else enum export extends
+  false finally for from function get if implements import in instanceof interface is let new null of package private
+  protected public readonly return set static super switch this throw true try type typeof undefined var void while
+  with yield
+`)
+
+// Python.
+const PY = KW(`
+  False None True and as assert async await break class continue def del elif else except finally for from global if
+  import in is lambda nonlocal not or pass raise return try while with yield
+`)
+
+// POSIX-style shells (sh, bash, zsh).
+const SH = KW(`
+  if then else elif fi for in do done while until case esac function return break continue local export readonly
+  declare typeset
+`)
+
+// Go.
+const GO = KW(`
+  break case chan const continue default defer else fallthrough for func go goto if import interface map package range
+  return select struct switch type var nil true false
+`)
+
+// Rust.
+const RUST = KW(`
+  as async await break const continue crate dyn else enum extern false fn for if impl in let loop match mod move mut
+  pub ref return self Self static struct super trait true type unsafe use where while yield
+`)
+
+// SQL. NOTE(review): entries are lower-case only and the lookup is case-sensitive,
+// so upper-case SQL (`SELECT`, `FROM`, …) is not highlighted as keywords.
+const SQL = KW(`
+  select from where and or not in is null as by group order limit offset insert into values update set delete create
+  table drop alter add column primary key foreign references join left right inner outer on
+`)
+
+// Canonical language id → highlighting spec. Ids not listed here (after alias
+// resolution) are treated as not highlightable. JSON has no comment marker.
+const LANGS: Record<string, LangSpec> = {
+  go: { comment: '//', keywords: GO },
+  json: { comment: null, keywords: KW('true false null') },
+  py: { comment: '#', keywords: PY },
+  rust: { comment: '//', keywords: RUST },
+  sh: { comment: '#', keywords: SH },
+  sql: { comment: '--', keywords: SQL },
+  ts: { comment: '//', keywords: TS },
+  yaml: { comment: '#', keywords: KW('true false null yes no on off') }
+}
+
+// Alternative language names → canonical LANGS key. Lookup is exact, so
+// differently-cased names (e.g. `Python`) are not resolved.
+const ALIAS: Record<string, string> = {
+  bash: 'sh',
+  javascript: 'ts',
+  js: 'ts',
+  jsx: 'ts',
+  python: 'py',
+  rs: 'rust',
+  shell: 'sh',
+  tsx: 'ts',
+  typescript: 'ts',
+  yml: 'yaml',
+  zsh: 'sh'
+}
+
+// Maps a language name (canonical id or alias) to its spec, or null if unsupported.
+const resolve = (lang: string): LangSpec | null => LANGS[ALIAS[lang] ?? lang] ?? null
+
+// True when `lang` (canonical id or alias) has a highlighting spec.
+export const isHighlightable = (lang: string): boolean => resolve(lang) !== null
+
+// One alternative per token class, tried in this order: single-quoted, double-quoted
+// and backtick strings (backslash escapes allowed, must close on the same line),
+// integer/decimal numbers, then identifiers (letters, digits, `_`, `$`).
+const TOKEN_RE = /'(?:[^'\\]|\\.)*'|"(?:[^"\\]|\\.)*"|`(?:[^`\\]|\\.)*`|\b\d+(?:\.\d+)?\b|[A-Za-z_$][\w$]*/g
+
+// Splits one source line into `[color, text]` tokens for rendering.
+// - Unsupported language: the whole line as a single token with an empty color.
+// - Line whose first non-blank text is the language's comment marker: one dim token.
+// - Otherwise: string literals → amber, numbers → cornsilk, keywords → bronze;
+//   other identifiers and the text between matches get an empty color.
+// Concatenating the token texts reproduces `line`. For a supported language an
+// empty line yields an empty array.
+export function highlightLine(line: string, lang: string, t: Theme): Token[] {
+  const spec = resolve(lang)
+
+  if (!spec) {
+    return [['', line]]
+  }
+
+  const isComment = spec.comment ? line.trimStart().startsWith(spec.comment) : false
+
+  if (isComment) {
+    return [[t.color.dim, line]]
+  }
+
+  // Color for a single TOKEN_RE match, decided by its first character.
+  const colorFor = (word: string): string => {
+    const head = word.charAt(0)
+
+    if (head === '"' || head === "'" || head === '`') {
+      return t.color.amber
+    }
+
+    if (head >= '0' && head <= '9') {
+      return t.color.cornsilk
+    }
+
+    return spec.keywords.has(word) ? t.color.bronze : ''
+  }
+
+  const out: Token[] = []
+  let cursor = 0
+
+  for (const match of line.matchAll(TOKEN_RE)) {
+    const [word] = match
+    const at = match.index ?? 0
+
+    // Preserve whatever sits between the previous match and this one.
+    if (at > cursor) {
+      out.push(['', line.slice(cursor, at)])
+    }
+
+    out.push([colorFor(word), word])
+    cursor = at + word.length
+  }
+
+  // Trailing text after the last match (or the whole line if nothing matched).
+  if (cursor < line.length) {
+    out.push(['', line.slice(cursor)])
+  }
+
+  return out
+}
diff --git a/ui-tui/src/theme.ts b/ui-tui/src/theme.ts
index 88bc3c39081..386e436f523 100644
--- a/ui-tui/src/theme.ts
+++ b/ui-tui/src/theme.ts
@@ -78,7 +78,17 @@ function mix(a: string, b: string, t: number) {
 
 // ── Defaults ─────────────────────────────────────────────────────────
 
-export const DEFAULT_THEME: Theme = {
+const BRAND: ThemeBrand = {
+  name: 'Hermes Agent',
+  icon: '⚕',
+  prompt: '❯',
+  welcome: 'Type your message or /help for commands.',
+  goodbye: 'Goodbye! ⚕',
+  tool: '┊',
+  helpHeader: '(^_^)? Commands'
+}
+
+export const DARK_THEME: Theme = {
   color: {
     gold: '#FFD700',
     amber: '#FFBF00',
@@ -112,20 +122,59 @@ export const DEFAULT_THEME: Theme = {
     shellDollar: '#4dabf7'
   },
 
-  brand: {
-    name: 'Hermes Agent',
-    icon: '⚕',
-    prompt: '❯',
-    welcome: 'Type your message or /help for commands.',
-    goodbye: 'Goodbye! ⚕',
-    tool: '┊',
-    helpHeader: '(^_^)? Commands'
-  },
+  brand: BRAND,
 
   bannerLogo: '',
   bannerHero: ''
 }
 
+// Light-terminal palette: darker golds/ambers that stay legible on white
+// backgrounds. Same shape as DARK_THEME so `fromSkin` still layers on top
+// cleanly (#11300).
+export const LIGHT_THEME: Theme = {
+  color: {
+    gold: '#8B6914',
+    amber: '#A0651C',
+    bronze: '#7A4F1F',
+    cornsilk: '#3D2F13',
+    dim: '#7A5A0F',
+    completionBg: '#F5F5F5',
+    completionCurrentBg: mix('#F5F5F5', '#A0651C', 0.25),
+
+    label: '#7A5A0F',
+    ok: '#2E7D32',
+    error: '#C62828',
+    warn: '#E65100',
+
+    prompt: '#2B2014',
+    sessionLabel: '#7A5A0F',
+    sessionBorder: '#7A5A0F',
+
+    statusBg: '#F5F5F5',
+    statusFg: '#333333',
+    statusGood: '#2E7D32',
+    statusWarn: '#8B6914',
+    statusBad: '#D84315',
+    statusCritical: '#B71C1C',
+    selectionBg: '#D4E4F7',
+
+    diffAdded: 'rgb(200,240,200)',
+    diffRemoved: 'rgb(240,200,200)',
+    diffAddedWord: 'rgb(27,94,32)',
+    diffRemovedWord: 'rgb(183,28,28)',
+    shellDollar: '#1565C0'
+  },
+
+  brand: BRAND,
+
+  bannerLogo: '',
+  bannerHero: ''
+}
+
+// Opt-in light palette: HERMES_TUI_LIGHT=1/true/yes/on (trimmed, case-insensitive).
+// Evaluated once when this module is loaded, so changing the variable
+// afterwards has no effect on DEFAULT_THEME.
+const LIGHT_MODE = /^(?:1|true|yes|on)$/i.test((process.env.HERMES_TUI_LIGHT ?? '').trim())
+
+export const DEFAULT_THEME: Theme = LIGHT_MODE ? LIGHT_THEME : DARK_THEME
+
 // ── Skin → Theme ─────────────────────────────────────────────────────
 
 export function fromSkin(
diff --git a/ui-tui/src/types.ts b/ui-tui/src/types.ts
index ab7d7efab96..3045a74a856 100644
--- a/ui-tui/src/types.ts
+++ b/ui-tui/src/types.ts
@@ -29,6 +29,15 @@ export interface ApprovalReq {
   description: string
 }
 
+export interface ConfirmReq {
+  cancelLabel?: string
+  confirmLabel?: string
+  danger?: boolean
+  detail?: string
+  onConfirm: () => void
+  title: string
+}
+
 export interface ClarifyReq {
   choices: string[] | null
   question: string
@@ -51,8 +60,16 @@ export type Role = 'assistant' | 'system' | 'tool' | 'user'
 export type DetailsMode = 'hidden' | 'collapsed' | 'expanded'
 export type ThinkingMode = 'collapsed' | 'truncated' | 'full'
 
+export interface McpServerStatus {
+  connected: boolean
+  name: string
+  tools: number
+  transport: string
+}
+
 export interface SessionInfo {
   cwd?: string
+  mcp_servers?: McpServerStatus[]
   model: string
   release_date?: string
   skills: Record<string, string[]>
@@ -68,6 +85,7 @@ export interface Usage {
   context_max?: number
   context_percent?: number
   context_used?: number
+  cost_usd?: number
   input: number
   output: number
   total: number
diff --git a/ui-tui/src/types/hermes-ink.d.ts b/ui-tui/src/types/hermes-ink.d.ts
index 9b2deec35ff..9f8987ad34b 100644
--- a/ui-tui/src/types/hermes-ink.d.ts
+++ b/ui-tui/src/types/hermes-ink.d.ts
@@ -63,6 +63,11 @@ declare module '@hermes/ink' {
   export const Box: React.ComponentType<any>
   export const AlternateScreen: React.ComponentType<any>
   export const Ansi: React.ComponentType<any>
+  export const Link: React.ComponentType<{
+    readonly children?: React.ReactNode
+    readonly fallback?: React.ReactNode
+    readonly url: string
+  }>
   export const NoSelect: React.ComponentType<any>
   export const ScrollBox: React.ComponentType<any>
   export const Text: React.ComponentType<any>
@@ -92,6 +97,7 @@ declare module '@hermes/ink' {
   export function useHasSelection(): boolean
   export function useStdout(): { readonly stdout?: NodeJS.WriteStream }
   export function useTerminalFocus(): boolean
+  export function useTerminalTitle(title: string | null): void
   export function useDeclaredCursor(args: {
     readonly line: number
     readonly column: number
diff --git a/website/docs/getting-started/installation.md b/website/docs/getting-started/installation.md
index a28b1256e6e..219c1e7d555 100644
--- a/website/docs/getting-started/installation.md
+++ b/website/docs/getting-started/installation.md
@@ -6,7 +6,7 @@ description: "Install Hermes Agent on Linux, macOS, WSL2, or Android via Termux"
 
 # Installation
 
-Get Hermes Agent up and running in under two minutes with the one-line installer, or follow the manual steps for full control.
+Get Hermes Agent up and running in under two minutes with the one-line installer.
 
 ## Quick Install
 
@@ -82,202 +82,9 @@ If you use Nix (on NixOS, macOS, or Linux), there's a dedicated setup path with
 
 ---
 
-## Manual Installation
+## Manual / Developer Installation
 
-If you prefer full control over the installation process, follow these steps.
-
-### Step 1: Clone the Repository
-
-Clone with `--recurse-submodules` to pull the required submodules:
-
-```bash
-git clone --recurse-submodules https://github.com/NousResearch/hermes-agent.git
-cd hermes-agent
-```
-
-If you already cloned without `--recurse-submodules`:
-```bash
-git submodule update --init --recursive
-```
-
-### Step 2: Install uv & Create Virtual Environment
-
-```bash
-# Install uv (if not already installed)
-curl -LsSf https://astral.sh/uv/install.sh | sh
-
-# Create venv with Python 3.11 (uv downloads it if not present — no sudo needed)
-uv venv venv --python 3.11
-```
-
-:::tip
-You do **not** need to activate the venv to use `hermes`. The entry point has a hardcoded shebang pointing to the venv Python, so it works globally once symlinked.
-:::
-
-### Step 3: Install Python Dependencies
-
-```bash
-# Tell uv which venv to install into
-export VIRTUAL_ENV="$(pwd)/venv"
-
-# Install with all extras
-uv pip install -e ".[all]"
-```
-
-If you only want the core agent (no Telegram/Discord/cron support):
-```bash
-uv pip install -e "."
-```
-
-<details>
-<summary><strong>Optional extras breakdown</strong></summary>
-
-| Extra | What it adds | Install command |
-|-------|-------------|-----------------|
-| `all` | Everything below | `uv pip install -e ".[all]"` |
-| `messaging` | Telegram, Discord & Slack gateway | `uv pip install -e ".[messaging]"` |
-| `cron` | Cron expression parsing for scheduled tasks | `uv pip install -e ".[cron]"` |
-| `cli` | Terminal menu UI for setup wizard | `uv pip install -e ".[cli]"` |
-| `modal` | Modal cloud execution backend | `uv pip install -e ".[modal]"` |
-| `tts-premium` | ElevenLabs premium voices | `uv pip install -e ".[tts-premium]"` |
-| `voice` | CLI microphone input + audio playback | `uv pip install -e ".[voice]"` |
-| `pty` | PTY terminal support | `uv pip install -e ".[pty]"` |
-| `termux` | Tested Android / Termux bundle (`cron`, `cli`, `pty`, `mcp`, `honcho`, `acp`) | `python -m pip install -e ".[termux]" -c constraints-termux.txt` |
-| `honcho` | AI-native memory (Honcho integration) | `uv pip install -e ".[honcho]"` |
-| `mcp` | Model Context Protocol support | `uv pip install -e ".[mcp]"` |
-| `homeassistant` | Home Assistant integration | `uv pip install -e ".[homeassistant]"` |
-| `acp` | ACP editor integration support | `uv pip install -e ".[acp]"` |
-| `slack` | Slack messaging | `uv pip install -e ".[slack]"` |
-| `dev` | pytest & test utilities | `uv pip install -e ".[dev]"` |
-
-You can combine extras: `uv pip install -e ".[messaging,cron]"`
-
-:::tip Termux users
-`.[all]` is not currently available on Android because the `voice` extra pulls `faster-whisper`, which depends on `ctranslate2` wheels that are not published for Android. Use `.[termux]` for the tested mobile install path, then add individual extras only as needed.
-:::
-
-</details>
-
-### Step 4: Install Optional Submodules (if needed)
-
-```bash
-# RL training backend (optional)
-uv pip install -e "./tinker-atropos"
-```
-
-Both are optional — if you skip them, the corresponding toolsets simply won't be available.
-
-### Step 5: Install Node.js Dependencies (Optional)
-
-Only needed for **browser automation** (Browserbase-powered) and **WhatsApp bridge**:
-
-```bash
-npm install
-```
-
-### Step 6: Create the Configuration Directory
-
-```bash
-# Create the directory structure
-mkdir -p ~/.hermes/{cron,sessions,logs,memories,skills,pairing,hooks,image_cache,audio_cache,whatsapp/session}
-
-# Copy the example config file
-cp cli-config.yaml.example ~/.hermes/config.yaml
-
-# Create an empty .env file for API keys
-touch ~/.hermes/.env
-```
-
-### Step 7: Add Your API Keys
-
-Open `~/.hermes/.env` and add at minimum an LLM provider key:
-
-```bash
-# Required — at least one LLM provider:
-OPENROUTER_API_KEY=sk-or-v1-your-key-here
-
-# Optional — enable additional tools:
-FIRECRAWL_API_KEY=fc-your-key          # Web search & scraping (or self-host, see docs)
-FAL_KEY=your-fal-key                   # Image generation (FLUX)
-```
-
-Or set them via the CLI:
-```bash
-hermes config set OPENROUTER_API_KEY sk-or-v1-your-key-here
-```
-
-### Step 8: Add `hermes` to Your PATH
-
-```bash
-mkdir -p ~/.local/bin
-ln -sf "$(pwd)/venv/bin/hermes" ~/.local/bin/hermes
-```
-
-If `~/.local/bin` isn't on your PATH, add it to your shell config:
-
-```bash
-# Bash
-echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.bashrc && source ~/.bashrc
-
-# Zsh
-echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.zshrc && source ~/.zshrc
-
-# Fish
-fish_add_path $HOME/.local/bin
-```
-
-### Step 9: Configure Your Provider
-
-```bash
-hermes model       # Select your LLM provider and model
-```
-
-### Step 10: Verify the Installation
-
-```bash
-hermes version    # Check that the command is available
-hermes doctor     # Run diagnostics to verify everything is working
-hermes status     # Check your configuration
-hermes chat -q "Hello! What tools do you have available?"
-```
-
----
-
-## Quick-Reference: Manual Install (Condensed)
-
-For those who just want the commands:
-
-```bash
-# Install uv
-curl -LsSf https://astral.sh/uv/install.sh | sh
-
-# Clone & enter
-git clone --recurse-submodules https://github.com/NousResearch/hermes-agent.git
-cd hermes-agent
-
-# Create venv with Python 3.11
-uv venv venv --python 3.11
-export VIRTUAL_ENV="$(pwd)/venv"
-
-# Install everything
-uv pip install -e ".[all]"
-uv pip install -e "./tinker-atropos"
-npm install  # optional, for browser tools and WhatsApp
-
-# Configure
-mkdir -p ~/.hermes/{cron,sessions,logs,memories,skills,pairing,hooks,image_cache,audio_cache,whatsapp/session}
-cp cli-config.yaml.example ~/.hermes/config.yaml
-touch ~/.hermes/.env
-echo 'OPENROUTER_API_KEY=sk-or-v1-your-key' >> ~/.hermes/.env
-
-# Make hermes available globally
-mkdir -p ~/.local/bin
-ln -sf "$(pwd)/venv/bin/hermes" ~/.local/bin/hermes
-
-# Verify
-hermes doctor
-hermes
-```
+If you want to clone the repo and install from source — for contributing, running from a specific branch, or having full control over the virtual environment — see the [Development Setup](../developer-guide/contributing.md#development-setup) section in the Contributing guide.
 
 ---
 
diff --git a/website/docs/getting-started/learning-path.md b/website/docs/getting-started/learning-path.md
index bcdbb44d420..41170ccccdb 100644
--- a/website/docs/getting-started/learning-path.md
+++ b/website/docs/getting-started/learning-path.md
@@ -129,7 +129,7 @@ Not sure what's available? Here's a quick directory of major features:
 | **MCP** | Connect to external tool servers via Model Context Protocol | [MCP](/docs/user-guide/features/mcp) |
 | **Cron** | Schedule recurring agent tasks | [Cron](/docs/user-guide/features/cron) |
 | **Delegation** | Spawn sub-agents for parallel work | [Delegation](/docs/user-guide/features/delegation) |
-| **Code Execution** | Run code in sandboxed environments | [Code Execution](/docs/user-guide/features/code-execution) |
+| **Code Execution** | Run Python scripts that call Hermes tools programmatically | [Code Execution](/docs/user-guide/features/code-execution) |
 | **Browser** | Web browsing and scraping | [Browser](/docs/user-guide/features/browser) |
 | **Hooks** | Event-driven callbacks and middleware | [Hooks](/docs/user-guide/features/hooks) |
 | **Batch Processing** | Process multiple inputs in bulk | [Batch Processing](/docs/user-guide/features/batch-processing) |
diff --git a/website/docs/getting-started/quickstart.md b/website/docs/getting-started/quickstart.md
index 428d23b7ce3..b67f63ae36e 100644
--- a/website/docs/getting-started/quickstart.md
+++ b/website/docs/getting-started/quickstart.md
@@ -1,12 +1,35 @@
 ---
 sidebar_position: 1
 title: "Quickstart"
-description: "Your first conversation with Hermes Agent — from install to chatting in 2 minutes"
+description: "Your first conversation with Hermes Agent — from install to chatting in under 5 minutes"
 ---
 
 # Quickstart
 
-This guide walks you through installing Hermes Agent, setting up a provider, and having your first conversation. By the end, you'll know the key features and how to explore further.
+This guide gets you from zero to a working Hermes setup that survives real use. Install, choose a provider, verify a working chat, and know exactly what to do when something breaks.
+
+## Who this is for
+
+- Brand new and want the shortest path to a working setup
+- Switching providers and don't want to lose time to config mistakes
+- Setting up Hermes for a team, bot, or always-on workflow
+- Tired of "it installed, but it still does nothing"
+
+## The fastest path
+
+Pick the row that matches your goal:
+
+| Goal | Do this first | Then do this |
+|---|---|---|
+| I just want Hermes working on my machine | `hermes setup` | Run a real chat and verify it responds |
+| I already know my provider | `hermes model` | Save the config, then start chatting |
+| I want a bot or always-on setup | `hermes gateway setup` after CLI works | Connect Telegram, Discord, Slack, or another platform |
+| I want a local or self-hosted model | `hermes model` → custom endpoint | Verify the endpoint, model name, and context length |
+| I want multi-provider fallback | `hermes model` first | Add routing and fallback only after the base chat works |
+
+**Rule of thumb:** if Hermes cannot complete a normal chat, do not add more features yet. Get one clean conversation working first, then layer on gateway, cron, skills, voice, or routing.
+
+---
 
 ## 1. Install Hermes Agent
 
@@ -31,80 +54,109 @@ After it finishes, reload your shell:
 source ~/.bashrc   # or source ~/.zshrc
 ```
 
-## 2. Set Up a Provider
+For detailed installation options, prerequisites, and troubleshooting, see the [Installation guide](./installation.md).
 
-The installer configures your LLM provider automatically. To change it later, use one of these commands:
+## 2. Choose a Provider
+
+The single most important setup step. Use `hermes model` to walk through the choice interactively:
 
 ```bash
-hermes model       # Choose your LLM provider and model
-hermes tools       # Configure which tools are enabled
-hermes setup       # Or configure everything at once
+hermes model
 ```
 
-`hermes model` walks you through selecting an inference provider:
+Good defaults:
 
-| Provider | What it is | How to set up |
-|----------|-----------|---------------|
-| **Nous Portal** | Subscription-based, zero-config | OAuth login via `hermes model` |
-| **OpenAI Codex** | ChatGPT OAuth, uses Codex models | Device code auth via `hermes model` |
-| **Anthropic** | Claude models directly (Pro/Max or API key) | `hermes model` with Claude Code auth, or an Anthropic API key |
-| **OpenRouter** | Multi-provider routing across many models | Enter your API key |
-| **Z.AI** | GLM / Zhipu-hosted models | Set `GLM_API_KEY` / `ZAI_API_KEY` |
-| **Kimi / Moonshot** | Moonshot-hosted coding and chat models | Set `KIMI_API_KEY` |
-| **Kimi / Moonshot China** | China-region Moonshot endpoint | Set `KIMI_CN_API_KEY` |
-| **Arcee AI** | Trinity models | Set `ARCEEAI_API_KEY` |
-| **MiniMax** | International MiniMax endpoint | Set `MINIMAX_API_KEY` |
-| **MiniMax China** | China-region MiniMax endpoint | Set `MINIMAX_CN_API_KEY` |
-| **Alibaba Cloud** | Qwen models via DashScope | Set `DASHSCOPE_API_KEY` |
-| **Hugging Face** | 20+ open models via unified router (Qwen, DeepSeek, Kimi, etc.) | Set `HF_TOKEN` |
-| **Kilo Code** | KiloCode-hosted models | Set `KILOCODE_API_KEY` |
-| **OpenCode Zen** | Pay-as-you-go access to curated models | Set `OPENCODE_ZEN_API_KEY` |
-| **OpenCode Go** | $10/month subscription for open models | Set `OPENCODE_GO_API_KEY` |
-| **DeepSeek** | Direct DeepSeek API access | Set `DEEPSEEK_API_KEY` |
-| **NVIDIA NIM** | Nemotron models via build.nvidia.com or local NIM | Set `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) |
-| **GitHub Copilot** | GitHub Copilot subscription (GPT-5.x, Claude, Gemini, etc.) | OAuth via `hermes model`, or `COPILOT_GITHUB_TOKEN` / `GH_TOKEN` |
-| **GitHub Copilot ACP** | Copilot ACP agent backend (spawns local `copilot` CLI) | `hermes model` (requires `copilot` CLI + `copilot login`) |
-| **Vercel AI Gateway** | Vercel AI Gateway routing | Set `AI_GATEWAY_API_KEY` |
-| **Custom Endpoint** | VLLM, SGLang, Ollama, or any OpenAI-compatible API | Set base URL + API key |
+| Situation | Recommended path |
+|---|---|
+| Least friction | Nous Portal or OpenRouter |
+| You already have Claude or Codex auth | Anthropic or OpenAI Codex |
+| You want local/private inference | Ollama or any custom OpenAI-compatible endpoint |
+| You want multi-provider routing | OpenRouter |
+| You have a custom GPU server | vLLM, SGLang, LiteLLM, or any OpenAI-compatible endpoint |
+
+Most first-time users should choose a provider and accept the defaults unless they have a specific reason to change them. The full provider catalog with env vars and setup steps lives on the [Providers](../integrations/providers.md) page.
 
 :::caution Minimum context: 64K tokens
 Hermes Agent requires a model with at least **64,000 tokens** of context. Models with smaller windows cannot maintain enough working memory for multi-step tool-calling workflows and will be rejected at startup. Most hosted models (Claude, GPT, Gemini, Qwen, DeepSeek) meet this easily. If you're running a local model, set its context size to at least 64K (e.g. `--ctx-size 65536` for llama.cpp or `-c 65536` for Ollama).
 :::
 
 :::tip
-You can switch providers at any time with `hermes model` — no code changes, no lock-in. When configuring a custom endpoint, Hermes will prompt for the context window size and auto-detect it when possible. See [Context Length Detection](../integrations/providers.md#context-length-detection) for details.
+You can switch providers at any time with `hermes model` — no lock-in. For a full list of all supported providers and setup details, see [AI Providers](../integrations/providers.md).
 :::
 
-## 3. Start Chatting
+### How settings are stored
+
+Hermes separates secrets from normal config:
+
+- **Secrets and tokens** → `~/.hermes/.env`
+- **Non-secret settings** → `~/.hermes/config.yaml`
+
+The easiest way to set values correctly is through the CLI:
+
+```bash
+hermes config set model anthropic/claude-opus-4.6
+hermes config set terminal.backend docker
+hermes config set OPENROUTER_API_KEY sk-or-...
+```
+
+The right value goes to the right file automatically.
+
+## 3. Run Your First Chat
 
 ```bash
 hermes            # classic CLI
 hermes --tui      # modern TUI (recommended)
 ```
 
-That's it! You'll see a welcome banner with your model, available tools, and skills. Type a message and press Enter.
+You'll see a welcome banner with your model, available tools, and skills. Start with a prompt that's specific and easy to verify, such as one of the examples below.
 
 :::tip Pick your interface
 Hermes ships with two terminal interfaces: the classic `prompt_toolkit` CLI and a newer [TUI](../user-guide/tui.md) with modal overlays, mouse selection, and non-blocking input. Both share the same sessions, slash commands, and config — try each with `hermes` vs `hermes --tui`.
 :::
 
 ```
-❯ What can you help me with?
+Summarize this repo in 5 bullets and tell me what the main entrypoint is.
 ```
 
-The agent has access to tools for web search, file operations, terminal commands, and more — all out of the box.
+```
+Check my current directory and tell me what looks like the main project file.
+```
 
-## 4. Try Key Features
+```
+Help me set up a clean GitHub PR workflow for this codebase.
+```
 
-### Ask it to use the terminal
+**What success looks like:**
+
+- The banner shows your chosen model/provider
+- Hermes replies without error
+- It can use a tool if needed (terminal, file read, web search)
+- The conversation continues normally for more than one turn
+
+If that works, you're past the hardest part.
+
+## 4. Verify Sessions Work
+
+Before moving on, make sure resume works:
+
+```bash
+hermes --continue    # Resume the most recent session
+hermes -c            # Short form
+```
+
+That should bring you back to the session you just had. If it doesn't, check whether you're in the same profile and whether the session actually saved. This matters later when you're juggling multiple setups or machines.
+
+## 5. Try Key Features
+
+### Use the terminal
 
 ```
 ❯ What's my disk usage? Show the top 5 largest directories.
 ```
 
-The agent will run terminal commands on your behalf and show you the results.
+The agent runs terminal commands on your behalf and shows results.
 
-### Use slash commands
+### Slash commands
 
 Type `/` to see an autocomplete dropdown of all commands:
 
@@ -122,22 +174,27 @@ Press `Alt+Enter` or `Ctrl+J` to add a new line. Great for pasting code or writi
 
 ### Interrupt the agent
 
-If the agent is taking too long, just type a new message and press Enter — it interrupts the current task and switches to your new instructions. `Ctrl+C` also works.
+If the agent is taking too long, type a new message and press Enter — it interrupts the current task and switches to your new instructions. `Ctrl+C` also works.
 
-### Resume a session
+## 6. Add the Next Layer
 
-When you exit, hermes prints a resume command:
+Only after the base chat works. Pick what you need:
+
+### Bot or shared assistant
 
 ```bash
-hermes --continue    # Resume the most recent session
-hermes -c            # Short form
+hermes gateway setup    # Interactive platform configuration
 ```
 
-## 5. Explore Further
+Connect [Telegram](/docs/user-guide/messaging/telegram), [Discord](/docs/user-guide/messaging/discord), [Slack](/docs/user-guide/messaging/slack), [WhatsApp](/docs/user-guide/messaging/whatsapp), [Signal](/docs/user-guide/messaging/signal), [Email](/docs/user-guide/messaging/email), or [Home Assistant](/docs/user-guide/messaging/homeassistant).
 
-Here are some things to try next:
+### Automation and tools
 
-### Set up a sandboxed terminal
+- `hermes tools` — tune tool access per platform
+- `hermes skills` — browse and install reusable workflows
+- Cron — only after your bot or CLI setup is stable
+
+### Sandboxed terminal
 
 For safety, run the agent in a Docker container or on a remote server:
 
@@ -146,71 +203,25 @@ hermes config set terminal.backend docker    # Docker isolation
 hermes config set terminal.backend ssh       # Remote server
 ```
 
-### Connect messaging platforms
-
-Chat with Hermes from your phone or other surfaces via Telegram, Discord, Slack, WhatsApp, Signal, Email, or Home Assistant:
-
-```bash
-hermes gateway setup    # Interactive platform configuration
-```
-
-### Add voice mode
-
-Want microphone input in the CLI or spoken replies in messaging?
+### Voice mode
 
 ```bash
 pip install "hermes-agent[voice]"
 # Includes faster-whisper for free local speech-to-text
 ```
 
-Then start Hermes and enable it inside the CLI:
+Then in the CLI: `/voice on`. Press `Ctrl+B` to record. See [Voice Mode](../user-guide/features/voice-mode.md).
 
-```text
-/voice on
-```
-
-Press `Ctrl+B` to record, or use `/voice tts` to have Hermes speak its replies. See [Voice Mode](../user-guide/features/voice-mode.md) for the full setup across CLI, Telegram, Discord, and Discord voice channels.
-
-### Schedule automated tasks
-
-```
-❯ Every morning at 9am, check Hacker News for AI news and send me a summary on Telegram.
-```
-
-The agent will set up a cron job that runs automatically via the gateway.
-
-### Browse and install skills
+### Skills
 
 ```bash
 hermes skills search kubernetes
-hermes skills search react --source skills-sh
-hermes skills search https://mintlify.com/docs --source well-known
 hermes skills install openai/skills/k8s
-hermes skills install official/security/1password
-hermes skills install skills-sh/vercel-labs/json-render/json-render-react --force
 ```
 
-Tips:
-- Use `--source skills-sh` to search the public `skills.sh` directory.
-- Use `--source well-known` with a docs/site URL to discover skills from `/.well-known/skills/index.json`.
-- Use `--force` only after reviewing a third-party skill. It can override non-dangerous policy blocks, but not a `dangerous` scan verdict.
+Or use `/skills` inside a chat session.
 
-Or use the `/skills` slash command inside chat.
-
-### Use Hermes inside an editor via ACP
-
-Hermes can also run as an ACP server for ACP-compatible editors like VS Code, Zed, and JetBrains:
-
-```bash
-pip install -e '.[acp]'
-hermes acp
-```
-
-See [ACP Editor Integration](../user-guide/features/acp.md) for setup details.
-
-### Try MCP servers
-
-Connect to external tools via the Model Context Protocol:
+### MCP servers
 
 ```yaml
 # Add to ~/.hermes/config.yaml
@@ -222,6 +233,43 @@ mcp_servers:
       GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_xxx"
 ```
 
+### Editor integration (ACP)
+
+```bash
+pip install -e '.[acp]'
+hermes acp
+```
+
+See [ACP Editor Integration](../user-guide/features/acp.md).
+
+---
+
+## Common Failure Modes
+
+These are the problems that waste the most time:
+
+| Symptom | Likely cause | Fix |
+|---|---|---|
+| Hermes opens but gives empty or broken replies | Provider auth or model selection is wrong | Run `hermes model` again and confirm provider, model, and auth |
+| Custom endpoint "works" but returns garbage | Wrong base URL, model name, or not actually OpenAI-compatible | Verify the endpoint in a separate client first |
+| Gateway starts but nobody can message it | Bot token, allowlist, or platform setup is incomplete | Re-run `hermes gateway setup` and check `hermes gateway status` |
+| `hermes --continue` can't find old session | Switched profiles or session never saved | Check `hermes sessions list` and confirm you're in the right profile |
+| Model unavailable or odd fallback behavior | Provider routing or fallback settings are too aggressive | Keep routing off until the base provider is stable |
+| `hermes doctor` flags config problems | Config values are missing or stale | Fix the config, retest a plain chat before adding features |
+
+## Recovery Toolkit
+
+When something feels off, use this order:
+
+1. `hermes doctor`
+2. `hermes model`
+3. `hermes setup`
+4. `hermes sessions list`
+5. `hermes --continue`
+6. `hermes gateway status`
+
+That sequence gets you from "broken vibes" back to a known state fast.
+
 ---
 
 ## Quick Reference
@@ -243,3 +291,6 @@ mcp_servers:
 - **[Configuration](../user-guide/configuration.md)** — Customize your setup
 - **[Messaging Gateway](../user-guide/messaging/index.md)** — Connect Telegram, Discord, Slack, WhatsApp, Signal, Email, or Home Assistant
 - **[Tools & Toolsets](../user-guide/features/tools.md)** — Explore available capabilities
+- **[AI Providers](../integrations/providers.md)** — Full provider list and setup details
+- **[Skills System](../user-guide/features/skills.md)** — Reusable workflows and knowledge
+- **[Tips & Best Practices](../guides/tips.md)** — Power user tips
diff --git a/website/docs/getting-started/updating.md b/website/docs/getting-started/updating.md
index b0e34e07dec..eb74427a0a0 100644
--- a/website/docs/getting-started/updating.md
+++ b/website/docs/getting-started/updating.md
@@ -59,6 +59,21 @@ Already up to date.  (or: Updating abc1234..def5678)
 If `git status --short` shows unexpected changes after `hermes update`, stop and inspect them before continuing. This usually means local modifications were reapplied on top of the updated code, or a dependency step refreshed lockfiles.
 :::
 
+### If your terminal disconnects mid-update
+
+`hermes update` protects itself against accidental terminal loss:
+
+- The update ignores `SIGHUP`, so closing your SSH session or terminal window no longer kills it mid-install. `pip` and `git` child processes inherit this protection, so the Python environment cannot be left half-installed by a dropped connection.
+- All output is mirrored to `~/.hermes/logs/update.log` while the update runs. If your terminal disappears, reconnect and inspect the log to see whether the update finished and whether the gateway restart succeeded:
+
+```bash
+tail -f ~/.hermes/logs/update.log
+```
+
+- `Ctrl-C` (SIGINT) and system shutdown (SIGTERM) are still honored — those are deliberate cancellations, not accidents.
+
+You no longer need to wrap `hermes update` in `screen` or `tmux` to survive a terminal drop.
+
 ### Checking your current version
 
 ```bash
diff --git a/website/docs/guides/build-a-hermes-plugin.md b/website/docs/guides/build-a-hermes-plugin.md
index e8611197a17..4e2ee5cf267 100644
--- a/website/docs/guides/build-a-hermes-plugin.md
+++ b/website/docs/guides/build-a-hermes-plugin.md
@@ -419,8 +419,8 @@ Each hook is documented in full on the **[Event Hooks reference](/docs/user-guid
 | [`post_llm_call`](/docs/user-guide/features/hooks#post_llm_call) | Once per turn, after the tool-calling loop (successful turns only) | `session_id: str, user_message: str, assistant_response: str, conversation_history: list, model: str, platform: str` | ignored |
 | [`on_session_start`](/docs/user-guide/features/hooks#on_session_start) | New session created (first turn only) | `session_id: str, model: str, platform: str` | ignored |
 | [`on_session_end`](/docs/user-guide/features/hooks#on_session_end) | End of every `run_conversation` call + CLI exit | `session_id: str, completed: bool, interrupted: bool, model: str, platform: str` | ignored |
-| [`pre_api_request`](/docs/user-guide/features/hooks#pre_api_request) | Before each HTTP request to the LLM provider | `method: str, url: str, headers: dict, body: dict` | ignored |
-| [`post_api_request`](/docs/user-guide/features/hooks#post_api_request) | After each HTTP response from the LLM provider | `method: str, url: str, status_code: int, response: dict` | ignored |
+| [`on_session_finalize`](/docs/user-guide/features/hooks#on_session_finalize) | CLI/gateway tears down an active session | `session_id: str \| None, platform: str` | ignored |
+| [`on_session_reset`](/docs/user-guide/features/hooks#on_session_reset) | Gateway swaps in a new session key (`/new`, `/reset`) | `session_id: str, platform: str` | ignored |
 
 Most hooks are fire-and-forget observers — their return values are ignored. The exception is `pre_llm_call`, which can inject context into the conversation.
 
diff --git a/website/docs/guides/github-pr-review-agent.md b/website/docs/guides/github-pr-review-agent.md
new file mode 100644
index 00000000000..530d8d6df05
--- /dev/null
+++ b/website/docs/guides/github-pr-review-agent.md
@@ -0,0 +1,300 @@
+---
+sidebar_position: 10
+title: "Tutorial: GitHub PR Review Agent"
+description: "Build an automated AI code reviewer that monitors your repos, reviews pull requests, and delivers feedback — hands-free"
+---
+
+# Tutorial: Build a GitHub PR Review Agent
+
+**The problem:** Your team opens PRs faster than you can review them. PRs sit for days waiting for eyeballs. Junior devs merge bugs because nobody had time to check. You spend your mornings catching up on diffs instead of building.
+
+**The solution:** An AI agent that watches your repos around the clock, reviews every new PR for bugs, security issues, and code quality, and sends you a summary — so you only spend time on PRs that actually need human judgment.
+
+**What you'll build:**
+
+```
+┌──────────────┐     ┌───────────────┐     ┌──────────────┐     ┌──────────────┐
+│  Cron Timer  │────▶│  Hermes Agent │────▶│  GitHub API  │────▶│  Review to   │
+│  (every 2h)  │     │  + gh CLI     │     │  (PR diffs)  │     │  Telegram/   │
+│              │     │  + skill      │     │              │     │  Discord/    │
+│              │     │  + memory     │     │              │     │  local file  │
+└──────────────┘     └───────────────┘     └──────────────┘     └──────────────┘
+```
+
+This guide uses **cron jobs** to poll for PRs on a schedule — no server or public endpoint needed. Works behind NAT and firewalls.
+
+:::tip Want real-time reviews instead?
+If you have a public endpoint available, check out [Automated GitHub PR Comments with Webhooks](./webhook-github-pr-review.md) — GitHub pushes events to Hermes instantly when PRs are opened or updated.
+:::
+
+---
+
+## Prerequisites
+
+- **Hermes Agent installed** — see the [Installation guide](/docs/getting-started/installation)
+- **Gateway running** for cron jobs:
+  ```bash
+  hermes gateway install   # Install as a service
+  # or
+  hermes gateway           # Run in foreground
+  ```
+- **GitHub CLI (`gh`) installed and authenticated**:
+  ```bash
+  # Install
+  brew install gh        # macOS
+  sudo apt install gh    # Ubuntu/Debian
+
+  # Authenticate
+  gh auth login
+  ```
+- **Messaging configured** (optional) — [Telegram](/docs/user-guide/messaging/telegram) or [Discord](/docs/user-guide/messaging/discord)
+
+:::tip No messaging? No problem
+Use `deliver: "local"` to save reviews to `~/.hermes/cron/output/`. Great for testing before wiring up notifications.
+:::
+
+---
+
+## Step 1: Verify the Setup
+
+Make sure Hermes can access GitHub. Start a chat:
+
+```bash
+hermes
+```
+
+Test with a simple command:
+
+```
+Run: gh pr list --repo NousResearch/hermes-agent --state open --limit 3
+```
+
+You should see a list of open PRs. If this works, you're ready.
+
+---
+
+## Step 2: Try a Manual Review
+
+Still in the chat, ask Hermes to review a real PR:
+
+```
+Review this pull request. Read the diff, check for bugs, security issues,
+and code quality. Be specific about line numbers and quote problematic code.
+
+Run: gh pr diff 3888 --repo NousResearch/hermes-agent
+```
+
+Hermes will:
+1. Execute `gh pr diff` to fetch the code changes
+2. Read through the entire diff
+3. Produce a structured review with specific findings
+
+If you're happy with the quality, time to automate it.
+
+---
+
+## Step 3: Create a Review Skill
+
+A skill gives Hermes consistent review guidelines that persist across sessions and cron runs. Without one, review quality varies.
+
+```bash
+mkdir -p ~/.hermes/skills/code-review
+```
+
+Create `~/.hermes/skills/code-review/SKILL.md`:
+
+```markdown
+---
+name: code-review
+description: Review pull requests for bugs, security issues, and code quality
+---
+
+# Code Review Guidelines
+
+When reviewing a pull request:
+
+## What to Check
+1. **Bugs** — Logic errors, off-by-one, null/undefined handling
+2. **Security** — Injection, auth bypass, secrets in code, SSRF
+3. **Performance** — N+1 queries, unbounded loops, memory leaks
+4. **Style** — Naming conventions, dead code, missing error handling
+5. **Tests** — Are changes tested? Do tests cover edge cases?
+
+## Output Format
+For each finding:
+- **File:Line** — exact location
+- **Severity** — Critical / Warning / Suggestion
+- **What's wrong** — one sentence
+- **Fix** — how to fix it
+
+## Rules
+- Be specific. Quote the problematic code.
+- Don't flag style nitpicks unless they affect readability.
+- If the PR looks good, say so. Don't invent problems.
+- End with: APPROVE / REQUEST_CHANGES / COMMENT
+```
+
+Verify it loaded — start `hermes` and you should see `code-review` in the skills list at startup.
+
+---
+
+## Step 4: Teach It Your Conventions
+
+This is what makes the reviewer actually useful. Start a session and teach Hermes your team's standards:
+
+```
+Remember: In our backend repo, we use Python with FastAPI.
+All endpoints must have type annotations and Pydantic models.
+We don't allow raw SQL — only SQLAlchemy ORM.
+Test files go in tests/ and must use pytest fixtures.
+```
+
+```
+Remember: In our frontend repo, we use TypeScript with React.
+No `any` types allowed. All components must have props interfaces.
+We use React Query for data fetching, never useEffect for API calls.
+```
+
+These memories persist forever — the reviewer will enforce your conventions without being told each time.
+
+---
+
+## Step 5: Create the Automated Cron Job
+
+Now wire it all together. Create a cron job that runs every 2 hours:
+
+```bash
+hermes cron create "0 */2 * * *" \
+  "Check for new open PRs and review them.
+
+Repos to monitor:
+- myorg/backend-api
+- myorg/frontend-app
+
+Steps:
+1. Run: gh pr list --repo REPO --state open --limit 5 --json number,title,author,createdAt,updatedAt
+2. For each PR created or updated in the last 4 hours:
+   - Run: gh pr diff NUMBER --repo REPO
+   - Review the diff using the code-review guidelines
+3. Format output as:
+
+## PR Reviews — today
+
+### [repo] #[number]: [title]
+**Author:** [name] | **Verdict:** APPROVE/REQUEST_CHANGES/COMMENT
+[findings]
+
+If no new PRs found, say: No new PRs to review." \
+  --name "pr-review" \
+  --deliver telegram \
+  --skill code-review
+```
+
+Verify it's scheduled:
+
+```bash
+hermes cron list
+```
+
+### Other useful schedules
+
+| Schedule | When |
+|----------|------|
+| `0 */2 * * *` | Every 2 hours |
+| `0 9,13,17 * * 1-5` | Three times a day, weekdays only |
+| `0 9 * * 1` | Weekly Monday morning roundup |
+| `30m` | Every 30 minutes (high-traffic repos) |
+
+---
+
+## Step 6: Run It On Demand
+
+Don't want to wait for the schedule? Trigger it manually:
+
+```bash
+hermes cron run pr-review
+```
+
+Or from within a chat session:
+
+```
+/cron run pr-review
+```
+
+---
+
+## Going Further
+
+### Post Reviews Directly to GitHub
+
+Instead of delivering to Telegram, have the agent comment on the PR itself.
+
+Add this to your cron prompt:
+
+```
+After reviewing, post your review:
+- For issues: gh pr review NUMBER --repo REPO --comment --body "YOUR_REVIEW"
+- For critical issues: gh pr review NUMBER --repo REPO --request-changes --body "YOUR_REVIEW"
+- For clean PRs: gh pr review NUMBER --repo REPO --approve --body "Looks good"
+```
+
+:::caution
+Make sure `gh` has a token with `repo` scope. Reviews are posted as whoever `gh` is authenticated as.
+:::
+
+### Weekly PR Dashboard
+
+Create a Monday morning overview of all your repos:
+
+```bash
+hermes cron create "0 9 * * 1" \
+  "Generate a weekly PR dashboard:
+- myorg/backend-api
+- myorg/frontend-app
+- myorg/infra
+
+For each repo show:
+1. Open PR count and oldest PR age
+2. PRs merged this week
+3. Stale PRs (older than 5 days)
+4. PRs with no reviewer assigned
+
+Format as a clean summary." \
+  --name "weekly-dashboard" \
+  --deliver telegram
+```
+
+### Multi-Repo Monitoring
+
+Scale up by adding more repos to the prompt. The agent processes them sequentially — no extra setup needed.
+
+---
+
+## Troubleshooting
+
+### "gh: command not found"
+The gateway runs in a minimal environment. Ensure `gh` is in the system PATH and restart the gateway.
+
+### Reviews are too generic
+1. Add the `code-review` skill (Step 3)
+2. Teach Hermes your conventions via memory (Step 4)
+3. The more context it has about your stack, the better the reviews
+
+### Cron job doesn't run
+```bash
+hermes gateway status    # Is the gateway running?
+hermes cron list         # Is the job enabled?
+```
+
+### Rate limits
+GitHub allows 5,000 API requests/hour for authenticated users. Each PR review uses ~3-5 requests (list + diff + optional comments). Even reviewing 100 PRs/day stays well within limits.
+
+---
+
+## What's Next?
+
+- **[Webhook-Based PR Reviews](./webhook-github-pr-review.md)** — get instant reviews when PRs are opened (requires a public endpoint)
+- **[Daily Briefing Bot](/docs/guides/daily-briefing-bot)** — combine PR reviews with your morning news digest
+- **[Build a Plugin](/docs/guides/build-a-hermes-plugin)** — wrap the review logic into a shareable plugin
+- **[Profiles](/docs/user-guide/profiles)** — run a dedicated reviewer profile with its own memory and config
+- **[Fallback Providers](/docs/user-guide/features/fallback-providers)** — ensure reviews run even when one provider is down
diff --git a/website/docs/guides/webhook-github-pr-review.md b/website/docs/guides/webhook-github-pr-review.md
new file mode 100644
index 00000000000..b0dd15ecea1
--- /dev/null
+++ b/website/docs/guides/webhook-github-pr-review.md
@@ -0,0 +1,329 @@
+---
+sidebar_position: 11
+sidebar_label: "GitHub PR Reviews via Webhook"
+title: "Automated GitHub PR Comments with Webhooks"
+description: "Connect Hermes to GitHub so it automatically fetches PR diffs, reviews code changes, and posts comments — triggered by webhooks with no manual prompting"
+---
+
+# Automated GitHub PR Comments with Webhooks
+
+This guide walks you through connecting Hermes Agent to GitHub so it automatically fetches a pull request's diff, analyzes the code changes, and posts a comment — triggered by a webhook event with no manual prompting.
+
+When a PR is opened or updated, GitHub sends a webhook POST to your Hermes instance. Hermes runs the agent with a prompt that instructs it to retrieve the diff via the `gh` CLI, and the response is posted back to the PR thread.
+
+:::tip Want a simpler setup without a public endpoint?
+If you don't have a public URL or just want to get started quickly, check out [Build a GitHub PR Review Agent](./github-pr-review-agent.md) — uses cron jobs to poll for PRs on a schedule, works behind NAT and firewalls.
+:::
+
+:::info Reference docs
+For the full webhook platform reference (all config options, delivery types, dynamic subscriptions, security model) see [Webhooks](/docs/user-guide/messaging/webhooks).
+:::
+
+:::warning Prompt injection risk
+Webhook payloads contain attacker-controlled data — PR titles, commit messages, and descriptions can contain malicious instructions. When your webhook endpoint is exposed to the internet, run the gateway in a sandboxed environment (Docker, SSH backend). See the [security section](#security-notes) below.
+:::
+
+---
+
+## Prerequisites
+
+- Hermes Agent installed and running (`hermes gateway`)
+- [`gh` CLI](https://cli.github.com/) installed and authenticated on the gateway host (`gh auth login`)
+- A publicly reachable URL for your Hermes instance (see [Local testing with ngrok](#local-testing-with-ngrok) if running locally)
+- Admin access to the GitHub repository (required to manage webhooks)
+
+---
+
+## Step 1 — Enable the webhook platform
+
+Add the following to your `~/.hermes/config.yaml`:
+
+```yaml
+platforms:
+  webhook:
+    enabled: true
+    extra:
+      port: 8644          # default; change if another service occupies this port
+      rate_limit: 30      # max requests per minute per route (not a global cap)
+
+      routes:
+        github-pr-review:
+          secret: "your-webhook-secret-here"   # must match the GitHub webhook secret exactly
+          events:
+            - pull_request
+
+          # The agent is instructed to fetch the actual diff before reviewing.
+          # {number} and {repository.full_name} are resolved from the GitHub payload.
+          prompt: |
+            A pull request event was received (action: {action}).
+
+            PR #{number}: {pull_request.title}
+            Author: {pull_request.user.login}
+            Branch: {pull_request.head.ref} → {pull_request.base.ref}
+            Description: {pull_request.body}
+            URL: {pull_request.html_url}
+
+            If the action is "closed" or "labeled", stop here and do not post a comment.
+
+            Otherwise:
+            1. Run: gh pr diff {number} --repo {repository.full_name}
+            2. Review the code changes for correctness, security issues, and clarity.
+            3. Write a concise, actionable review comment and post it.
+
+          deliver: github_comment
+          deliver_extra:
+            repo: "{repository.full_name}"
+            pr_number: "{number}"
+```
+
+**Key fields:**
+
+| Field | Description |
+|---|---|
+| `secret` (route-level) | HMAC secret for this route. Falls back to the global `extra.secret` if omitted. |
+| `events` | List of `X-GitHub-Event` header values to accept. Empty list = accept all. |
+| `prompt` | Template; `{field}` and `{nested.field}` resolve from the GitHub payload. |
+| `deliver` | `github_comment` posts via `gh pr comment`. `log` just writes to the gateway log. |
+| `deliver_extra.repo` | Resolves to e.g. `org/repo` from the payload. |
+| `deliver_extra.pr_number` | Resolves to the PR number from the payload. |
+
+:::note The payload does not contain code
+The GitHub webhook payload includes PR metadata (title, description, branch names, URLs) but **not the diff**. The prompt above instructs the agent to run `gh pr diff` to fetch the actual changes. The `terminal` tool is included in the default `hermes-webhook` toolset, so no extra configuration is needed.
+:::
+
+---
+
+## Step 2 — Start the gateway
+
+```bash
+hermes gateway
+```
+
+You should see:
+
+```
+[webhook] Listening on 0.0.0.0:8644 — routes: github-pr-review
+```
+
+Verify it's running:
+
+```bash
+curl http://localhost:8644/health
+# {"status": "ok", "platform": "webhook"}
+```
+
+---
+
+## Step 3 — Register the webhook on GitHub
+
+1. Go to your repository → **Settings** → **Webhooks** → **Add webhook**
+2. Fill in:
+   - **Payload URL:** `https://your-public-url.example.com/webhooks/github-pr-review`
+   - **Content type:** `application/json`
+   - **Secret:** the same value you set for `secret` in the route config
+   - **Which events?** → Select individual events → check **Pull requests**
+3. Click **Add webhook**
+
+GitHub will immediately send a `ping` event to confirm the connection. It is safely ignored — `ping` is not in your `events` list — and returns `{"status": "ignored", "event": "ping"}`. It is only logged at DEBUG level, so it won't appear in the console at the default log level.
+
+---
+
+## Step 4 — Open a test PR
+
+Create a branch, push a change, and open a PR. Within 30–90 seconds (depending on PR size and model), Hermes should post a review comment.
+
+To follow the agent's progress in real time:
+
+```bash
+tail -f "${HERMES_HOME:-$HOME/.hermes}/logs/gateway.log"
+```
+
+---
+
+## Local testing with ngrok
+
+If Hermes is running on your laptop, use [ngrok](https://ngrok.com/) to expose it:
+
+```bash
+ngrok http 8644
+```
+
+Copy the `https://...ngrok-free.app` URL and use it as your GitHub Payload URL. On the free ngrok tier the URL changes each time ngrok restarts — update your GitHub webhook each session. Paid ngrok accounts get a static domain.
+
+You can smoke-test a static route directly with `curl` — no GitHub account or real PR needed.
+
+:::tip Use `deliver: log` when testing locally
+Change `deliver: github_comment` to `deliver: log` in your config while testing. Otherwise the agent will attempt to post a comment on PR #99 of the fake `org/repo` repository from the test payload, which will fail. Switch back to `deliver: github_comment` once you're satisfied with the prompt output.
+:::
+
+```bash
+SECRET="your-webhook-secret-here"
+BODY='{"action":"opened","number":99,"pull_request":{"title":"Test PR","body":"Adds a feature.","user":{"login":"testuser"},"head":{"ref":"feat/x"},"base":{"ref":"main"},"html_url":"https://github.com/org/repo/pull/99"},"repository":{"full_name":"org/repo"}}'
+SIG=$(printf '%s' "$BODY" | openssl dgst -sha256 -hmac "$SECRET" -hex | awk '{print "sha256="$2}')
+
+curl -s -X POST http://localhost:8644/webhooks/github-pr-review \
+  -H "Content-Type: application/json" \
+  -H "X-GitHub-Event: pull_request" \
+  -H "X-Hub-Signature-256: $SIG" \
+  -d "$BODY"
+# Expected: {"status":"accepted","route":"github-pr-review","event":"pull_request","delivery_id":"..."}
+```
+
+Then watch the agent run:
+```bash
+tail -f "${HERMES_HOME:-$HOME/.hermes}/logs/gateway.log"
+```
+
+:::note
+`hermes webhook test <name>` only works for **dynamic subscriptions** created with `hermes webhook subscribe`. It does not read routes from `config.yaml`.
+:::
+
+---
+
+## Filtering to specific actions
+
+GitHub sends `pull_request` events for many actions: `opened`, `synchronize`, `reopened`, `closed`, `labeled`, etc. The `events` list filters only by the `X-GitHub-Event` header value — it cannot filter by action sub-type at the routing level.
+
+The prompt in Step 1 already handles this by instructing the agent to stop early for `closed` and `labeled` events.
+
+:::warning The agent still runs and consumes tokens
+The "stop here" instruction prevents a meaningful review, but the agent still runs to completion for every `pull_request` event regardless of action. GitHub webhooks can only filter by event type (`pull_request`, `push`, `issues`, etc.) — not by action sub-type (`opened`, `closed`, `labeled`). There is no routing-level filter for sub-actions. For high-volume repos, accept this cost or filter upstream with a GitHub Actions workflow that calls your webhook URL conditionally.
+:::
+
+> There is no Jinja2 or conditional template syntax. `{field}` and `{nested.field}` are the only substitutions supported. Anything else is passed verbatim to the agent.
+
+---
+
+## Using a skill for consistent review style
+
+Load a [Hermes skill](/docs/user-guide/features/skills) to give the agent a consistent review persona. Add `skills` to your route inside `platforms.webhook.extra.routes` in `config.yaml`:
+
+```yaml
+platforms:
+  webhook:
+    enabled: true
+    extra:
+      routes:
+        github-pr-review:
+          secret: "your-webhook-secret-here"
+          events: [pull_request]
+          prompt: |
+            A pull request event was received (action: {action}).
+            PR #{number}: {pull_request.title} by {pull_request.user.login}
+            URL: {pull_request.html_url}
+
+            If the action is "closed" or "labeled", stop here and do not post a comment.
+
+            Otherwise:
+            1. Run: gh pr diff {number} --repo {repository.full_name}
+            2. Review the diff using your review guidelines.
+            3. Write a concise, actionable review comment and post it.
+          skills:
+            - review
+          deliver: github_comment
+          deliver_extra:
+            repo: "{repository.full_name}"
+            pr_number: "{number}"
+```
+
+> **Note:** Only the first skill in the list that is found is loaded. Hermes does not stack multiple skills — subsequent entries are ignored.
+
+---
+
+## Sending responses to Slack or Discord instead
+
+Replace the `deliver` and `deliver_extra` fields inside your route with your target platform:
+
+```yaml
+# Inside platforms.webhook.extra.routes.<route-name>:
+
+# Slack
+deliver: slack
+deliver_extra:
+  chat_id: "C0123456789"   # Slack channel ID (omit to use the configured home channel)
+
+# Discord
+deliver: discord
+deliver_extra:
+  chat_id: "987654321012345678"  # Discord channel ID (omit to use home channel)
+```
+
+The target platform must also be enabled and connected in the gateway. If `chat_id` is omitted, the response is sent to that platform's configured home channel.
+
+Valid `deliver` values: `log` · `github_comment` · `telegram` · `discord` · `slack` · `signal` · `sms`
+
+---
+
+## GitLab support
+
+The same adapter works with GitLab. GitLab uses `X-Gitlab-Token` for authentication (plain string match, not HMAC) — Hermes handles both automatically.
+
+For event filtering, GitLab sets `X-Gitlab-Event` to values like `Merge Request Hook`, `Push Hook`, `Pipeline Hook`. Use the exact header value in `events`:
+
+```yaml
+events:
+  - Merge Request Hook
+```
+
+GitLab payload fields differ from GitHub's — e.g. `{object_attributes.title}` for the MR title and `{object_attributes.iid}` for the MR number. The easiest way to discover the full payload structure is GitLab's **Test** button in your webhook settings, combined with the **Recent Deliveries** log. Alternatively, omit `prompt` from your route config — Hermes will then pass the full payload as formatted JSON directly to the agent, and the agent's response (visible in the gateway log with `deliver: log`) will describe its structure.
+
+---
+
+## Security notes
+
+- **Never use `INSECURE_NO_AUTH`** in production — it disables signature validation entirely. It is only for local development.
+- **Rotate your webhook secret** periodically and update it in both GitHub (webhook settings) and your `config.yaml`.
+- **Rate limiting** is 30 req/min per route by default (configurable via `extra.rate_limit`). Exceeding it returns `429`.
+- **Duplicate deliveries** (webhook retries) are deduplicated via a 1-hour idempotency cache. The cache key is `X-GitHub-Delivery` if present, otherwise `X-Request-ID`, otherwise a millisecond timestamp. When neither delivery ID header is set, retries are **not** deduplicated, because each retry gets a different timestamp key.
+- **Prompt injection:** PR titles, descriptions, and commit messages are attacker-controlled. Malicious PRs could attempt to manipulate the agent's actions. Run the gateway in a sandboxed environment (Docker, VM) when exposed to the public internet.
+
+---
+
+## Troubleshooting
+
+| Symptom | Check |
+|---|---|
+| `401 Invalid signature` | Secret in config.yaml doesn't match GitHub webhook secret |
+| `404 Unknown route` | Route name in the URL doesn't match the key in `routes:` |
+| `429 Rate limit exceeded` | 30 req/min per route exceeded — common when re-delivering test events from GitHub's UI; wait a minute or raise `extra.rate_limit` |
+| No comment posted | `gh` not installed, not on PATH, or not authenticated (`gh auth login`) |
+| Agent runs but no comment | Check the gateway log — if the agent output was empty or just "SKIP", delivery is still attempted |
+| Port already in use | Change `extra.port` in config.yaml |
+| Agent runs but reviews only the PR description | The prompt doesn't include the `gh pr diff` instruction — the diff is not in the webhook payload |
+| Can't see the ping event | Ignored events return `{"status":"ignored","event":"ping"}` at DEBUG log level only — check GitHub's delivery log (repo → Settings → Webhooks → your webhook → Recent Deliveries) |
+
+**GitHub's Recent Deliveries tab** (repo → Settings → Webhooks → your webhook) shows the exact request headers, payload, HTTP status, and response body for every delivery. It is the fastest way to diagnose failures without touching your server logs.
+
+---
+
+## Full config reference
+
+```yaml
+platforms:
+  webhook:
+    enabled: true
+    extra:
+      host: "0.0.0.0"         # bind address (default: 0.0.0.0)
+      port: 8644               # listen port (default: 8644)
+      secret: ""               # optional global fallback secret
+      rate_limit: 30           # requests per minute per route
+      max_body_bytes: 1048576  # payload size limit in bytes (default: 1 MB)
+
+      routes:
+        <route-name>:
+          secret: "required-per-route"
+          events: []            # [] = accept all; otherwise list X-GitHub-Event values
+          prompt: ""            # {field} / {nested.field} resolved from payload
+          skills: []            # first matching skill is loaded (only one)
+          deliver: "log"        # log | github_comment | telegram | discord | slack | signal | sms
+          deliver_extra: {}     # repo + pr_number for github_comment; chat_id for others
+```
+
+---
+
+## What's Next?
+
+- **[Cron-Based PR Reviews](./github-pr-review-agent.md)** — poll for PRs on a schedule, no public endpoint needed
+- **[Webhook Reference](/docs/user-guide/messaging/webhooks)** — full config reference for the webhook platform
+- **[Build a Plugin](/docs/guides/build-a-hermes-plugin)** — package review logic into a shareable plugin
+- **[Profiles](/docs/user-guide/profiles)** — run a dedicated reviewer profile with its own memory and config
diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md
index 750ad671cda..9d32fc21ecb 100644
--- a/website/docs/integrations/providers.md
+++ b/website/docs/integrations/providers.md
@@ -289,12 +289,98 @@ Base URLs can be overridden with `GLM_BASE_URL`, `KIMI_BASE_URL`, `MINIMAX_BASE_
 When using the Z.AI / GLM provider, Hermes automatically probes multiple endpoints (global, China, coding variants) to find one that accepts your API key. You don't need to set `GLM_BASE_URL` manually — the working endpoint is detected and cached automatically.
 :::
 
-### xAI (Grok) Prompt Caching
+### xAI (Grok) — Responses API + Prompt Caching
+
+xAI is wired through the Responses API (`codex_responses` transport) for automatic reasoning support on Grok 4 models — no `reasoning_effort` parameter is needed because the server reasons by default. Set `XAI_API_KEY` in `~/.hermes/.env` and pick xAI in `hermes model`, or use `grok` as a shortcut for `/model grok-4-1-fast-reasoning`.
 
 When using xAI as a provider (any base URL containing `x.ai`), Hermes automatically enables prompt caching by sending the `x-grok-conv-id` header with every API request. This routes requests to the same server within a conversation session, allowing xAI's infrastructure to reuse cached system prompts and conversation history.
 
 No configuration is needed — caching activates automatically when an xAI endpoint is detected and a session ID is available. This reduces latency and cost for multi-turn conversations.
 
+xAI also ships a dedicated TTS endpoint (`/v1/tts`). Select **xAI TTS** in `hermes tools` → Voice & TTS, or see the [Voice & TTS](../user-guide/features/tts.md#text-to-speech) page for config.
+
+### Ollama Cloud — Managed Ollama Models, OAuth + API Key
+
+[Ollama Cloud](https://ollama.com/cloud) hosts the same open-weight catalog as local Ollama but without the GPU requirement. Pick it in `hermes model` as **Ollama Cloud**, paste your API key from [ollama.com/settings/keys](https://ollama.com/settings/keys), and Hermes auto-discovers the available models.
+
+```bash
+hermes model
+# → pick "Ollama Cloud"
+# → paste your OLLAMA_API_KEY
+# → select from discovered models (gpt-oss:120b, glm-4.6:cloud, qwen3-coder:480b-cloud, etc.)
+```
+
+Or `config.yaml` directly:
+```yaml
+model:
+  provider: "ollama-cloud"
+  default: "gpt-oss:120b"
+```
+
+The model catalog is fetched dynamically from `ollama.com/v1/models` and cached for one hour. `model:tag` notation (e.g. `qwen3-coder:480b-cloud`) is preserved through normalization — keep the colon between the model name and the tag; don't replace it with a dash.
+
+:::tip Ollama Cloud vs local Ollama
+Both speak the same OpenAI-compatible API. Cloud is a first-class provider (`--provider ollama-cloud`, `OLLAMA_API_KEY`); local Ollama is reached via the Custom Endpoint flow (base URL `http://localhost:11434/v1`, no key). Use cloud for large models you can't run locally; use local for privacy or offline work.
+:::
+
+### AWS Bedrock
+
+Anthropic Claude, Amazon Nova, DeepSeek v3.2, Meta Llama 4, and other models via AWS Bedrock. Uses the AWS SDK (`boto3`) credential chain — no API key, just standard AWS auth.
+
+```bash
+# Simplest — named profile in ~/.aws/credentials
+hermes chat --provider bedrock --model us.anthropic.claude-sonnet-4-6
+
+# Or with explicit env vars
+AWS_PROFILE=myprofile AWS_REGION=us-east-1 hermes chat --provider bedrock --model us.anthropic.claude-sonnet-4-6
+```
+
+Or permanently in `config.yaml`:
+```yaml
+model:
+  provider: "bedrock"
+  default: "us.anthropic.claude-sonnet-4-6"
+bedrock:
+  region: "us-east-1"          # or set AWS_REGION
+  # profile: "myprofile"       # or set AWS_PROFILE
+  # discovery: true            # auto-discover region from IAM
+  # guardrail:                 # optional Bedrock Guardrails
+  #   id: "your-guardrail-id"
+  #   version: "DRAFT"
+```
+
+Authentication uses the standard boto3 chain: explicit `AWS_ACCESS_KEY_ID`/`AWS_SECRET_ACCESS_KEY`, `AWS_PROFILE` from `~/.aws/credentials`, IAM role on EC2/ECS/Lambda, IMDS, or SSO. No env var is required if you're already authenticated with the AWS CLI.
+
+Bedrock uses the **Converse API** under the hood — requests are translated to Bedrock's model-agnostic shape, so the same config works for Claude, Nova, DeepSeek, and Llama models. Set `BEDROCK_BASE_URL` only if you're calling a non-default regional endpoint.
+
+See the [AWS Bedrock guide](/docs/guides/aws-bedrock) for a walkthrough of IAM setup, region selection, and cross-region inference.
+
+### Qwen Portal (OAuth)
+
+Alibaba's Qwen Portal with browser-based OAuth login. Pick **Qwen OAuth (Portal)** in `hermes model`, sign in through the browser, and Hermes persists the refresh token.
+
+```bash
+hermes model
+# → pick "Qwen OAuth (Portal)"
+# → browser opens; sign in with your Alibaba account
+# → confirm — credentials are saved to ~/.hermes/auth.json
+
+hermes chat   # uses portal.qwen.ai/v1 endpoint
+```
+
+Or configure `config.yaml`:
+```yaml
+model:
+  provider: "qwen-oauth"
+  default: "qwen3-coder-plus"
+```
+
+Set `HERMES_QWEN_BASE_URL` only if the portal endpoint relocates (default: `https://portal.qwen.ai/v1`).
+
+:::tip Qwen OAuth vs DashScope (Alibaba)
+`qwen-oauth` uses the consumer-facing Qwen Portal with OAuth login — ideal for individual users. The `alibaba` provider uses DashScope's enterprise API with a `DASHSCOPE_API_KEY` — ideal for programmatic / production workloads. Both route to Qwen-family models but live at different endpoints.
+:::
+
 ### NVIDIA NIM
 
 Nemotron and other open source models via [build.nvidia.com](https://build.nvidia.com) (free API key) or a local NIM endpoint.
@@ -966,11 +1052,11 @@ custom_providers:
     # api_key omitted — Hermes uses "no-key-required" for keyless local servers
   - name: work
     base_url: https://gpu-server.internal.corp/v1
-    api_key: corp-api-key
+    key_env: CORP_API_KEY
     api_mode: chat_completions   # optional, auto-detected from URL
   - name: anthropic-proxy
     base_url: https://proxy.example.com/anthropic
-    api_key: proxy-key
+    key_env: ANTHROPIC_PROXY_KEY
     api_mode: anthropic_messages  # for Anthropic-compatible proxies
 ```
 
@@ -1068,12 +1154,12 @@ fallback_model:
   provider: openrouter                    # required
   model: anthropic/claude-sonnet-4        # required
   # base_url: http://localhost:8000/v1    # optional, for custom endpoints
-  # api_key_env: MY_CUSTOM_KEY           # optional, env var name for custom endpoint API key
+  # key_env: MY_CUSTOM_KEY               # optional, env var name for custom endpoint API key
 ```
 
 When activated, the fallback swaps the model and provider mid-session without losing your conversation. It fires **at most once** per session.
 
-Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `deepseek`, `ai-gateway`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `custom`.
+Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `deepseek`, `nvidia`, `xai`, `ollama-cloud`, `bedrock`, `ai-gateway`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `custom`.
 
 :::tip
 Fallback is configured exclusively through `config.yaml` — there are no environment variables for it. For full details on when it triggers, supported providers, and how it interacts with auxiliary tasks and delegation, see [Fallback Providers](/docs/user-guide/features/fallback-providers).
@@ -1092,7 +1178,7 @@ smart_model_routing:
     provider: openrouter
     model: google/gemini-2.5-flash
     # base_url: http://localhost:8000/v1  # optional custom endpoint
-    # api_key_env: MY_CUSTOM_KEY          # optional env var name for that endpoint's API key
+    # key_env: MY_CUSTOM_KEY              # optional env var name for that endpoint's API key
 ```
 
 How it works:
diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md
index 6b08552676e..1fc4911158b 100644
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@@ -63,9 +63,6 @@ hermes [global-options] <command> [subcommand/options]
 | `hermes insights` | Show token/cost/activity analytics. |
 | `hermes claw` | OpenClaw migration helpers. |
 | `hermes dashboard` | Launch the web dashboard for managing config, API keys, and sessions. |
-| `hermes debug` | Debug tools — upload logs and system info for support. |
-| `hermes backup` | Back up Hermes home directory to a zip file. |
-| `hermes import` | Restore a Hermes backup from a zip file. |
 | `hermes profile` | Manage profiles — multiple isolated Hermes instances. |
 | `hermes completion` | Print shell completion scripts (bash/zsh). |
 | `hermes version` | Show version information. |
@@ -85,7 +82,7 @@ Common options:
 | `-q`, `--query "..."` | One-shot, non-interactive prompt. |
 | `-m`, `--model <model>` | Override the model for this run. |
 | `-t`, `--toolsets <csv>` | Enable a comma-separated set of toolsets. |
-| `--provider <provider>` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `gemini`, `huggingface`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`. |
+| `--provider <provider>` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `gemini`, `google-gemini-cli`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`. |
 | `-s`, `--skills <name>` | Preload one or more skills for the session (can be repeated or comma-separated). |
 | `-v`, `--verbose` | Verbose output. |
 | `-Q`, `--quiet` | Programmatic mode: suppress banner/spinner/tool previews. |
diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md
index ead884ba7b7..640e7be999b 100644
--- a/website/docs/reference/environment-variables.md
+++ b/website/docs/reference/environment-variables.md
@@ -14,6 +14,8 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
 |----------|-------------|
 | `OPENROUTER_API_KEY` | OpenRouter API key (recommended for flexibility) |
 | `OPENROUTER_BASE_URL` | Override the OpenRouter-compatible base URL |
+| `NOUS_BASE_URL` | Override Nous Portal base URL (rarely needed; development/testing only) |
+| `NOUS_INFERENCE_BASE_URL` | Override Nous inference endpoint directly |
 | `AI_GATEWAY_API_KEY` | Vercel AI Gateway API key ([ai-gateway.vercel.sh](https://ai-gateway.vercel.sh)) |
 | `AI_GATEWAY_BASE_URL` | Override AI Gateway base URL (default: `https://ai-gateway.vercel.sh/v1`) |
 | `OPENAI_API_KEY` | API key for custom OpenAI-compatible endpoints (used with `OPENAI_BASE_URL`) |
@@ -35,9 +37,9 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
 | `ARCEEAI_API_KEY` | Arcee AI API key ([chat.arcee.ai](https://chat.arcee.ai/)) |
 | `ARCEE_BASE_URL` | Override Arcee base URL (default: `https://api.arcee.ai/api/v1`) |
 | `MINIMAX_API_KEY` | MiniMax API key — global endpoint ([minimax.io](https://www.minimax.io)) |
-| `MINIMAX_BASE_URL` | Override MiniMax base URL (default: `https://api.minimax.io/v1`) |
+| `MINIMAX_BASE_URL` | Override MiniMax base URL (default: `https://api.minimax.io/anthropic` — Hermes uses MiniMax's Anthropic Messages-compatible endpoint) |
 | `MINIMAX_CN_API_KEY` | MiniMax API key — China endpoint ([minimaxi.com](https://www.minimaxi.com)) |
-| `MINIMAX_CN_BASE_URL` | Override MiniMax China base URL (default: `https://api.minimaxi.com/v1`) |
+| `MINIMAX_CN_BASE_URL` | Override MiniMax China base URL (default: `https://api.minimaxi.com/anthropic`) |
 | `KILOCODE_API_KEY` | Kilo Code API key ([kilo.ai](https://kilo.ai)) |
 | `KILOCODE_BASE_URL` | Override Kilo Code base URL (default: `https://api.kilo.ai/api/gateway`) |
 | `XIAOMI_API_KEY` | Xiaomi MiMo API key ([platform.xiaomimimo.com](https://platform.xiaomimimo.com)) |
@@ -53,9 +55,20 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
 | `ANTHROPIC_API_KEY` | Anthropic Console API key ([console.anthropic.com](https://console.anthropic.com/)) |
 | `ANTHROPIC_TOKEN` | Manual or legacy Anthropic OAuth/setup-token override |
 | `DASHSCOPE_API_KEY` | Alibaba Cloud DashScope API key for Qwen models ([modelstudio.console.alibabacloud.com](https://modelstudio.console.alibabacloud.com/)) |
-| `DASHSCOPE_BASE_URL` | Custom DashScope base URL (default: `https://coding-intl.dashscope.aliyuncs.com/v1`) |
+| `DASHSCOPE_BASE_URL` | Custom DashScope base URL (default: `https://dashscope-intl.aliyuncs.com/compatible-mode/v1`; use `https://dashscope.aliyuncs.com/compatible-mode/v1` for mainland-China region) |
 | `DEEPSEEK_API_KEY` | DeepSeek API key for direct DeepSeek access ([platform.deepseek.com](https://platform.deepseek.com/api_keys)) |
 | `DEEPSEEK_BASE_URL` | Custom DeepSeek API base URL |
+| `NVIDIA_API_KEY` | NVIDIA NIM API key — Nemotron and open models ([build.nvidia.com](https://build.nvidia.com)) |
+| `NVIDIA_BASE_URL` | Override NVIDIA base URL (default: `https://integrate.api.nvidia.com/v1`; set to `http://localhost:8000/v1` for a local NIM endpoint) |
+| `OLLAMA_API_KEY` | Ollama Cloud API key — managed Ollama catalog without local GPU ([ollama.com/settings/keys](https://ollama.com/settings/keys)) |
+| `OLLAMA_BASE_URL` | Override Ollama Cloud base URL (default: `https://ollama.com/v1`) |
+| `XAI_API_KEY` | xAI (Grok) API key for chat + TTS ([console.x.ai](https://console.x.ai/)) |
+| `XAI_BASE_URL` | Override xAI base URL (default: `https://api.x.ai/v1`) |
+| `MISTRAL_API_KEY` | Mistral API key for Voxtral TTS and Voxtral STT ([console.mistral.ai](https://console.mistral.ai)) |
+| `AWS_REGION` | AWS region for Bedrock inference (e.g. `us-east-1`, `eu-central-1`). Read by boto3. |
+| `AWS_PROFILE` | AWS named profile for Bedrock authentication (reads `~/.aws/credentials`). Leave unset to use default boto3 credential chain. |
+| `BEDROCK_BASE_URL` | Override Bedrock runtime base URL (default: `https://bedrock-runtime.us-east-1.amazonaws.com`; usually leave unset and use `AWS_REGION` instead) |
+| `HERMES_QWEN_BASE_URL` | Qwen Portal base URL override (default: `https://portal.qwen.ai/v1`) |
 | `OPENCODE_ZEN_API_KEY` | OpenCode Zen API key — pay-as-you-go access to curated models ([opencode.ai](https://opencode.ai/auth)) |
 | `OPENCODE_ZEN_BASE_URL` | Override OpenCode Zen base URL |
 | `OPENCODE_GO_API_KEY` | OpenCode Go API key — $10/month subscription for open models ([opencode.ai](https://opencode.ai/auth)) |
@@ -73,7 +86,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
 
 | Variable | Description |
 |----------|-------------|
-| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `deepseek`, `opencode-zen`, `opencode-go`, `ai-gateway` (default: `auto`) |
+| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `google-gemini-cli`, `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway` (default: `auto`) |
 | `HERMES_PORTAL_BASE_URL` | Override Nous Portal URL (for development/testing) |
 | `NOUS_INFERENCE_BASE_URL` | Override Nous inference API URL |
 | `HERMES_NOUS_MIN_KEY_TTL_SECONDS` | Min agent key TTL before re-mint (default: 1800 = 30min) |
@@ -183,10 +196,14 @@ For cloud sandbox backends, persistence is filesystem-oriented. `TERMINAL_LIFETI
 | `TELEGRAM_WEBHOOK_PORT` | Local listen port for webhook server (default: `8443`) |
 | `TELEGRAM_WEBHOOK_SECRET` | Secret token for verifying updates come from Telegram |
 | `TELEGRAM_REACTIONS` | Enable emoji reactions on messages during processing (default: `false`) |
+| `TELEGRAM_REPLY_TO_MODE` | Reply-reference behavior: `off`, `first` (default), or `all`. Matches the Discord pattern. |
 | `TELEGRAM_IGNORED_THREADS` | Comma-separated Telegram forum topic/thread IDs where the bot never responds |
 | `TELEGRAM_PROXY` | Proxy URL for Telegram connections — overrides `HTTPS_PROXY`. Supports `http://`, `https://`, `socks5://` |
 | `DISCORD_BOT_TOKEN` | Discord bot token |
 | `DISCORD_ALLOWED_USERS` | Comma-separated Discord user IDs allowed to use the bot |
+| `DISCORD_ALLOWED_ROLES` | Comma-separated Discord role IDs allowed to use the bot. Combined with `DISCORD_ALLOWED_USERS` using OR — a user who matches either list is allowed. Auto-enables the Members intent. Useful when moderation teams churn — role grants propagate automatically. |
+| `DISCORD_ALLOWED_CHANNELS` | Comma-separated Discord channel IDs. When set, the bot only responds in these channels (plus DMs if allowed). Overrides `config.yaml` `discord.allowed_channels`. |
+| `DISCORD_PROXY` | Proxy URL for Discord connections — overrides `HTTPS_PROXY`. Supports `http://`, `https://`, `socks5://` |
 | `DISCORD_HOME_CHANNEL` | Default Discord channel for cron delivery |
 | `DISCORD_HOME_CHANNEL_NAME` | Display name for the Discord home channel |
 | `DISCORD_REQUIRE_MENTION` | Require an @mention before responding in server channels |
@@ -291,6 +308,8 @@ For cloud sandbox backends, persistence is filesystem-oriented. `TERMINAL_LIFETI
 | `QQ_GROUP_ALLOWED_USERS` | Comma-separated QQ group IDs for group @-message access |
 | `QQ_ALLOW_ALL_USERS` | Allow all users (`true`/`false`, overrides `QQ_ALLOWED_USERS`) |
 | `QQBOT_HOME_CHANNEL` | QQ user/group openID for cron delivery and notifications |
+| `QQBOT_HOME_CHANNEL_NAME` | Display name for the QQ home channel |
+| `QQ_SANDBOX` | Route QQ Bot to the sandbox gateway for development testing (`true`/`false`). Use with a sandbox app credential from [q.qq.com](https://q.qq.com). |
 | `MATTERMOST_URL` | Mattermost server URL (e.g. `https://mm.example.com`) |
 | `MATTERMOST_TOKEN` | Bot token or personal access token for Mattermost |
 | `MATTERMOST_ALLOWED_USERS` | Comma-separated Mattermost user IDs allowed to message the bot |
@@ -305,6 +324,8 @@ For cloud sandbox backends, persistence is filesystem-oriented. `TERMINAL_LIFETI
 | `MATRIX_ALLOWED_USERS` | Comma-separated Matrix user IDs allowed to message the bot (e.g. `@alice:matrix.org`) |
 | `MATRIX_HOME_ROOM` | Room ID for proactive message delivery (e.g. `!abc123:matrix.org`) |
 | `MATRIX_ENCRYPTION` | Enable end-to-end encryption (`true`/`false`, default: `false`) |
+| `MATRIX_DEVICE_ID` | Stable Matrix device ID for E2EE persistence across restarts (e.g. `HERMES_BOT`). Without this, E2EE keys rotate on every startup and decryption of messages in previously joined rooms breaks. |
+| `MATRIX_REACTIONS` | Enable processing-lifecycle emoji reactions on inbound messages (default: `true`). Set to `false` to disable. |
 | `MATRIX_REQUIRE_MENTION` | Require `@mention` in rooms (default: `true`). Set to `false` to respond to all messages. |
 | `MATRIX_FREE_RESPONSE_ROOMS` | Comma-separated room IDs where bot responds without `@mention` |
 | `MATRIX_AUTO_THREAD` | Auto-create threads for room messages (default: `true`) |
diff --git a/website/docs/reference/faq.md b/website/docs/reference/faq.md
index c39f510b1ff..132a4d00a9e 100644
--- a/website/docs/reference/faq.md
+++ b/website/docs/reference/faq.md
@@ -110,7 +110,7 @@ Yes. Import the `AIAgent` class and use Hermes programmatically:
 ```python
 from run_agent import AIAgent
 
-agent = AIAgent(model="openrouter/nous/hermes-3-llama-3.1-70b")
+agent = AIAgent(model="anthropic/claude-opus-4.7")
 response = agent.chat("Explain quantum computing briefly")
 ```
 
@@ -243,7 +243,7 @@ Make sure the key matches the provider. An OpenAI key won't work with OpenRouter
 hermes model
 
 # Set a valid model
-hermes config set HERMES_MODEL openrouter/nous/hermes-3-llama-3.1-70b
+hermes config set HERMES_MODEL anthropic/claude-opus-4.7
 
 # Or specify per-session
 hermes chat --model openrouter/meta-llama/llama-3.1-70b-instruct
@@ -781,7 +781,7 @@ hermes config show | head -20
 hermes model
 
 # Or test with a known-good model
-hermes chat -q "hello" --model anthropic/claude-sonnet-4.6
+hermes chat -q "hello" --model anthropic/claude-opus-4.7
 ```
 
 If using OpenRouter, make sure your API key has credits. A 400 from OpenRouter often means the model requires a paid plan or the model ID has a typo.
diff --git a/website/docs/reference/optional-skills-catalog.md b/website/docs/reference/optional-skills-catalog.md
index 18ec4b3810b..f5dd2ac5bfe 100644
--- a/website/docs/reference/optional-skills-catalog.md
+++ b/website/docs/reference/optional-skills-catalog.md
@@ -54,7 +54,9 @@ hermes skills uninstall <skill-name>
 | Skill | Description |
 |-------|-------------|
 | **blender-mcp** | Control Blender directly from Hermes via socket connection to the blender-mcp addon. Create 3D objects, materials, animations, and run arbitrary Blender Python (bpy) code. |
+| **concept-diagrams** | Generate flat, minimal light/dark-aware SVG diagrams as standalone HTML files, using a unified educational visual language (9 semantic color ramps, automatic dark mode). Best for physics setups, chemistry mechanisms, math curves, physical objects (aircraft, turbines, smartphones), floor plans, cross-sections, lifecycle/process narratives, and hub-spoke system diagrams. Ships with 15 example diagrams. |
 | **meme-generation** | Generate real meme images by picking a template and overlaying text with Pillow. Produces actual `.png` meme files. |
+| **touchdesigner-mcp** | Control a running TouchDesigner instance via the twozero MCP plugin — create operators, set parameters, wire connections, execute Python, build real-time audio-reactive visuals and GLSL networks. 36 native tools. |
 
 ## DevOps
 
@@ -73,6 +75,7 @@ hermes skills uninstall <skill-name>
 
 | Skill | Description |
 |-------|-------------|
+| **fitness-nutrition** | Gym workout planner and nutrition tracker. Search 690+ exercises by muscle, equipment, or category via wger. Look up macros and calories for 380,000+ foods via USDA FoodData Central. Computes BMI, TDEE, one-rep max, macro splits, and body fat — pure Python, no pip installs. |
 | **neuroskill-bci** | Brain-Computer Interface (BCI) integration for neuroscience research workflows. |
 
 ## MCP
@@ -80,6 +83,7 @@ hermes skills uninstall <skill-name>
 | Skill | Description |
 |-------|-------------|
 | **fastmcp** | Build, test, inspect, install, and deploy MCP servers with FastMCP in Python. Covers wrapping APIs or databases as MCP tools, exposing resources or prompts, and deployment. |
+| **mcporter** | The `mcporter` CLI — list, configure, auth, and call MCP servers/tools directly (HTTP or stdio) from the terminal. Useful for ad-hoc MCP interactions; for always-on tool discovery use the built-in `native-mcp` client instead. |
 
 ## Migration
 
@@ -95,22 +99,29 @@ The largest optional category — covers the full ML pipeline from data curation
 |-------|-------------|
 | **accelerate** | Simplest distributed training API. 4 lines to add distributed support to any PyTorch script. Unified API for DeepSpeed/FSDP/Megatron/DDP. |
 | **chroma** | Open-source embedding database. Store embeddings and metadata, perform vector and full-text search. Simple 4-function API for RAG and semantic search. |
+| **clip** | OpenAI's vision-language model connecting images and text. Zero-shot image classification, image-text matching, and cross-modal retrieval. Trained on 400M image-text pairs. Use for image search, content moderation, or vision-language tasks without fine-tuning. |
 | **faiss** | Facebook's library for efficient similarity search and clustering of dense vectors. Supports billions of vectors, GPU acceleration, and various index types (Flat, IVF, HNSW). |
 | **flash-attention** | Optimize transformer attention with Flash Attention for 2-4x speedup and 10-20x memory reduction. Supports PyTorch SDPA, flash-attn library, H100 FP8, and sliding window. |
+| **guidance** | Control LLM output with regex and grammars, guarantee valid JSON/XML/code generation, enforce structured formats, and build multi-step workflows with Guidance — Microsoft Research's constrained generation framework. |
 | **hermes-atropos-environments** | Build, test, and debug Hermes Agent RL environments for Atropos training. Covers the HermesAgentBaseEnv interface, reward functions, agent loop integration, and evaluation. |
 | **huggingface-tokenizers** | Fast Rust-based tokenizers for research and production. Tokenizes 1GB in under 20 seconds. Supports BPE, WordPiece, and Unigram algorithms. |
 | **instructor** | Extract structured data from LLM responses with Pydantic validation, retry failed extractions automatically, and stream partial results. |
 | **lambda-labs** | Reserved and on-demand GPU cloud instances for ML training and inference. SSH access, persistent filesystems, and multi-node clusters. |
 | **llava** | Large Language and Vision Assistant — visual instruction tuning and image-based conversations combining CLIP vision with LLaMA language models. |
+| **modal** | Serverless GPU cloud platform for running ML workloads. On-demand GPU access without infrastructure management, ML model deployment as APIs, or batch jobs with automatic scaling. |
 | **nemo-curator** | GPU-accelerated data curation for LLM training. Fuzzy deduplication (16x faster), quality filtering (30+ heuristics), semantic dedup, PII redaction. Scales with RAPIDS. |
+| **peft-fine-tuning** | Parameter-efficient fine-tuning for LLMs using LoRA, QLoRA, and 25+ methods. Train <1% of parameters with minimal accuracy loss for 7B–70B models on limited GPU memory. HuggingFace's official PEFT library. |
 | **pinecone** | Managed vector database for production AI. Auto-scaling, hybrid search (dense + sparse), metadata filtering, and low latency (under 100ms p95). |
+| **pytorch-fsdp** | Expert guidance for Fully Sharded Data Parallel training with PyTorch FSDP — parameter sharding, mixed precision, CPU offloading, FSDP2. |
 | **pytorch-lightning** | High-level PyTorch framework with Trainer class, automatic distributed training (DDP/FSDP/DeepSpeed), callbacks, and minimal boilerplate. |
 | **qdrant** | High-performance vector similarity search engine. Rust-powered with fast nearest neighbor search, hybrid search with filtering, and scalable vector storage. |
 | **saelens** | Train and analyze Sparse Autoencoders (SAEs) using SAELens to decompose neural network activations into interpretable features. |
 | **simpo** | Simple Preference Optimization — reference-free alternative to DPO with better performance (+6.4 pts on AlpacaEval 2.0). No reference model needed. |
 | **slime** | LLM post-training with RL using Megatron+SGLang framework. Custom data generation workflows and tight Megatron-LM integration for RL scaling. |
+| **stable-diffusion-image-generation** | State-of-the-art text-to-image generation with Stable Diffusion via HuggingFace Diffusers. Text-to-image, image-to-image translation, inpainting, and custom diffusion pipelines. |
 | **tensorrt-llm** | Optimize LLM inference with NVIDIA TensorRT for maximum throughput. 10-100x faster than PyTorch on A100/H100 with quantization (FP8/INT4) and in-flight batching. |
 | **torchtitan** | PyTorch-native distributed LLM pretraining with 4D parallelism (FSDP2, TP, PP, CP). Scale from 8 to 512+ GPUs with Float8 and torch.compile. |
+| **whisper** | OpenAI's general-purpose speech recognition. 99 languages, transcription, translation to English, and language ID. Six model sizes from tiny (39M) to large (1550M). Best for robust multilingual ASR. |
 
 ## Productivity
 
diff --git a/website/docs/reference/profile-commands.md b/website/docs/reference/profile-commands.md
index 8c8feafb518..e4f28e83460 100644
--- a/website/docs/reference/profile-commands.md
+++ b/website/docs/reference/profile-commands.md
@@ -81,6 +81,8 @@ Creates a new profile.
 | `--clone-from <profile>` | Clone from a specific profile instead of the current one. Used with `--clone` or `--clone-all`. |
 | `--no-alias` | Skip wrapper script creation. |
 
+Creating a profile does **not** make that profile directory the default project/workspace directory for terminal commands. If you want a profile to start in a specific project, set `terminal.cwd` in that profile's `config.yaml`.
+
 **Examples:**
 
 ```bash
@@ -129,6 +131,8 @@ hermes profile show <name>
 
 Displays details about a profile including its home directory, configured model, gateway status, skills count, and configuration file status.
 
+This shows the profile's Hermes home directory, not the terminal working directory. Terminal commands start from `terminal.cwd` (or the launch directory on the local backend when `cwd: "."`).
+
 | Argument | Description |
 |----------|-------------|
 | `<name>` | Profile to inspect. |
diff --git a/website/docs/reference/skills-catalog.md b/website/docs/reference/skills-catalog.md
index 13ef2f7fc4a..46c29929f9c 100644
--- a/website/docs/reference/skills-catalog.md
+++ b/website/docs/reference/skills-catalog.md
@@ -27,27 +27,32 @@ Skills for spawning and orchestrating autonomous AI coding agents and multi-agen
 |-------|-------------|------|
 | `claude-code` | Delegate coding tasks to Claude Code (Anthropic's CLI agent). Use for building features, refactoring, PR reviews, and iterative coding. Requires the claude CLI installed. | `autonomous-ai-agents/claude-code` |
 | `codex` | Delegate coding tasks to OpenAI Codex CLI agent. Use for building features, refactoring, PR reviews, and batch issue fixing. Requires the codex CLI and a git repository. | `autonomous-ai-agents/codex` |
-| `hermes-agent-spawning` | Spawn additional Hermes Agent instances as autonomous subprocesses for independent long-running tasks. Supports non-interactive one-shot mode (-q) and interactive PTY mode for multi-turn collaboration. Different from delegate_task — this runs a full separate hermes process. | `autonomous-ai-agents/hermes-agent` |
+| `hermes-agent` | Complete guide to using and extending Hermes Agent — CLI usage, setup, configuration, spawning additional agents, gateway platforms, skills, voice, tools, profiles, and a concise contributor reference. Load this skill when helping users configure Hermes, troubleshoot issues, s… | `autonomous-ai-agents/hermes-agent` |
 | `opencode` | Delegate coding tasks to OpenCode CLI agent for feature implementation, refactoring, PR review, and long-running autonomous sessions. Requires the opencode CLI installed and authenticated. | `autonomous-ai-agents/opencode` |
 
+## creative
+
+Creative content generation — ASCII art, hand-drawn diagrams, animations, music, and visual design tools.
+
+| Skill | Description | Path |
+|-------|-------------|------|
+| `architecture-diagram` | Generate dark-themed SVG diagrams of software systems and cloud infrastructure as standalone HTML files with inline SVG graphics. Semantic component colors (cyan=frontend, emerald=backend, violet=database, amber=cloud/AWS, rose=security, orange=message bus), JetBrains Mono fon… | `creative/architecture-diagram` |
+| `ascii-art` | Generate ASCII art using pyfiglet (571 fonts), cowsay, boxes, toilet, image-to-ascii, remote APIs (asciified, ascii.co.uk), and LLM fallback. No API keys required. | `creative/ascii-art` |
+| `ascii-video` | Production pipeline for ASCII art video — any format. Converts video/audio/images/generative input into colored ASCII character video output (MP4, GIF, image sequence). Covers: video-to-ASCII conversion, audio-reactive music visualizers, generative ASCII art animations, hybrid… | `creative/ascii-video` |
+| `excalidraw` | Create hand-drawn style diagrams using Excalidraw JSON format. Generate .excalidraw files for architecture diagrams, flowcharts, sequence diagrams, concept maps, and more. Files can be opened at excalidraw.com or uploaded for shareable links. | `creative/excalidraw` |
+| `ideation` | Generate project ideas through creative constraints. Use when the user says 'I want to build something', 'give me a project idea', 'I'm bored', 'what should I make', 'inspire me', or any variant of 'I have tools but no direction'. Works for code, art, hardware, writing, tools,… | `creative/creative-ideation` |
+| `manim-video` | Production pipeline for mathematical and technical animations using Manim Community Edition. Creates 3Blue1Brown-style explainer videos, algorithm visualizations, equation derivations, architecture diagrams, and data stories. Use when users request: animated explanations, math… | `creative/manim-video` |
+| `p5js` | Production pipeline for interactive and generative visual art using p5.js. Creates browser-based sketches, generative art, data visualizations, interactive experiences, 3D scenes, audio-reactive visuals, and motion graphics — exported as HTML, PNG, GIF, MP4, or SVG. Covers: 2D… | `creative/p5js` |
+| `popular-web-designs` | 54 production-quality design systems extracted from real websites. Load a template to generate HTML/CSS that matches the visual identity of sites like Stripe, Linear, Vercel, Notion, Airbnb, and more. Each template includes colors, typography, components, layout rules, and rea… | `creative/popular-web-designs` |
+| `songwriting-and-ai-music` | Songwriting craft, AI music generation prompts (Suno focus), parody/adaptation techniques, phonetic tricks, and lessons learned. These are tools and ideas, not rules. Break any of them when the art calls for it. | `creative/songwriting-and-ai-music` |
+
 ## data-science
 
 Skills for data science workflows — interactive exploration, Jupyter notebooks, data analysis, and visualization.
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| `jupyter-live-kernel` | Use a live Jupyter kernel for stateful, iterative Python execution via hamelnb. Load this skill when the task involves exploration, iteration, or inspecting intermediate results. | `data-science/jupyter-live-kernel` |
-
-## creative
-
-Creative content generation — ASCII art, hand-drawn style diagrams, and visual design tools.
-
-| Skill | Description | Path |
-|-------|-------------|------|
-| `ascii-art` | Generate ASCII art using pyfiglet (571 fonts), cowsay, boxes, toilet, image-to-ascii, remote APIs (asciified, ascii.co.uk), and LLM fallback. No API keys required. | `creative/ascii-art` |
-| `ascii-video` | "Production pipeline for ASCII art video — any format. Converts video/audio/images/generative input into colored ASCII character video output (MP4, GIF, image sequence). Covers: video-to-ASCII conversion, audio-reactive music visualizers, generative ASCII art animations, hybrid… | `creative/ascii-video` |
-| `excalidraw` | Create hand-drawn style diagrams using Excalidraw JSON format. Generate .excalidraw files for architecture diagrams, flowcharts, sequence diagrams, concept maps, and more. Files can be opened at excalidraw.com or uploaded for shareable links. | `creative/excalidraw` |
-| `p5js` | Production pipeline for interactive and generative visual art using p5.js. Create sketches, render them to images/video via headless browser, and serve live previews. Supports canvas animations, data visualizations, and creative coding experiments. | `creative/p5js` |
+| `jupyter-live-kernel` | Use a live Jupyter kernel for stateful, iterative Python execution via hamelnb. Load this skill when the task involves exploration, iteration, or inspecting intermediate results — data science, ML experimentation, API exploration, or building up complex code step-by-step. Uses… | `data-science/jupyter-live-kernel` |
 
 ## devops
 
@@ -55,14 +60,15 @@ DevOps and infrastructure automation skills.
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| `webhook-subscriptions` | Create and manage webhook subscriptions for event-driven agent activation. External services (GitHub, Stripe, CI/CD, IoT) POST events to trigger agent runs. Requires webhook platform to be enabled. | `devops/webhook-subscriptions` |
+| `webhook-subscriptions` | Create and manage webhook subscriptions for event-driven agent activation. Use when the user wants external services to trigger agent runs automatically. | `devops/webhook-subscriptions` |
 
 ## dogfood
 
+Internal dogfooding and QA skills used to test Hermes Agent itself.
+
 | Skill | Description | Path |
 |-------|-------------|------|
-| `dogfood` | Systematic exploratory QA testing of web applications — find bugs, capture evidence, and generate structured reports. | `dogfood/dogfood` |
-| `hermes-agent-setup` | Help users configure Hermes Agent — CLI usage, setup wizard, model/provider selection, tools, skills, voice/STT/TTS, gateway, and troubleshooting. | `dogfood/hermes-agent-setup` |
+| `dogfood` | Systematic exploratory QA testing of web applications — find bugs, capture evidence, and generate structured reports | `dogfood` |
 
 ## email
 
@@ -83,7 +89,7 @@ Skills for setting up, configuring, and managing game servers, modpacks, and gam
 
 ## github
 
-GitHub workflow skills for managing repositories, pull requests, code reviews, issues, and CI/CD pipelines using the gh CLI and git via terminal.
+GitHub workflow skills for managing repositories, pull requests, code reviews, issues, and CI/CD pipelines.
 
 | Skill | Description | Path |
 |-------|-------------|------|
@@ -94,27 +100,12 @@ GitHub workflow skills for managing repositories, pull requests, code reviews, i
 | `github-pr-workflow` | Full pull request lifecycle — create branches, commit changes, open PRs, monitor CI status, auto-fix failures, and merge. Works with gh CLI or falls back to git + GitHub REST API via curl. | `github/github-pr-workflow` |
 | `github-repo-management` | Clone, create, fork, configure, and manage GitHub repositories. Manage remotes, secrets, releases, and workflows. Works with gh CLI or falls back to git + GitHub REST API via curl. | `github/github-repo-management` |
 
-## inference-sh
-
-Skills for AI app execution via inference.sh cloud platform.
-
-| Skill | Description | Path |
-|-------|-------------|------|
-| `inference-sh-cli` | Run 150+ AI apps via inference.sh CLI (infsh) — image generation, video creation, LLMs, search, 3D, social automation. | `inference-sh/cli` |
-
-## leisure
-
-| Skill | Description | Path |
-|-------|-------------|------|
-| `find-nearby` | Find nearby places (restaurants, cafes, bars, pharmacies, etc.) using OpenStreetMap. Works with coordinates, addresses, cities, zip codes, or Telegram location pins. No API keys needed. | `leisure/find-nearby` |
-
 ## mcp
 
-Skills for working with MCP (Model Context Protocol) servers, tools, and integrations. Includes the built-in native MCP client (configure servers in config.yaml for automatic tool discovery) and the mcporter CLI bridge for ad-hoc server interaction.
+Skills for working with MCP (Model Context Protocol) servers, tools, and integrations.
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| `mcporter` | Use the mcporter CLI to list, configure, auth, and call MCP servers/tools directly (HTTP or stdio), including ad-hoc servers, config edits, and CLI/type generation. | `mcp/mcporter` |
 | `native-mcp` | Built-in MCP (Model Context Protocol) client that connects to external MCP servers, discovers their tools, and registers them as native Hermes Agent tools. Supports stdio and HTTP transports with automatic reconnection, security filtering, and zero-config tool injection. | `mcp/native-mcp` |
 
 ## media
@@ -126,7 +117,7 @@ Skills for working with media content — YouTube transcripts, GIF search, music
 | `gif-search` | Search and download GIFs from Tenor using curl. No dependencies beyond curl and jq. Useful for finding reaction GIFs, creating visual content, and sending GIFs in chat. | `media/gif-search` |
 | `heartmula` | Set up and run HeartMuLa, the open-source music generation model family (Suno-like). Generates full songs from lyrics + tags with multilingual support. | `media/heartmula` |
 | `songsee` | Generate spectrograms and audio feature visualizations (mel, chroma, MFCC, tempogram, etc.) from audio files via CLI. Useful for audio analysis, music production debugging, and visual documentation. | `media/songsee` |
-| `youtube-content` | Fetch YouTube video transcripts and transform them into structured content (chapters, summaries, threads, blog posts). | `media/youtube-content` |
+| `youtube-content` | Fetch YouTube video transcripts and transform them into structured content (chapters, summaries, threads, blog posts). Use when the user shares a YouTube URL or video link, asks to summarize a video, requests a transcript, or wants to extract and reformat content from any YouT… | `media/youtube-content` |
 
 ## mlops
 
@@ -134,27 +125,15 @@ General-purpose ML operations tools — model hub management, dataset operations
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| `huggingface-hub` | Hugging Face Hub CLI (hf) — search, download, and upload models and datasets, manage repos, deploy inference endpoints. | `mlops/huggingface-hub` |
-
-## mlops/cloud
-
-GPU cloud providers and serverless compute platforms for ML workloads.
-
-| Skill | Description | Path |
-|-------|-------------|------|
-| `lambda-labs-gpu-cloud` | Reserved and on-demand GPU cloud instances for ML training and inference. Use when you need dedicated GPU instances with simple SSH access, persistent filesystems, or high-performance multi-node clusters for large-scale training. | `mlops/cloud/lambda-labs` |
-| `modal-serverless-gpu` | Serverless GPU cloud platform for running ML workloads. Use when you need on-demand GPU access without infrastructure management, deploying ML models as APIs, or running batch jobs with automatic scaling. | `mlops/cloud/modal` |
+| `huggingface-hub` | Hugging Face Hub CLI (hf) — search, download, and upload models and datasets, manage repos, query datasets with SQL, deploy inference endpoints, manage Spaces and buckets. | `mlops/huggingface-hub` |
 
 ## mlops/evaluation
 
-Model evaluation benchmarks, experiment tracking, data curation, tokenizers, and interpretability tools.
+Model evaluation benchmarks, experiment tracking, and interpretability tools.
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| `evaluating-llms-harness` | Evaluates LLMs across 60+ academic benchmarks (MMLU, HumanEval, GSM8K, TruthfulQA, HellaSwag). Use when benchmarking model quality, comparing models, reporting academic results, or tracking training progress. Industry standard used by EleutherAI, HuggingFace, and major labs. Sup… | `mlops/evaluation/lm-evaluation-harness` |
-| `huggingface-tokenizers` | Fast tokenizers optimized for research and production. Rust-based implementation tokenizes 1GB in &lt;20 seconds. Supports BPE, WordPiece, and Unigram algorithms. Train custom vocabularies, track alignments, handle padding/truncation. Integrates seamlessly with transformers. Use… | `mlops/evaluation/huggingface-tokenizers` |
-| `nemo-curator` | GPU-accelerated data curation for LLM training. Supports text/image/video/audio. Features fuzzy deduplication (16× faster), quality filtering (30+ heuristics), semantic deduplication, PII redaction, NSFW detection. Scales across GPUs with RAPIDS. Use for preparing high-quality t… | `mlops/evaluation/nemo-curator` |
-| `sparse-autoencoder-training` | Provides guidance for training and analyzing Sparse Autoencoders (SAEs) using SAELens to decompose neural network activations into interpretable features. Use when discovering interpretable features, analyzing superposition, or studying monosemantic representations in language m… | `mlops/evaluation/saelens` |
+| `evaluating-llms-harness` | Evaluates LLMs across 60+ academic benchmarks (MMLU, HumanEval, GSM8K, TruthfulQA, HellaSwag). Use when benchmarking model quality, comparing models, reporting academic results, or tracking training progress. Industry standard used by EleutherAI, HuggingFace, and major labs. S… | `mlops/evaluation/lm-evaluation-harness` |
 | `weights-and-biases` | Track ML experiments with automatic logging, visualize training in real-time, optimize hyperparameters with sweeps, and manage model registry with W&B - collaborative MLOps platform | `mlops/evaluation/weights-and-biases` |
 
 ## mlops/inference
@@ -163,27 +142,19 @@ Model serving, quantization (GGUF/GPTQ), structured output, inference optimizati
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| `gguf-quantization` | GGUF format and llama.cpp quantization for efficient CPU/GPU inference. Use when deploying models on consumer hardware, Apple Silicon, or when needing flexible quantization from 2-8 bit without GPU requirements. | `mlops/inference/gguf` |
-| `guidance` | Control LLM output with regex and grammars, guarantee valid JSON/XML/code generation, enforce structured formats, and build multi-step workflows with Guidance - Microsoft Research's constrained generation framework | `mlops/inference/guidance` |
-| `instructor` | Extract structured data from LLM responses with Pydantic validation, retry failed extractions automatically, parse complex JSON with type safety, and stream partial results with Instructor - battle-tested structured output library | `mlops/inference/instructor` |
-| `llama-cpp` | Runs LLM inference on CPU, Apple Silicon, and consumer GPUs without NVIDIA hardware. Use for edge deployment, M1/M2/M3 Macs, AMD/Intel GPUs, or when CUDA is unavailable. Supports GGUF quantization (1.5-8 bit) for reduced memory and 4-10× speedup vs PyTorch on CPU. | `mlops/inference/llama-cpp` |
-| `obliteratus` | Remove refusal behaviors from open-weight LLMs using OBLITERATUS — mechanistic interpretability techniques (diff-in-means, SVD, whitened SVD, LEACE, SAE decomposition, etc.) to excise guardrails while preserving reasoning. 9 CLI methods, 28 analysis modules, 116 model presets ac… | `mlops/inference/obliteratus` |
+| `llama-cpp` | Run LLM inference with llama.cpp on CPU, Apple Silicon, AMD/Intel GPUs, or NVIDIA — plus GGUF model conversion and quantization (2–8 bit with K-quants and imatrix). Covers CLI, Python bindings, OpenAI-compatible server, and Ollama/LM Studio integration. Use for edge deployment… | `mlops/inference/llama-cpp` |
+| `obliteratus` | Remove refusal behaviors from open-weight LLMs using OBLITERATUS — mechanistic interpretability techniques (diff-in-means, SVD, whitened SVD, LEACE, SAE decomposition, etc.) to excise guardrails while preserving reasoning. 9 CLI methods, 28 analysis modules, 116 model presets … | `mlops/inference/obliteratus` |
 | `outlines` | Guarantee valid JSON/XML/code structure during generation, use Pydantic models for type-safe outputs, support local models (Transformers, vLLM), and maximize inference speed with Outlines - dottxt.ai's structured generation library | `mlops/inference/outlines` |
-| `serving-llms-vllm` | Serves LLMs with high throughput using vLLM's PagedAttention and continuous batching. Use when deploying production LLM APIs, optimizing inference latency/throughput, or serving models with limited GPU memory. Supports OpenAI-compatible endpoints, quantization (GPTQ/AWQ/FP8), an… | `mlops/inference/vllm` |
-| `tensorrt-llm` | Optimizes LLM inference with NVIDIA TensorRT for maximum throughput and lowest latency. Use for production deployment on NVIDIA GPUs (A100/H100), when you need 10-100x faster inference than PyTorch, or for serving models with quantization (FP8/INT4), in-flight batching, and mult… | `mlops/inference/tensorrt-llm` |
+| `serving-llms-vllm` | Serves LLMs with high throughput using vLLM's PagedAttention and continuous batching. Use when deploying production LLM APIs, optimizing inference latency/throughput, or serving models with limited GPU memory. Supports OpenAI-compatible endpoints, quantization (GPTQ/AWQ/FP8), … | `mlops/inference/vllm` |
 
 ## mlops/models
 
-Specific model architectures and tools — computer vision (CLIP, SAM, Stable Diffusion), speech (Whisper), audio generation (AudioCraft), and multimodal models (LLaVA).
+Specific model architectures — image segmentation (SAM) and audio generation (AudioCraft / MusicGen). Additional model skills (CLIP, Stable Diffusion, Whisper, LLaVA) are available as optional skills.
 
 | Skill | Description | Path |
 |-------|-------------|------|
 | `audiocraft-audio-generation` | PyTorch library for audio generation including text-to-music (MusicGen) and text-to-sound (AudioGen). Use when you need to generate music from text descriptions, create sound effects, or perform melody-conditioned music generation. | `mlops/models/audiocraft` |
-| `clip` | OpenAI's model connecting vision and language. Enables zero-shot image classification, image-text matching, and cross-modal retrieval. Trained on 400M image-text pairs. Use for image search, content moderation, or vision-language tasks without fine-tuning. Best for general-purpo… | `mlops/models/clip` |
-| `llava` | Large Language and Vision Assistant. Enables visual instruction tuning and image-based conversations. Combines CLIP vision encoder with Vicuna/LLaMA language models. Supports multi-turn image chat, visual question answering, and instruction following. Use for vision-language cha… | `mlops/models/llava` |
 | `segment-anything-model` | Foundation model for image segmentation with zero-shot transfer. Use when you need to segment any object in images using points, boxes, or masks as prompts, or automatically generate all object masks in an image. | `mlops/models/segment-anything` |
-| `stable-diffusion-image-generation` | State-of-the-art text-to-image generation with Stable Diffusion models via HuggingFace Diffusers. Use when generating images from text prompts, performing image-to-image translation, inpainting, or building custom diffusion pipelines. | `mlops/models/stable-diffusion` |
-| `whisper` | OpenAI's general-purpose speech recognition model. Supports 99 languages, transcription, translation to English, and language identification. Six model sizes from tiny (39M params) to large (1550M params). Use for speech-to-text, podcast transcription, or multilingual audio proc… | `mlops/models/whisper` |
 
 ## mlops/research
 
@@ -195,38 +166,17 @@ ML research frameworks for building and optimizing AI systems with declarative p
 
 ## mlops/training
 
-Fine-tuning, RLHF/DPO/GRPO training, distributed training frameworks, and optimization tools for training LLMs and other models.
+Fine-tuning, RLHF/DPO/GRPO training, distributed training frameworks, and optimization tools.
 
 | Skill | Description | Path |
 |-------|-------------|------|
 | `axolotl` | Expert guidance for fine-tuning LLMs with Axolotl - YAML configs, 100+ models, LoRA/QLoRA, DPO/KTO/ORPO/GRPO, multimodal support | `mlops/training/axolotl` |
-| `distributed-llm-pretraining-torchtitan` | Provides PyTorch-native distributed LLM pretraining using torchtitan with 4D parallelism (FSDP2, TP, PP, CP). Use when pretraining Llama 3.1, DeepSeek V3, or custom models at scale from 8 to 512+ GPUs with Float8, torch.compile, and distributed checkpointing. | `mlops/training/torchtitan` |
-| `fine-tuning-with-trl` | Fine-tune LLMs using reinforcement learning with TRL - SFT for instruction tuning, DPO for preference alignment, PPO/GRPO for reward optimization, and reward model training. Use when need RLHF, align model with preferences, or train from human feedback. Works with HuggingFace Tr… | `mlops/training/trl-fine-tuning` |
-| `grpo-rl-training` | Expert guidance for GRPO/RL fine-tuning with TRL for reasoning and task-specific model training | `mlops/training/grpo-rl-training` |
-| `hermes-atropos-environments` | Build, test, and debug Hermes Agent RL environments for Atropos training. Covers the HermesAgentBaseEnv interface, reward functions, agent loop integration, evaluation with tools, wandb logging, and the three CLI modes (serve/process/evaluate). Use when creating, reviewing, or f… | `mlops/training/hermes-atropos-environments` |
-| `huggingface-accelerate` | Simplest distributed training API. 4 lines to add distributed support to any PyTorch script. Unified API for DeepSpeed/FSDP/Megatron/DDP. Automatic device placement, mixed precision (FP16/BF16/FP8). Interactive config, single launch command. HuggingFace ecosystem standard. | `mlops/training/accelerate` |
-| `optimizing-attention-flash` | Optimizes transformer attention with Flash Attention for 2-4x speedup and 10-20x memory reduction. Use when training/running transformers with long sequences (&gt;512 tokens), encountering GPU memory issues with attention, or need faster inference. Supports PyTorch native SDPA,… | `mlops/training/flash-attention` |
-| `peft-fine-tuning` | Parameter-efficient fine-tuning for LLMs using LoRA, QLoRA, and 25+ methods. Use when fine-tuning large models (7B-70B) with limited GPU memory, when you need to train &lt;1% of parameters with minimal accuracy loss, or for multi-adapter serving. HuggingFace's official library i… | `mlops/training/peft` |
-| `pytorch-fsdp` | Expert guidance for Fully Sharded Data Parallel training with PyTorch FSDP - parameter sharding, mixed precision, CPU offloading, FSDP2 | `mlops/training/pytorch-fsdp` |
-| `pytorch-lightning` | High-level PyTorch framework with Trainer class, automatic distributed training (DDP/FSDP/DeepSpeed), callbacks system, and minimal boilerplate. Scales from laptop to supercomputer with same code. Use when you want clean training loops with built-in best practices. | `mlops/training/pytorch-lightning` |
-| `simpo-training` | Simple Preference Optimization for LLM alignment. Reference-free alternative to DPO with better performance (+6.4 points on AlpacaEval 2.0). No reference model needed, more efficient than DPO. Use for preference alignment when want simpler, faster training than DPO/PPO. | `mlops/training/simpo` |
-| `slime-rl-training` | Provides guidance for LLM post-training with RL using slime, a Megatron+SGLang framework. Use when training GLM models, implementing custom data generation workflows, or needing tight Megatron-LM integration for RL scaling. | `mlops/training/slime` |
+| `fine-tuning-with-trl` | Fine-tune LLMs using reinforcement learning with TRL - SFT for instruction tuning, DPO for preference alignment, PPO/GRPO for reward optimization, and reward model training. Use when need RLHF, align model with preferences, or train from human feedback. Works with HuggingFace … | `mlops/training/trl-fine-tuning` |
 | `unsloth` | Expert guidance for fast fine-tuning with Unsloth - 2-5x faster training, 50-80% less memory, LoRA/QLoRA optimization | `mlops/training/unsloth` |
 
-## mlops/vector-databases
-
-Vector similarity search and embedding databases for RAG, semantic search, and AI application backends.
-
-| Skill | Description | Path |
-|-------|-------------|------|
-| `chroma` | Open-source embedding database for AI applications. Store embeddings and metadata, perform vector and full-text search, filter by metadata. Simple 4-function API. Scales from notebooks to production clusters. Use for semantic search, RAG applications, or document retrieval. Best… | `mlops/vector-databases/chroma` |
-| `faiss` | Facebook's library for efficient similarity search and clustering of dense vectors. Supports billions of vectors, GPU acceleration, and various index types (Flat, IVF, HNSW). Use for fast k-NN search, large-scale vector retrieval, or when you need pure similarity search without… | `mlops/vector-databases/faiss` |
-| `pinecone` | Managed vector database for production AI applications. Fully managed, auto-scaling, with hybrid search (dense + sparse), metadata filtering, and namespaces. Low latency (&lt;100ms p95). Use for production RAG, recommendation systems, or semantic search at scale. Best for server… | `mlops/vector-databases/pinecone` |
-| `qdrant-vector-search` | High-performance vector similarity search engine for RAG and semantic search. Use when building production RAG systems requiring fast nearest neighbor search, hybrid search with filtering, or scalable vector storage with Rust-powered performance. | `mlops/vector-databases/qdrant` |
-
 ## note-taking
 
-Note taking skills, to save information, assist with research, and collab on multi-session planning and information sharing.
+Note-taking skills for saving information, assisting with research, and collaborating on multi-session planning.
 
 | Skill | Description | Path |
 |-------|-------------|------|
@@ -238,26 +188,13 @@ Skills for document creation, presentations, spreadsheets, and other productivit
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| `google-workspace` | Gmail, Calendar, Drive, Contacts, Sheets, and Docs integration via Python. Uses OAuth2 with automatic token refresh. No external binaries needed — runs entirely with Google's Python client libraries in the Hermes venv. | `productivity/google-workspace` |
-| `linear` | Manage Linear issues, projects, and teams via the GraphQL API. Create, update, search, and organize issues. | `productivity/linear` |
+| `google-workspace` | Gmail, Calendar, Drive, Contacts, Sheets, and Docs integration for Hermes. Uses Hermes-managed OAuth2 setup, prefers the Google Workspace CLI (`gws`) when available for broader API coverage, and falls back to the Python client libraries otherwise. | `productivity/google-workspace` |
+| `linear` | Manage Linear issues, projects, and teams via the GraphQL API. Create, update, search, and organize issues. Uses API key auth (no OAuth needed). All operations via curl — no dependencies. | `productivity/linear` |
+| `maps` | Location intelligence — geocode, reverse-geocode, nearby POI search (44 categories, coordinates or address via `--near`), driving/walking/cycling distance + time, turn-by-turn directions, timezone, bounding box + area, POI search in a rectangle. Uses OpenStreetMap + Overpass + OSRM. No API key needed. Telegram location-pin friendly. | `productivity/maps` |
 | `nano-pdf` | Edit PDFs with natural-language instructions using the nano-pdf CLI. Modify text, fix typos, update titles, and make content changes to specific pages without manual editing. | `productivity/nano-pdf` |
 | `notion` | Notion API for creating and managing pages, databases, and blocks via curl. Search, create, update, and query Notion workspaces directly from the terminal. | `productivity/notion` |
 | `ocr-and-documents` | Extract text from PDFs and scanned documents. Use web_extract for remote URLs, pymupdf for local text-based PDFs, marker-pdf for OCR/scanned docs. For DOCX use python-docx, for PPTX see the powerpoint skill. | `productivity/ocr-and-documents` |
-| `powerpoint` | "Use this skill any time a .pptx file is involved in any way — as input, output, or both. This includes: creating slide decks, pitch decks, or presentations; reading, parsing, or extracting text from any .pptx file (even if the extracted content will be used elsewhere, like in a… | `productivity/powerpoint` |
-
-## research
-
-Skills for academic research, paper discovery, literature review, domain reconnaissance, market data, content monitoring, and scientific knowledge retrieval.
-
-| Skill | Description | Path |
-|-------|-------------|------|
-| `arxiv` | Search and retrieve academic papers from arXiv using their free REST API. No API key needed. Search by keyword, author, category, or ID. Combine with web_extract or the ocr-and-documents skill to read full paper content. | `research/arxiv` |
-| `blogwatcher` | Monitor blogs and RSS/Atom feeds for updates using the blogwatcher CLI. Add blogs, scan for new articles, and track what you've read. | `research/blogwatcher` |
-| `llm-wiki` | Karpathy's LLM Wiki — build and maintain a persistent, interlinked markdown knowledge base. Ingest sources, query compiled knowledge, and lint for consistency. Unlike RAG, the wiki compiles knowledge once and keeps it current. Works as an Obsidian vault. Wiki path is controlled by the `WIKI_PATH` env var (defaults to `~/wiki`). | `research/llm-wiki` |
-| `domain-intel` | Passive domain reconnaissance using Python stdlib. Subdomain discovery, SSL certificate inspection, WHOIS lookups, DNS records, domain availability checks, and bulk multi-domain analysis. No API keys required. | `research/domain-intel` |
-| `duckduckgo-search` | Free web search via DuckDuckGo — text, news, images, videos. No API key needed. Prefer the `ddgs` CLI when installed; use the Python DDGS library only after verifying that `ddgs` is available in the current runtime. | `research/duckduckgo-search` |
-| `ml-paper-writing` | Write publication-ready ML/AI papers for NeurIPS, ICML, ICLR, ACL, AAAI, COLM. Use when drafting papers from research repos, structuring arguments, verifying citations, or preparing camera-ready submissions. Includes LaTeX templates, reviewer guidelines, and citation verificatio… | `research/ml-paper-writing` |
-| `polymarket` | Query Polymarket prediction market data — search markets, get prices, orderbooks, and price history. Read-only via public REST APIs, no API key needed. | `research/polymarket` |
+| `powerpoint` | Use this skill any time a .pptx file is involved in any way — as input, output, or both. This includes: creating slide decks, pitch decks, or presentations; reading, parsing, or extracting text from any .pptx file (even if the extracted content will be used elsewhere, like in … | `productivity/powerpoint` |
 
 ## red-teaming
 
@@ -265,7 +202,19 @@ Skills for LLM red-teaming, jailbreaking, and safety filter bypass research.
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| `godmode` | Jailbreak API-served LLMs using G0DM0D3 techniques — Parseltongue input obfuscation (33 techniques), GODMODE CLASSIC system prompt templates, ULTRAPLINIAN multi-model racing, encoding escalation, and Hermes-native prefill/system prompt integration. Works on any model accessible via API including closed-source models. | `red-teaming/godmode` |
+| `godmode` | Jailbreak API-served LLMs using G0DM0D3 techniques — Parseltongue input obfuscation (33 techniques), GODMODE CLASSIC system prompt templates, ULTRAPLINIAN multi-model racing, encoding escalation, and Hermes-native prefill/system prompt integration. Use when a user wants to byp… | `red-teaming/godmode` |
+
+## research
+
+Skills for academic research, paper discovery, literature review, market data, content monitoring, and scientific knowledge retrieval.
+
+| Skill | Description | Path |
+|-------|-------------|------|
+| `arxiv` | Search and retrieve academic papers from arXiv using their free REST API. No API key needed. Search by keyword, author, category, or ID. Combine with web_extract or the ocr-and-documents skill to read full paper content. | `research/arxiv` |
+| `blogwatcher` | Monitor blogs and RSS/Atom feeds for updates using the blogwatcher-cli tool. Add blogs, scan for new articles, track read status, and filter by category. | `research/blogwatcher` |
+| `llm-wiki` | Karpathy's LLM Wiki — build and maintain a persistent, interlinked markdown knowledge base. Ingest sources, query compiled knowledge, and lint for consistency. | `research/llm-wiki` |
+| `polymarket` | Query Polymarket prediction market data — search markets, get prices, orderbooks, and price history. Read-only via public REST APIs, no API key needed. | `research/polymarket` |
+| `research-paper-writing` | End-to-end pipeline for writing ML/AI research papers — from experiment design through analysis, drafting, revision, and submission. Covers NeurIPS, ICML, ICLR, ACL, AAAI, COLM. Integrates automated experiment monitoring, statistical analysis, iterative writing, and citation v… | `research/research-paper-writing` |
 
 ## smart-home
 
@@ -281,20 +230,22 @@ Skills for interacting with social platforms — posting, reading, monitoring, a
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| `xitter` | Interact with X/Twitter via the x-cli terminal client using official X API credentials. | `social-media/xitter` |
+| `xurl` | Interact with X/Twitter via xurl, the official X API CLI. Use for posting, replying, quoting, searching, timelines, mentions, likes, reposts, bookmarks, follows, DMs, media upload, and raw v2 endpoint access. | `social-media/xurl` |
 
 ## software-development
 
+General software-engineering skills — planning, reviewing, debugging, and test-driven development.
+
 | Skill | Description | Path |
 |-------|-------------|------|
-| `code-review` | Guidelines for performing thorough code reviews with security and quality focus | `software-development/code-review` |
-| `plan` | Plan mode for Hermes — inspect context, write a markdown plan into `.hermes/plans/` in the active workspace/backend working directory, and do not execute the work. | `software-development/plan` |
-| `requesting-code-review` | Use when completing tasks, implementing major features, or before merging. Validates work meets requirements through systematic review process. | `software-development/requesting-code-review` |
+| `plan` | Plan mode for Hermes — inspect context, write a markdown plan into the active workspace's `.hermes/plans/` directory, and do not execute the work. | `software-development/plan` |
+| `requesting-code-review` | Pre-commit verification pipeline — static security scan, baseline-aware quality gates, independent reviewer subagent, and auto-fix loop. Use after code changes and before committing, pushing, or opening a PR. | `software-development/requesting-code-review` |
 | `subagent-driven-development` | Use when executing implementation plans with independent tasks. Dispatches fresh delegate_task per task with two-stage review (spec compliance then code quality). | `software-development/subagent-driven-development` |
 | `systematic-debugging` | Use when encountering any bug, test failure, or unexpected behavior. 4-phase root cause investigation — NO fixes without understanding the problem first. | `software-development/systematic-debugging` |
 | `test-driven-development` | Use when implementing any feature or bugfix, before writing implementation code. Enforces RED-GREEN-REFACTOR cycle with test-first approach. | `software-development/test-driven-development` |
 | `writing-plans` | Use when you have a spec or requirements for a multi-step task. Creates comprehensive implementation plans with bite-sized tasks, exact file paths, and complete code examples. | `software-development/writing-plans` |
 
+
 ---
 
 # Optional Skills
@@ -324,6 +275,7 @@ hermes skills install official/<category>/<skill>
 |-------|-------------|------|
 | `blender-mcp` | Control Blender directly from Hermes via socket connection to the blender-mcp addon. Create 3D objects, materials, animations, and run arbitrary Blender Python (bpy) code. | `creative/blender-mcp` |
 | `meme-generation` | Generate real meme images by picking a template and overlaying text with Pillow. Produces actual .png meme files. | `creative/meme-generation` |
+| `touchdesigner-mcp` | Control a running TouchDesigner instance via the twozero MCP plugin — create operators, set parameters, wire connections, execute Python, build real-time audio-reactive visuals and GLSL networks. 36 native tools. | `creative/touchdesigner-mcp` |
 
 ## devops
 
diff --git a/website/docs/reference/slash-commands.md b/website/docs/reference/slash-commands.md
index 2ad3c62d81c..79453474fc8 100644
--- a/website/docs/reference/slash-commands.md
+++ b/website/docs/reference/slash-commands.md
@@ -35,7 +35,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
 | `/queue <prompt>` (alias: `/q`) | Queue a prompt for the next turn (doesn't interrupt the current agent response). **Note:** `/q` is claimed by both `/queue` and `/quit`; the last registration wins, so `/q` resolves to `/quit` in practice. Use `/queue` explicitly. |
 | `/resume [name]` | Resume a previously-named session |
 | `/status` | Show session info |
-| `/snapshot` (alias: `/snap`) | Create or restore state snapshots of Hermes config/state (usage: /snapshot [create\|restore \<id\>\|prune]) |
+| `/agents` (alias: `/tasks`) | Show active agents and running tasks across the current session. |
 | `/background <prompt>` (alias: `/bg`) | Run a prompt in a separate background session. The agent processes your prompt independently — your current session stays free for other work. Results appear as a panel when the task finishes. See [CLI Background Sessions](/docs/user-guide/cli#background-sessions). |
 | `/btw <question>` | Ephemeral side question using session context (no tools, not persisted). Useful for quick clarifications without affecting the conversation history. |
 | `/plan [request]` | Load the bundled `plan` skill to write a markdown plan instead of executing the work. Plans are saved under `.hermes/plans/` relative to the active workspace/backend working directory. |
@@ -50,9 +50,8 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
 | `/provider` | Show available providers and current provider |
 | `/personality` | Set a predefined personality |
 | `/verbose` | Cycle tool progress display: off → new → all → verbose. Can be [enabled for messaging](#notes) via config. |
-| `/fast` | Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode (usage: /fast [normal\|fast\|status]) |
+| `/fast [normal\|fast\|status]` | Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode. Options: `normal`, `fast`, `status`. |
 | `/reasoning` | Manage reasoning effort and display (usage: /reasoning [level\|show\|hide]) |
-| `/fast [normal\|fast\|status]` | Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode. Options: `normal`, `fast`, `status`, `on`, `off`. |
 | `/skin` | Show or change the display skin/theme |
 | `/statusbar` (alias: `/sb`) | Toggle the context/model status bar on or off |
 | `/voice [on\|off\|tts\|status]` | Toggle CLI voice mode and spoken playback. Recording uses `voice.record_key` (default: `Ctrl+B`). |
@@ -80,9 +79,11 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
 | `/insights` | Show usage insights and analytics (last 30 days) |
 | `/platforms` (alias: `/gateway`) | Show gateway/messaging platform status |
 | `/paste` | Check clipboard for an image and attach it |
+| `/copy [number]` | Copy the last assistant response to clipboard (or the Nth-from-last with a number). CLI-only. |
 | `/image <path>` | Attach a local image file for your next prompt. |
 | `/debug` | Upload debug report (system info + logs) and get shareable links. Also available in messaging. |
 | `/profile` | Show active profile name and home directory |
+| `/gquota` | Show Google Gemini Code Assist quota usage with progress bars (only available when the `google-gemini-cli` provider is active). |
 
 ### Exit
 
@@ -150,8 +151,6 @@ The messaging gateway supports the following built-in commands inside Telegram,
 | `/deny` | Reject a pending dangerous command. |
 | `/update` | Update Hermes Agent to the latest version. |
 | `/restart` | Gracefully restart the gateway after draining active runs. When the gateway comes back online, it sends a confirmation to the requester's chat/thread. |
-| `/fast [normal\|fast\|status]` | Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode. |
-| `/debug` | Upload debug report (system info + logs) and get shareable links. |
 | `/debug` | Upload debug report (system info + logs) and get shareable links. |
 | `/help` | Show messaging help. |
 | `/<skill-name>` | Invoke any installed skill by name. |
diff --git a/website/docs/reference/tools-reference.md b/website/docs/reference/tools-reference.md
index e1138dc00a1..c255c8f6a41 100644
--- a/website/docs/reference/tools-reference.md
+++ b/website/docs/reference/tools-reference.md
@@ -6,9 +6,9 @@ description: "Authoritative reference for Hermes built-in tools, grouped by tool
 
 # Built-in Tools Reference
 
-This page documents all 47 built-in tools in the Hermes tool registry, grouped by toolset. Availability varies by platform, credentials, and enabled toolsets.
+This page documents all 53 built-in tools in the Hermes tool registry, grouped by toolset. Availability varies by platform, credentials, and enabled toolsets.
 
-**Quick counts:** 10 browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, and 15 standalone tools across other toolsets.
+**Quick counts:** 11 browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, 5 Feishu tools, and 15 standalone tools across other toolsets.
 
 :::tip MCP Tools
 In addition to built-in tools, Hermes can load tools dynamically from MCP servers. MCP tools appear with a server-name prefix (e.g., `github_create_issue` for the `github` MCP server). See [MCP Integration](/docs/user-guide/features/mcp) for configuration.
@@ -19,6 +19,7 @@ In addition to built-in tools, Hermes can load tools dynamically from MCP server
 | Tool | Description | Requires environment |
 |------|-------------|----------------------|
 | `browser_back` | Navigate back to the previous page in browser history. Requires browser_navigate to be called first. | — |
+| `browser_cdp` | Send a raw Chrome DevTools Protocol (CDP) command. Escape hatch for browser operations not covered by browser_navigate, browser_click, browser_console, etc. Only available when a CDP endpoint is reachable at session start — via `/browser connect` or `browser.cdp_url` config. See https://chromedevtools.github.io/devtools-protocol/ | — |
 | `browser_click` | Click on an element identified by its ref ID from the snapshot (e.g., '@e5'). The ref IDs are shown in square brackets in the snapshot output. Requires browser_navigate and browser_snapshot to be called first. | — |
 | `browser_console` | Get browser console output and JavaScript errors from the current page. Returns console.log/warn/error/info messages and uncaught JS exceptions. Use this to detect silent JavaScript errors, failed API calls, and application warnings. Requi… | — |
 | `browser_get_images` | Get a list of all images on the current page with their URLs and alt text. Useful for finding images to analyze with the vision tool. Requires browser_navigate to be called first. | — |
@@ -53,6 +54,25 @@ In addition to built-in tools, Hermes can load tools dynamically from MCP server
 |------|-------------|----------------------|
 | `delegate_task` | Spawn one or more subagents to work on tasks in isolated contexts. Each subagent gets its own conversation, terminal session, and toolset. Only the final summary is returned -- intermediate tool results never enter your context window. TWO… | — |
 
+## `feishu_doc` toolset
+
+Scoped to the Feishu document-comment intelligent-reply handler (`gateway/platforms/feishu_comment.py`). Not exposed on `hermes-cli` or the regular Feishu chat adapter.
+
+| Tool | Description | Requires environment |
+|------|-------------|----------------------|
+| `feishu_doc_read` | Read the full text content of a Feishu/Lark document (Docx, Doc, or Sheet) given its file_type and token. | Feishu app credentials |
+
+## `feishu_drive` toolset
+
+Scoped to the Feishu document-comment handler. Provides comment read and write operations on Feishu/Lark Drive files.
+
+| Tool | Description | Requires environment |
+|------|-------------|----------------------|
+| `feishu_drive_add_comment` | Add a top-level comment on a Feishu/Lark document or file. | Feishu app credentials |
+| `feishu_drive_list_comments` | List whole-document comments on a Feishu/Lark file, most recent first. | Feishu app credentials |
+| `feishu_drive_list_comment_replies` | List replies on a specific Feishu comment thread (whole-doc or local-selection). | Feishu app credentials |
+| `feishu_drive_reply_comment` | Post a reply on a Feishu comment thread, with optional `@`-mention. | Feishu app credentials |
+
 ## `file` toolset
 
 | Tool | Description | Requires environment |
diff --git a/website/docs/reference/toolsets-reference.md b/website/docs/reference/toolsets-reference.md
index e941015b6a9..bb911004e19 100644
--- a/website/docs/reference/toolsets-reference.md
+++ b/website/docs/reference/toolsets-reference.md
@@ -52,11 +52,13 @@ Or in-session:
 
 | Toolset | Tools | Purpose |
 |---------|-------|---------|
-| `browser` | `browser_back`, `browser_click`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `web_search` | Full browser automation. Includes `web_search` as a fallback for quick lookups. |
+| `browser` | `browser_back`, `browser_cdp`, `browser_click`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `web_search` | Full browser automation. Includes `web_search` as a fallback for quick lookups. `browser_cdp` is a raw CDP passthrough gated on a reachable CDP endpoint — it only appears when `/browser connect` is active or `browser.cdp_url` is set. |
 | `clarify` | `clarify` | Ask the user a question when the agent needs clarification. |
 | `code_execution` | `execute_code` | Run Python scripts that call Hermes tools programmatically. |
 | `cronjob` | `cronjob` | Schedule and manage recurring tasks. |
 | `delegation` | `delegate_task` | Spawn isolated subagent instances for parallel work. |
+| `feishu_doc` | `feishu_doc_read` | Read Feishu/Lark document content. Used by the Feishu document-comment intelligent-reply handler. |
+| `feishu_drive` | `feishu_drive_add_comment`, `feishu_drive_list_comments`, `feishu_drive_list_comment_replies`, `feishu_drive_reply_comment` | Feishu/Lark drive comment operations. Scoped to the comment agent; not exposed on `hermes-cli` or other messaging toolsets. |
 | `file` | `patch`, `read_file`, `search_files`, `write_file` | File reading, writing, searching, and editing. |
 | `homeassistant` | `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services` | Smart home control via Home Assistant. Only available when `HASS_TOKEN` is set. |
 | `image_gen` | `image_generate` | Text-to-image generation via FAL.ai. |
@@ -79,7 +81,7 @@ These expand to multiple core toolsets, providing a convenient shorthand for com
 
 | Toolset | Expands to | Use case |
 |---------|-----------|----------|
-| `debugging` | `patch`, `process`, `read_file`, `search_files`, `terminal`, `web_extract`, `web_search`, `write_file` | Debug sessions — file access, terminal, and web research without browser or delegation overhead. |
+| `debugging` | `web` + `file` + `process`, `terminal` (via `includes`) — effectively `patch`, `process`, `read_file`, `search_files`, `terminal`, `web_extract`, `web_search`, `write_file` | Debug sessions — file access, terminal, and web research without browser or delegation overhead. |
 | `safe` | `image_generate`, `vision_analyze`, `web_extract`, `web_search` | Read-only research and media generation. No file writes, no terminal access, no code execution. Good for untrusted or constrained environments. |
 
 ## Platform Toolsets
@@ -88,7 +90,7 @@ Platform toolsets define the complete tool configuration for a deployment target
 
 | Toolset | Differences from `hermes-cli` |
 |---------|-------------------------------|
-| `hermes-cli` | Full toolset — all 36 tools including `clarify`. The default for interactive CLI sessions. |
+| `hermes-cli` | Full toolset — all 36 core tools including `clarify`. The default for interactive CLI sessions. |
 | `hermes-acp` | Drops `clarify`, `cronjob`, `image_generate`, `send_message`, `text_to_speech`, homeassistant tools. Focused on coding tasks in IDE context. |
 | `hermes-api-server` | Drops `clarify`, `send_message`, and `text_to_speech`. Adds everything else — suitable for programmatic access where user interaction isn't possible. |
 | `hermes-telegram` | Same as `hermes-cli`. |
@@ -100,16 +102,16 @@ Platform toolsets define the complete tool configuration for a deployment target
 | `hermes-mattermost` | Same as `hermes-cli`. |
 | `hermes-email` | Same as `hermes-cli`. |
 | `hermes-sms` | Same as `hermes-cli`. |
-| `hermes-dingtalk` | Same as `hermes-cli`. |
-| `hermes-feishu` | Same as `hermes-cli`. |
-| `hermes-wecom` | Same as `hermes-cli`. |
-| `hermes-wecom-callback` | WeCom callback toolset — enterprise self-built app messaging (full access). |
-| `hermes-weixin` | Same as `hermes-cli`. |
 | `hermes-bluebubbles` | Same as `hermes-cli`. |
+| `hermes-dingtalk` | Same as `hermes-cli`. |
+| `hermes-feishu` | Same as `hermes-cli`. Note: the `feishu_doc` / `feishu_drive` toolsets are used only by the document-comment handler, not by the regular Feishu chat adapter. |
 | `hermes-qqbot` | Same as `hermes-cli`. |
-| `hermes-homeassistant` | Same as `hermes-cli`. |
+| `hermes-wecom` | Same as `hermes-cli`. |
+| `hermes-wecom-callback` | Same as `hermes-cli`. |
+| `hermes-weixin` | Same as `hermes-cli`. |
+| `hermes-homeassistant` | Same as `hermes-cli`, with the `homeassistant` toolset always enabled. |
 | `hermes-webhook` | Same as `hermes-cli`. |
-| `hermes-gateway` | Union of all messaging platform toolsets. Used internally when the gateway needs the broadest possible tool set. |
+| `hermes-gateway` | Internal gateway orchestrator toolset — the union of all messaging platform toolsets, giving the broadest possible tool set when the gateway needs to accept messages from any source. |
 
 ## Dynamic Toolsets
 
@@ -119,11 +121,10 @@ Each configured MCP server generates a `mcp-<server>` toolset at runtime. For ex
 
 ```yaml
 # config.yaml
-mcp:
-  servers:
-    github:
-      command: npx
-      args: ["-y", "@modelcontextprotocol/server-github"]
+mcp_servers:
+  github:
+    command: npx
+    args: ["-y", "@modelcontextprotocol/server-github"]
 ```
 
 This creates a `mcp-github` toolset you can reference in `--toolsets` or platform configs.
diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md
index bef9b5cfd55..f91a25c3843 100644
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@@ -257,7 +257,7 @@ terminal:
   docker_volumes:
     - "/home/user/projects:/workspace/projects"   # Read-write (default)
     - "/home/user/datasets:/data:ro"              # Read-only
-    - "/home/user/outputs:/outputs"               # Agent writes, you read
+    - "/home/user/.hermes/cache/documents:/output" # Gateway-visible exports
 ```
 
 This is useful for:
@@ -265,6 +265,22 @@ This is useful for:
 - **Receiving files** from the agent (generated code, reports, exports)
 - **Shared workspaces** where both you and the agent access the same files
 
+If you use a messaging gateway and want the agent to send generated files via
+`MEDIA:/...`, prefer a dedicated host-visible export mount such as
+`/home/user/.hermes/cache/documents:/output`.
+
+- Write files inside Docker to `/output/...`
+- Emit the **host path** in `MEDIA:`, for example:
+  `MEDIA:/home/user/.hermes/cache/documents/report.txt`
+- Do **not** emit `/workspace/...` or `/output/...` unless that exact path also
+  exists for the gateway process on the host
+
+:::warning
+YAML duplicate keys silently override earlier ones. If you already have a
+`docker_volumes:` block, merge new mounts into the same list instead of adding
+another `docker_volumes:` key later in the file.
+:::
+
 Can also be set via environment variable: `TERMINAL_DOCKER_VOLUMES='["/host:/container"]'` (JSON array).
 
 ### Docker Credential Forwarding
@@ -601,7 +617,7 @@ Every model slot in Hermes — auxiliary tasks, compression, fallback — uses t
 
 When `base_url` is set, Hermes ignores the provider and calls that endpoint directly (using `api_key` or `OPENAI_API_KEY` for auth). When only `provider` is set, Hermes uses that provider's built-in auth and base URL.
 
-Available providers for auxiliary tasks: `auto`, `openrouter`, `nous`, `codex`, `copilot`, `anthropic`, `main`, `zai`, `kimi-coding`, `kimi-coding-cn`, `arcee`, `minimax`, any provider registered in the [provider registry](/docs/reference/environment-variables), or any named custom provider from your `custom_providers` list (e.g. `provider: "beans"`).
+Available providers for auxiliary tasks: `auto`, `main`, plus any provider in the [provider registry](/docs/reference/environment-variables) — `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `deepseek`, `nvidia`, `xai`, `ollama-cloud`, `alibaba`, `bedrock`, `huggingface`, `arcee`, `xiaomi`, `kilocode`, `opencode-zen`, `opencode-go`, `ai-gateway` — or any named custom provider from your `custom_providers` list (e.g. `provider: "beans"`).
 
 :::warning `"main"` is for auxiliary tasks only
 The `"main"` provider option means "use whatever provider my main agent uses" — it's only valid inside `auxiliary:`, `compression:`, and `fallback_model:` configs. It is **not** a valid value for your top-level `model.provider` setting. If you use a custom OpenAI-compatible endpoint, set `provider: custom` in your `model:` section. See [AI Providers](/docs/integrations/providers) for all main model provider options.
@@ -851,7 +867,7 @@ agent:
 
 ```yaml
 tts:
-  provider: "edge"              # "edge" | "elevenlabs" | "openai" | "minimax" | "mistral" | "neutts"
+  provider: "edge"              # "edge" | "elevenlabs" | "openai" | "minimax" | "mistral" | "gemini" | "xai" | "neutts"
   speed: 1.0                    # Global speed multiplier (fallback for all providers)
   edge:
     voice: "en-US-AriaNeural"   # 322 voices, 74 languages
@@ -867,6 +883,18 @@ tts:
   minimax:
     speed: 1.0                  # Speech speed multiplier
     # base_url: ""              # Optional: override for OpenAI-compatible TTS endpoints
+  mistral:
+    model: "voxtral-mini-tts-2603"
+    voice_id: "c69964a6-ab8b-4f8a-9465-ec0925096ec8"  # Paul - Neutral (default)
+  gemini:
+    model: "gemini-2.5-flash-preview-tts"   # or gemini-2.5-pro-preview-tts
+    voice: "Kore"               # 30 prebuilt voices: Zephyr, Puck, Kore, Enceladus, etc.
+  xai:
+    voice_id: "eve"             # xAI TTS voice
+    language: "en"              # ISO 639-1
+    sample_rate: 24000
+    bit_rate: 128000            # MP3 bitrate
+    # base_url: "https://api.x.ai/v1"
   neutts:
     ref_audio: ''
     ref_text: ''
@@ -1092,14 +1120,22 @@ human_delay:
 
 ## Code Execution
 
-Configure the sandboxed Python code execution tool:
+Configure the `execute_code` tool:
 
 ```yaml
 code_execution:
+  mode: project                # project (default) | strict
   timeout: 300                 # Max execution time in seconds
   max_tool_calls: 50           # Max tool calls within code execution
 ```
 
+**`mode`** controls the working directory and Python interpreter for scripts:
+
+- **`project`** (default) — scripts run in the session's working directory using the Python interpreter of the active virtualenv/conda environment. Project deps (`pandas`, `torch`, project packages) and relative paths (`.env`, `./data.csv`) resolve naturally, matching what `terminal()` sees.
+- **`strict`** — scripts run in a temp staging directory using `sys.executable` (Hermes's own Python interpreter). Maximum reproducibility, but project deps and relative paths won't resolve.
+
+Environment scrubbing (strips `*_API_KEY`, `*_TOKEN`, `*_SECRET`, `*_PASSWORD`, `*_CREDENTIAL`, `*_PASSWD`, `*_AUTH`) and the tool whitelist apply identically in both modes — switching mode does not change the security posture.
+
 ## Web Search Backends
 
 The `web_search`, `web_extract`, and `web_crawl` tools support four backend providers. Configure the backend in `config.yaml` or via `hermes tools`:
diff --git a/website/docs/user-guide/features/api-server.md b/website/docs/user-guide/features/api-server.md
index ebcb4523e86..82c6db0b2c2 100644
--- a/website/docs/user-guide/features/api-server.md
+++ b/website/docs/user-guide/features/api-server.md
@@ -154,12 +154,64 @@ Delete a stored response.
 
 ### GET /v1/models
 
-Lists the agent as an available model. The advertised model name defaults to the [profile](/docs/user-guide/features/profiles) name (or `hermes-agent` for the default profile). Required by most frontends for model discovery.
+Lists the agent as an available model. The advertised model name defaults to the [profile](/docs/user-guide/profiles) name (or `hermes-agent` for the default profile). Required by most frontends for model discovery.
 
 ### GET /health
 
 Health check. Returns `{"status": "ok"}`. Also available at **GET /v1/health** for OpenAI-compatible clients that expect the `/v1/` prefix.
 
+### GET /health/detailed
+
+Extended health check that also reports active sessions, running agents, and resource usage. Useful for monitoring/observability tooling.
+
+## Runs API (streaming-friendly alternative)
+
+In addition to `/v1/chat/completions` and `/v1/responses`, the server exposes a **runs** API for long-form sessions where the client wants to subscribe to progress events instead of managing streaming itself.
+
+### POST /v1/runs
+
+Create a new agent run. Returns a `run_id` that can be used to subscribe to progress events.
+
+### GET /v1/runs/\{run_id\}/events
+
+Server-Sent Events stream of the run's tool-call progress, token deltas, and lifecycle events. Designed for dashboards and thick clients that want to attach/detach without losing state.
+
+## Jobs API (background scheduled work)
+
+The server exposes a lightweight jobs CRUD surface for managing scheduled / background agent runs from a remote client. All endpoints are gated behind the same bearer auth.
+
+### GET /api/jobs
+
+List all scheduled jobs.
+
+### POST /api/jobs
+
+Create a new scheduled job. Body accepts the same shape as `hermes cron` — prompt, schedule, skills, provider override, delivery target.
+
+### GET /api/jobs/\{job_id\}
+
+Fetch a single job's definition and last-run state.
+
+### PATCH /api/jobs/\{job_id\}
+
+Update fields on an existing job (prompt, schedule, etc.). Partial updates are merged.
+
+### DELETE /api/jobs/\{job_id\}
+
+Remove a job. Also cancels any in-flight run.
+
+### POST /api/jobs/\{job_id\}/pause
+
+Pause a job without deleting it. Next-scheduled-run timestamps are suspended until resumed.
+
+### POST /api/jobs/\{job_id\}/resume
+
+Resume a previously paused job.
+
+### POST /api/jobs/\{job_id\}/run
+
+Trigger the job to run immediately, out of schedule.
+
 ## System Prompt Handling
 
 When a frontend sends a `system` message (Chat Completions) or `instructions` field (Responses API), hermes-agent **layers it on top** of its core system prompt. Your agent keeps all its tools, memory, and skills — the frontend's system prompt adds extra instructions.
@@ -247,7 +299,7 @@ Any frontend that supports the OpenAI API format works. Tested/documented integr
 
 ## Multi-User Setup with Profiles
 
-To give multiple users their own isolated Hermes instance (separate config, memory, skills), use [profiles](/docs/user-guide/features/profiles):
+To give multiple users their own isolated Hermes instance (separate config, memory, skills), use [profiles](/docs/user-guide/profiles):
 
 ```bash
 # Create a profile per user
diff --git a/website/docs/user-guide/features/browser.md b/website/docs/user-guide/features/browser.md
index 9880965ae48..d6624bf7d11 100644
--- a/website/docs/user-guide/features/browser.md
+++ b/website/docs/user-guide/features/browser.md
@@ -111,16 +111,49 @@ When `CAMOFOX_URL` is set, all browser tools automatically route through Camofox
 
 #### Persistent browser sessions
 
-By default, each Camofox session gets a random identity — cookies and logins don't survive across agent restarts. To enable persistent browser sessions:
+By default, each Camofox session gets a random identity — cookies and logins don't survive across agent restarts. To enable persistent browser sessions, add the following to `~/.hermes/config.yaml`:
 
 ```yaml
-# In ~/.hermes/config.yaml
 browser:
   camofox:
     managed_persistence: true
 ```
 
-When enabled, Hermes sends a stable profile-scoped `userId` to Camofox. The Camofox server automatically maps each `userId` to a dedicated persistent Firefox profile, so cookies, logins, and localStorage survive across restarts. Different Hermes profiles get different browser profiles (profile isolation).
+Then fully restart Hermes so the new config is picked up.
+
+:::warning Nested path matters
+Hermes reads `browser.camofox.managed_persistence`, **not** a top-level `managed_persistence`. A common mistake is writing:
+
+```yaml
+# ❌ Wrong — Hermes ignores this
+managed_persistence: true
+```
+
+If the flag is placed at the wrong path, Hermes silently falls back to a random ephemeral `userId` and your login state will be lost on every session.
+:::
+
+##### What Hermes does
+- Sends a deterministic profile-scoped `userId` to Camofox so the server can reuse the same Firefox profile across sessions.
+- Skips server-side context destruction on cleanup, so cookies and logins survive between agent tasks.
+- Scopes the `userId` to the active Hermes profile, so different Hermes profiles get different browser profiles (profile isolation).
+
+##### What Hermes does not do
+- It does not force persistence on the Camofox server. Hermes only sends a stable `userId`; the server must honor it by mapping that `userId` to a persistent Firefox profile directory.
+- If your Camofox server build treats every request as ephemeral (e.g. always calls `browser.newContext()` without loading a stored profile), Hermes cannot make those sessions persist. Make sure you are running a Camofox build that implements userId-based profile persistence.
+
+##### Verify it's working
+
+1. Start Hermes and your Camofox server.
+2. Open Google (or any login site) in a browser task and sign in manually.
+3. End the browser task normally.
+4. Start a new browser task.
+5. Open the same site again — you should still be signed in.
+
+If step 5 logs you out, the Camofox server isn't honoring the stable `userId`. Double-check your config path, confirm you fully restarted Hermes after editing `config.yaml`, and verify your Camofox server version supports persistent per-user profiles.
+
+##### Where state lives
+
+Hermes derives the stable `userId` from the profile-scoped directory `~/.hermes/browser_auth/camofox/` (or the equivalent under `$HERMES_HOME` for non-default profiles). The actual browser profile data lives on the Camofox server side, keyed by that `userId`. To fully reset a persistent profile, clear it on the Camofox server and remove the corresponding Hermes profile's state directory.
 
 #### VNC live view
 
@@ -130,6 +163,10 @@ When Camofox runs in headed mode (with a visible browser window), it exposes a V
 
 Instead of a cloud provider, you can attach Hermes browser tools to your own running Chrome instance via the Chrome DevTools Protocol (CDP). This is useful when you want to see what the agent is doing in real-time, interact with pages that require your own cookies/sessions, or avoid cloud browser costs.
 
+:::note
+`/browser connect` is an **interactive-CLI slash command** — it is not dispatched by the gateway. If you try to run it inside a WebUI, Telegram, Discord, or other gateway chat, the message will be sent to the agent as plain text and the command will not execute. Start Hermes from the terminal (`hermes` or `hermes chat`) and issue `/browser connect` there.
+:::
+
 In the CLI, use:
 
 ```
@@ -142,14 +179,27 @@ In the CLI, use:
 If Chrome isn't already running with remote debugging, Hermes will attempt to auto-launch it with `--remote-debugging-port=9222`.
 
 :::tip
-To start Chrome manually with CDP enabled:
+To start Chrome manually with CDP enabled, use a dedicated user-data-dir so the debug port actually comes up even if Chrome is already running with your normal profile:
+
 ```bash
 # Linux
-google-chrome --remote-debugging-port=9222
+google-chrome \
+  --remote-debugging-port=9222 \
+  --user-data-dir="$HOME/.hermes/chrome-debug" \
+  --no-first-run \
+  --no-default-browser-check &
 
 # macOS
-"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" --remote-debugging-port=9222
+"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" \
+  --remote-debugging-port=9222 \
+  --user-data-dir="$HOME/.hermes/chrome-debug" \
+  --no-first-run \
+  --no-default-browser-check &
 ```
+
+Then launch the Hermes CLI and run `/browser connect`.
+
+**Why `--user-data-dir`?** Without it, launching Chrome while a regular Chrome instance is already running typically opens a new window on the existing process — and that existing process was not started with `--remote-debugging-port`, so port 9222 never opens. A dedicated user-data-dir forces a fresh Chrome process where the debug port actually listens. `--no-first-run --no-default-browser-check` skips the first-launch wizard for the fresh profile.
 :::
 
 When connected via CDP, all browser tools (`browser_navigate`, `browser_click`, etc.) operate on your live Chrome instance instead of spinning up a cloud session.
@@ -277,6 +327,36 @@ Check the browser console for any JavaScript errors
 
 Use `clear=True` to clear the console after reading, so subsequent calls only show new messages.
 
+### `browser_cdp`
+
+Raw Chrome DevTools Protocol passthrough — the escape hatch for browser operations not covered by the other tools. Use for native dialog handling, iframe-scoped evaluation, cookie/network control, or any CDP verb the agent needs.
+
+**Only available when a CDP endpoint is reachable at session start** — meaning `/browser connect` has attached to a running Chrome, or `browser.cdp_url` is set in `config.yaml`. The default local agent-browser mode, Camofox, and cloud providers (Browserbase, Browser Use, Firecrawl) do not currently expose CDP to this tool. Cloud providers do have per-session CDP URLs, but routing this tool to a live cloud session is not yet implemented.
+
+**CDP method reference:** https://chromedevtools.github.io/devtools-protocol/ — the agent can `web_extract` a specific method's page to look up parameters and return shape.
+
+Common patterns:
+
+```
+# List tabs (browser-level, no target_id)
+browser_cdp(method="Target.getTargets")
+
+# Handle a native JS dialog on a tab
+browser_cdp(method="Page.handleJavaScriptDialog",
+            params={"accept": true, "promptText": ""},
+            target_id="<tabId>")
+
+# Evaluate JS in a specific tab
+browser_cdp(method="Runtime.evaluate",
+            params={"expression": "document.title", "returnByValue": true},
+            target_id="<tabId>")
+
+# Get all cookies
+browser_cdp(method="Network.getAllCookies")
+```
+
+Browser-level methods (`Target.*`, `Browser.*`, `Storage.*`) omit `target_id`. Page-level methods (`Page.*`, `Runtime.*`, `DOM.*`, `Emulation.*`) require a `target_id` from `Target.getTargets`. Each call is independent — sessions do not persist between calls.
+
 ## Practical Examples
 
 ### Filling Out a Web Form
diff --git a/website/docs/user-guide/features/code-execution.md b/website/docs/user-guide/features/code-execution.md
index 53668da9010..4deae296220 100644
--- a/website/docs/user-guide/features/code-execution.md
+++ b/website/docs/user-guide/features/code-execution.md
@@ -1,12 +1,12 @@
 ---
 sidebar_position: 8
 title: "Code Execution"
-description: "Sandboxed Python execution with RPC tool access — collapse multi-step workflows into a single turn"
+description: "Programmatic Python execution with RPC tool access — collapse multi-step workflows into a single turn"
 ---
 
 # Code Execution (Programmatic Tool Calling)
 
-The `execute_code` tool lets the agent write Python scripts that call Hermes tools programmatically, collapsing multi-step workflows into a single LLM turn. The script runs in a sandboxed child process on the agent host, communicating via Unix domain socket RPC.
+The `execute_code` tool lets the agent write Python scripts that call Hermes tools programmatically, collapsing multi-step workflows into a single LLM turn. The script runs in a child process on the agent host, communicating with Hermes over a Unix domain socket RPC.
 
 ## How It Works
 
@@ -27,7 +27,7 @@ for r in results["data"]["web"]:
 print(summary)
 ```
 
-**Available tools in sandbox:** `web_search`, `web_extract`, `read_file`, `write_file`, `search_files`, `patch`, `terminal` (foreground only).
+**Available tools inside scripts:** `web_search`, `web_extract`, `read_file`, `write_file`, `search_files`, `patch`, `terminal` (foreground only).
 
 ## When the Agent Uses This
 
@@ -126,6 +126,35 @@ report = {
 print(json.dumps(report, indent=2))
 ```
 
+## Execution Mode
+
+`execute_code` has two execution modes controlled by `code_execution.mode` in `~/.hermes/config.yaml`:
+
+| Mode | Working directory | Python interpreter |
+|------|-------------------|--------------------|
+| **`project`** (default) | The session's working directory (same as `terminal()`) | Active `VIRTUAL_ENV` / `CONDA_PREFIX` python, falling back to Hermes's own python |
+| `strict` | A temp staging directory isolated from the user's project | `sys.executable` (Hermes's own python) |
+
+**When to leave it on `project`:** you want `import pandas`, `from my_project import foo`, or relative paths like `open(".env")` to work the same way they do in `terminal()`. This is almost always what you want.
+
+**When to flip to `strict`:** you need maximum reproducibility — you want the same interpreter every session regardless of which venv the user activated, and you want scripts quarantined from the project tree (no risk of accidentally reading project files through a relative path).
+
+```yaml
+# ~/.hermes/config.yaml
+code_execution:
+  mode: project   # or "strict"
+```
+
+Fallback behavior in `project` mode: if `VIRTUAL_ENV` / `CONDA_PREFIX` is unset, broken, or points at a Python older than 3.8, the resolver falls back cleanly to `sys.executable` — it never leaves the agent without a working interpreter.
+
+Security-critical invariants are identical across both modes:
+
+- environment scrubbing (API keys, tokens, credentials stripped)
+- tool whitelist (scripts cannot call `execute_code` recursively, `delegate_task`, or MCP tools)
+- resource limits (timeout, stdout cap, tool-call cap)
+
+Switching mode changes where scripts run and which interpreter runs them, not what credentials they can see or which tools they can call.
+
 ## Resource Limits
 
 | Resource | Limit | Notes |
@@ -140,6 +169,7 @@ All limits are configurable via `config.yaml`:
 ```yaml
 # In ~/.hermes/config.yaml
 code_execution:
+  mode: project      # project (default) | strict
   timeout: 300       # Max seconds per script (default: 300)
   max_tool_calls: 50 # Max tool calls per execution (default: 50)
 ```
@@ -176,7 +206,7 @@ Environment variables containing `KEY`, `TOKEN`, `SECRET`, `PASSWORD`, `CREDENTI
 
 ### Skill Environment Variable Passthrough
 
-When a skill declares `required_environment_variables` in its frontmatter, those variables are **automatically passed through** to both `execute_code` and `terminal` sandboxes after the skill is loaded. This lets skills use their declared API keys without weakening the security posture for arbitrary code.
+When a skill declares `required_environment_variables` in its frontmatter, those variables are **automatically passed through** to both `execute_code` and `terminal` child processes after the skill is loaded. This lets skills use their declared API keys without weakening the security posture for arbitrary code.
 
 For non-skill use cases, you can explicitly allowlist variables in `config.yaml`:
 
@@ -189,7 +219,7 @@ terminal:
 
 See the [Security guide](/docs/user-guide/security#environment-variable-passthrough) for full details.
 
-The script runs in a temporary directory that is cleaned up after execution. The child process runs in its own process group so it can be cleanly killed on timeout or interruption.
+Hermes always writes the script and the auto-generated `hermes_tools.py` RPC stub into a temp staging directory that is cleaned up after execution. In `strict` mode the script also *runs* there; in `project` mode it runs in the session's working directory (the staging directory stays on `PYTHONPATH` so imports still resolve). The child process runs in its own process group so it can be cleanly killed on timeout or interruption.
 
 ## execute_code vs terminal
 
diff --git a/website/docs/user-guide/features/cron.md b/website/docs/user-guide/features/cron.md
index 222c00827c2..4628fcc639a 100644
--- a/website/docs/user-guide/features/cron.md
+++ b/website/docs/user-guide/features/cron.md
@@ -30,7 +30,7 @@ Cron-run sessions cannot recursively create more cron jobs. Hermes disables cron
 /cron add 30m "Remind me to check the build"
 /cron add "every 2h" "Check server status"
 /cron add "every 1h" "Summarize new feed items" --skill blogwatcher
-/cron add "every 1h" "Use both skills and combine the result" --skill blogwatcher --skill find-nearby
+/cron add "every 1h" "Use both skills and combine the result" --skill blogwatcher --skill maps
 ```
 
 ### From the standalone CLI
@@ -40,7 +40,7 @@ hermes cron create "every 2h" "Check server status"
 hermes cron create "every 1h" "Summarize new feed items" --skill blogwatcher
 hermes cron create "every 1h" "Use both skills and combine the result" \
   --skill blogwatcher \
-  --skill find-nearby \
+  --skill maps \
   --name "Skill combo"
 ```
 
@@ -77,7 +77,7 @@ Skills are loaded in order. The prompt becomes the task instruction layered on t
 ```python
 cronjob(
     action="create",
-    skills=["blogwatcher", "find-nearby"],
+    skills=["blogwatcher", "maps"],
     prompt="Look for new local events and interesting nearby places, then combine them into one short brief.",
     schedule="every 6h",
     name="Local brief",
@@ -95,7 +95,7 @@ You do not need to delete and recreate jobs just to change them.
 ```bash
 /cron edit <job_id> --schedule "every 4h"
 /cron edit <job_id> --prompt "Use the revised task"
-/cron edit <job_id> --skill blogwatcher --skill find-nearby
+/cron edit <job_id> --skill blogwatcher --skill maps
 /cron edit <job_id> --remove-skill blogwatcher
 /cron edit <job_id> --clear-skills
 ```
@@ -105,8 +105,8 @@ You do not need to delete and recreate jobs just to change them.
 ```bash
 hermes cron edit <job_id> --schedule "every 4h"
 hermes cron edit <job_id> --prompt "Use the revised task"
-hermes cron edit <job_id> --skill blogwatcher --skill find-nearby
-hermes cron edit <job_id> --add-skill find-nearby
+hermes cron edit <job_id> --skill blogwatcher --skill maps
+hermes cron edit <job_id> --add-skill maps
 hermes cron edit <job_id> --remove-skill blogwatcher
 hermes cron edit <job_id> --clear-skills
 ```
diff --git a/website/docs/user-guide/features/fallback-providers.md b/website/docs/user-guide/features/fallback-providers.md
index 12fde185d46..01e5524f6a4 100644
--- a/website/docs/user-guide/features/fallback-providers.md
+++ b/website/docs/user-guide/features/fallback-providers.md
@@ -48,6 +48,12 @@ Both `provider` and `model` are **required**. If either is missing, the fallback
 | MiniMax (China) | `minimax-cn` | `MINIMAX_CN_API_KEY` |
 | DeepSeek | `deepseek` | `DEEPSEEK_API_KEY` |
 | NVIDIA NIM | `nvidia` | `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) |
+| Ollama Cloud | `ollama-cloud` | `OLLAMA_API_KEY` |
+| Google Gemini (OAuth) | `google-gemini-cli` | `hermes model` (Google OAuth; optional: `HERMES_GEMINI_PROJECT_ID`) |
+| Google AI Studio | `gemini` | `GOOGLE_API_KEY` (alias: `GEMINI_API_KEY`) |
+| xAI (Grok) | `xai` (alias `grok`) | `XAI_API_KEY` (optional: `XAI_BASE_URL`) |
+| AWS Bedrock | `bedrock` | Standard boto3 auth (`AWS_REGION` + `AWS_PROFILE` or `AWS_ACCESS_KEY_ID`) |
+| Qwen Portal (OAuth) | `qwen-oauth` | `hermes model` (Qwen Portal OAuth; optional: `HERMES_QWEN_BASE_URL`) |
 | OpenCode Zen | `opencode-zen` | `OPENCODE_ZEN_API_KEY` |
 | OpenCode Go | `opencode-go` | `OPENCODE_GO_API_KEY` |
 | Kilo Code | `kilocode` | `KILOCODE_API_KEY` |
@@ -55,18 +61,18 @@ Both `provider` and `model` are **required**. If either is missing, the fallback
 | Arcee AI | `arcee` | `ARCEEAI_API_KEY` |
 | Alibaba / DashScope | `alibaba` | `DASHSCOPE_API_KEY` |
 | Hugging Face | `huggingface` | `HF_TOKEN` |
-| Custom endpoint | `custom` | `base_url` + `api_key_env` (see below) |
+| Custom endpoint | `custom` | `base_url` + `key_env` (see below) |
 
 ### Custom Endpoint Fallback
 
-For a custom OpenAI-compatible endpoint, add `base_url` and optionally `api_key_env`:
+For a custom OpenAI-compatible endpoint, add `base_url` and optionally `key_env`:
 
 ```yaml
 fallback_model:
   provider: custom
   model: my-local-model
   base_url: http://localhost:8000/v1
-  api_key_env: MY_LOCAL_KEY          # env var name containing the API key
+  key_env: MY_LOCAL_KEY              # env var name containing the API key
 ```
 
 ### When Fallback Triggers
@@ -122,7 +128,7 @@ fallback_model:
   provider: custom
   model: llama-3.1-70b
   base_url: http://localhost:8000/v1
-  api_key_env: LOCAL_API_KEY
+  key_env: LOCAL_API_KEY
 ```
 
 **Codex OAuth as fallback:**
@@ -163,6 +169,8 @@ Hermes uses separate lightweight models for side tasks. Each task has its own pr
 | Skills Hub | Skill search and discovery | `auxiliary.skills_hub` |
 | MCP | MCP helper operations | `auxiliary.mcp` |
 | Memory Flush | Memory consolidation | `auxiliary.flush_memories` |
+| Approval | Smart command-approval classification | `auxiliary.approval` |
+| Title Generation | Session title summaries | `auxiliary.title_generation` |
 
 ### Auto-Detection Chain
 
@@ -336,5 +344,7 @@ See [Scheduled Tasks (Cron)](/docs/user-guide/features/cron) for full configurat
 | Skills hub | Auto-detection chain | `auxiliary.skills_hub` |
 | MCP helpers | Auto-detection chain | `auxiliary.mcp` |
 | Memory flush | Auto-detection chain | `auxiliary.flush_memories` |
+| Approval classification | Auto-detection chain | `auxiliary.approval` |
+| Title generation | Auto-detection chain | `auxiliary.title_generation` |
 | Delegation | Provider override only (no automatic fallback) | `delegation.provider` / `delegation.model` |
 | Cron jobs | Per-job provider override only (no automatic fallback) | Per-job `provider` / `model` |
diff --git a/website/docs/user-guide/features/honcho.md b/website/docs/user-guide/features/honcho.md
index 2040949d258..60e82b4b08f 100644
--- a/website/docs/user-guide/features/honcho.md
+++ b/website/docs/user-guide/features/honcho.md
@@ -77,7 +77,7 @@ Cost and depth are controlled by three independent knobs:
 | Knob | Controls | Default |
 |------|----------|---------|
 | `contextCadence` | Turns between `context()` API calls (base layer refresh) | `1` |
-| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `3` |
+| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `2` (recommended 1–5) |
 | `dialecticDepth` | Number of `.chat()` passes per dialectic invocation (1–3) | `1` |
 
 These are orthogonal — you can have frequent context refreshes with infrequent dialectic, or deep multi-pass dialectic at low frequency. Example: `contextCadence: 1, dialecticCadence: 5, dialecticDepth: 2` refreshes base context every turn, runs dialectic every 5 turns, and each dialectic run makes 2 passes.
@@ -94,6 +94,14 @@ Each pass uses a proportional reasoning level (lighter early passes, base level
 
 Passes bail out early if the prior pass returned strong signal (long, structured output), so depth 3 doesn't always mean 3 LLM calls.
 
+### Session-Start Prewarm
+
+On session init, Honcho fires a dialectic call in the background at the full configured `dialecticDepth` and hands the result directly to turn 1's context assembly. A single-pass prewarm on a cold peer often returns thin output — multi-pass depth runs the audit/reconcile cycle before the user ever speaks. If prewarm hasn't landed by turn 1, turn 1 falls back to a synchronous call with a bounded timeout.
+
+### Query-Adaptive Reasoning Level
+
+The auto-injected dialectic scales `dialecticReasoningLevel` by query length: +1 level at ≥120 chars, +2 at ≥400, clamped at `reasoningLevelCap` (default `"high"`). Disable with `reasoningHeuristic: false` to pin every auto call to `dialecticReasoningLevel`. Available levels: `minimal`, `low`, `medium`, `high`, `max`.
+
 ## Configuration Options
 
 Honcho is configured in `~/.honcho/config.json` (global) or `$HERMES_HOME/honcho.json` (profile-local). The setup wizard handles this for you.
@@ -104,7 +112,7 @@ Honcho is configured in `~/.honcho/config.json` (global) or `$HERMES_HOME/honcho
 |-----|---------|-------------|
 | `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Set to an integer (e.g. 1200) to cap. Truncates at word boundaries |
 | `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) |
-| `dialecticCadence` | `3` | Minimum turns between `peer.chat()` LLM calls (dialectic layer). In `tools` mode, irrelevant — model calls explicitly |
+| `dialecticCadence` | `2` | Minimum turns between `peer.chat()` LLM calls (dialectic layer). Recommended 1–5. In `tools` mode, irrelevant — model calls explicitly |
 | `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped to 1–3 |
 | `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. Overrides proportional defaults |
 | `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` |
@@ -142,6 +150,41 @@ Honcho is configured in `~/.honcho/config.json` (global) or `$HERMES_HOME/honcho
 
 In `tools` mode, the model is fully in control — it calls `honcho_reasoning` when it wants, at whatever `reasoning_level` it picks. Cadence and budget settings only apply to modes with auto-injection (`hybrid` and `context`).
 
+## Observation (Directional vs. Unified)
+
+Honcho models a conversation as peers exchanging messages. Each peer has two observation toggles that map 1:1 to Honcho's `SessionPeerConfig`:
+
+| Toggle | Effect |
+|--------|--------|
+| `observeMe` | Honcho builds a representation of this peer from its own messages |
+| `observeOthers` | This peer observes the other peer's messages (feeds cross-peer reasoning) |
+
+Two peers × two toggles = four flags. `observationMode` is a shorthand preset:
+
+| Preset | User flags | AI flags | Semantics |
+|--------|-----------|----------|-----------|
+| `"directional"` (default) | me: on, others: on | me: on, others: on | Full mutual observation. Enables cross-peer dialectic — "what does the AI know about the user, based on what the user said and the AI replied." |
+| `"unified"` | me: on, others: off | me: off, others: on | Shared-pool semantics — the AI observes only the user's messages, and the user peer only models itself. Single-observer pool. |
+
+Override the preset with an explicit `observation` block for per-peer control:
+
+```json
+"observation": {
+  "user": { "observeMe": true,  "observeOthers": true },
+  "ai":   { "observeMe": true,  "observeOthers": false }
+}
+```
+
+Common patterns:
+
+| Intent | Config |
+|--------|--------|
+| Full observation (most users) | `"observationMode": "directional"` |
+| AI shouldn't re-model the user from its own replies | `"ai": {"observeMe": true, "observeOthers": false}` |
+| Strong persona the AI peer shouldn't update from self-observation | `"ai": {"observeMe": false, "observeOthers": true}` |
+
+Server-side toggles set via the [Honcho dashboard](https://app.honcho.dev) win over local defaults — Hermes syncs them back at session init.
+
 ## Tools
 
 When Honcho is active as the memory provider, five tools become available:
diff --git a/website/docs/user-guide/features/hooks.md b/website/docs/user-guide/features/hooks.md
index c1c7ef05bf7..a64f3220956 100644
--- a/website/docs/user-guide/features/hooks.md
+++ b/website/docs/user-guide/features/hooks.md
@@ -243,6 +243,8 @@ def register(ctx):
 | [`post_llm_call`](#post_llm_call) | Once per turn, after the tool-calling loop | ignored |
 | [`on_session_start`](#on_session_start) | New session created (first turn only) | ignored |
 | [`on_session_end`](#on_session_end) | Session ends | ignored |
+| [`on_session_finalize`](#on_session_finalize) | CLI/gateway tears down an active session (flush, save, stats) | ignored |
+| [`on_session_reset`](#on_session_reset) | Gateway swaps in a fresh session key (e.g. `/new`, `/reset`) | ignored |
 
 ---
 
@@ -600,4 +602,50 @@ def register(ctx):
 
 ---
 
+### `on_session_finalize`
+
+Fires when the CLI or gateway **tears down** an active session — for example, when the user runs `/new`, the gateway garbage-collects an idle session, or the CLI quits with an active agent. This is the last chance to flush state tied to the outgoing session before its identity is gone.
+
+**Callback signature:**
+
+```python
+def my_callback(session_id: str | None, platform: str, **kwargs):
+```
+
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `session_id` | `str` or `None` | The outgoing session ID. May be `None` if no active session existed. |
+| `platform` | `str` | `"cli"` or the messaging platform name (`"telegram"`, `"discord"`, etc.). |
+
+**Fires:** In `cli.py` (on `/new` / CLI exit) and `gateway/run.py` (when a session is reset or GC'd). Always paired with `on_session_reset` on the gateway side.
+
+**Return value:** Ignored.
+
+**Use cases:** Persist final session metrics before the session ID is discarded, close per-session resources, emit a final telemetry event, drain queued writes.
+
+---
+
+### `on_session_reset`
+
+Fires when the gateway **swaps in a new session key** for an active chat — for example, when the user invokes `/new`, `/reset`, or `/clear`, or when the adapter picks a fresh session after an idle window. This lets plugins react to the fact that conversation state has been wiped without waiting for the next `on_session_start`.
+
+**Callback signature:**
+
+```python
+def my_callback(session_id: str, platform: str, **kwargs):
+```
+
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `session_id` | `str` | The new session's ID (already rotated to the fresh value). |
+| `platform` | `str` | The messaging platform name. |
+
+**Fires:** In `gateway/run.py`, immediately after the new session key is allocated but before the next inbound message is processed. On the gateway, the order is: `on_session_finalize(old_id)` → swap → `on_session_reset(new_id)` → `on_session_start(new_id)` on the first inbound turn.
+
+**Return value:** Ignored.
+
+**Use cases:** Reset per-session caches keyed by `session_id`, emit "session rotated" analytics, prime a fresh state bucket.
+
+---
+
 See the **[Build a Plugin guide](/docs/guides/build-a-hermes-plugin)** for the full walkthrough including tool schemas, handlers, and advanced hook patterns.
diff --git a/website/docs/user-guide/features/memory-providers.md b/website/docs/user-guide/features/memory-providers.md
index f571c7d48f1..d11c36657a3 100644
--- a/website/docs/user-guide/features/memory-providers.md
+++ b/website/docs/user-guide/features/memory-providers.md
@@ -82,7 +82,7 @@ hermes memory setup        # select "honcho"
 | `workspace` | host key | Shared workspace ID |
 | `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Truncates at word boundaries |
 | `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) |
-| `dialecticCadence` | `3` | Minimum turns between `peer.chat()` LLM calls. Only applies to `hybrid`/`context` modes |
+| `dialecticCadence` | `2` | Minimum turns between `peer.chat()` LLM calls. Recommended 1–5. Only applies to `hybrid`/`context` modes |
 | `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped 1–3. Pass 0: cold/warm prompt, pass 1: self-audit, pass 2: reconciliation |
 | `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. Overrides proportional defaults |
 | `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` |
@@ -140,23 +140,64 @@ hermes memory setup        # select "honcho"
 If you previously used `hermes honcho setup`, your config and all server-side data are intact. Just re-enable through the setup wizard again or manually set `memory.provider: honcho` to reactivate via the new system.
 :::
 
-**Multi-agent / Profiles:**
+**Multi-peer setup:**
 
-Each Hermes profile gets its own Honcho AI peer while sharing the same workspace -- all profiles see the same user representation, but each agent builds its own identity and observations.
+Honcho models conversations as peers exchanging messages — one user peer plus one AI peer per Hermes profile, all sharing a workspace. The workspace is the shared environment: the user peer is global across profiles, each AI peer is its own identity. Every AI peer builds an independent representation / card from its own observations, so a `coder` profile stays code-oriented while a `writer` profile stays editorial against the same user.
+
+The mapping:
+
+| Concept | What it is |
+|---------|-----------|
+| **Workspace** | Shared environment. All Hermes profiles under one workspace see the same user identity. |
+| **User peer** (`peerName`) | The human. Shared across profiles in the workspace. |
+| **AI peer** (`aiPeer`) | One per Hermes profile. Host key `hermes` → default; `hermes.<profile>` for others. |
+| **Observation** | Per-peer toggles controlling what Honcho models from whose messages. `directional` (default, all four on) or `unified` (single-observer pool). |
+
+### New profile, fresh Honcho peer
 
 ```bash
-hermes profile create coder --clone   # creates honcho peer "coder", inherits config from default
+hermes profile create coder --clone
 ```
 
-What `--clone` does: creates a `hermes.coder` host block in `honcho.json` with `aiPeer: "coder"`, shared `workspace`, inherited `peerName`, `recallMode`, `writeFrequency`, `observation`, etc. The peer is eagerly created in Honcho so it exists before first message.
+`--clone` creates a `hermes.coder` host block in `honcho.json` with `aiPeer: "coder"`, shared `workspace`, inherited `peerName`, `recallMode`, `writeFrequency`, `observation`, etc. The AI peer is eagerly created in Honcho so it exists before the first message.
 
-For profiles created before Honcho was set up:
+### Existing profiles, backfill Honcho peers
 
 ```bash
-hermes honcho sync   # scans all profiles, creates host blocks for any missing ones
+hermes honcho sync
 ```
 
-This inherits settings from the default `hermes` host block and creates new AI peers for each profile. Idempotent -- skips profiles that already have a host block.
+Scans every Hermes profile, creates host blocks for any profile without one, inherits settings from the default `hermes` block, and creates the new AI peers eagerly. Idempotent — skips profiles that already have a host block.
+
+### Per-profile observation
+
+Each host block can override the observation config independently. Example: a code-focused profile where the AI peer observes the user but doesn't self-model:
+
+```json
+"hermes.coder": {
+  "aiPeer": "coder",
+  "observation": {
+    "user": { "observeMe": true, "observeOthers": true },
+    "ai":   { "observeMe": false, "observeOthers": true }
+  }
+}
+```
+
+**Observation toggles (one set per peer):**
+
+| Toggle | Effect |
+|--------|--------|
+| `observeMe` | Honcho builds a representation of this peer from its own messages |
+| `observeOthers` | This peer observes the other peer's messages (feeds cross-peer reasoning) |
+
+Presets via `observationMode`:
+
+- **`"directional"`** (default) — all four flags on. Full mutual observation; enables cross-peer dialectic.
+- **`"unified"`** — user `observeMe: true`, AI `observeOthers: true`, rest false. Single-observer pool; AI models the user but not itself, user peer only self-models.
+
+Server-side toggles set via the [Honcho dashboard](https://app.honcho.dev) win over local defaults — synced back at session init.
+
+See the [Honcho page](./honcho.md#observation-directional-vs-unified) for the full observation reference.
 
 <details>
 <summary>Full honcho.json example (multi-profile)</summary>
@@ -181,7 +222,7 @@ This inherits settings from the default `hermes` host block and creates new AI p
       },
       "dialecticReasoningLevel": "low",
       "dialecticDynamic": true,
-      "dialecticCadence": 3,
+      "dialecticCadence": 2,
       "dialecticDepth": 1,
       "dialecticMaxChars": 600,
       "contextCadence": 1,
diff --git a/website/docs/user-guide/features/tts.md b/website/docs/user-guide/features/tts.md
index 9b0fe8b3afc..6f7fc895062 100644
--- a/website/docs/user-guide/features/tts.md
+++ b/website/docs/user-guide/features/tts.md
@@ -14,7 +14,7 @@ If you have a paid [Nous Portal](https://portal.nousresearch.com) subscription,
 
 ## Text-to-Speech
 
-Convert text to speech with seven providers:
+Convert text to speech with eight providers:
 
 | Provider | Quality | Cost | API Key |
 |----------|---------|------|---------|
@@ -24,6 +24,7 @@ Convert text to speech with seven providers:
 | **MiniMax TTS** | Excellent | Paid | `MINIMAX_API_KEY` |
 | **Mistral (Voxtral TTS)** | Excellent | Paid | `MISTRAL_API_KEY` |
 | **Google Gemini TTS** | Excellent | Free tier | `GEMINI_API_KEY` |
+| **xAI TTS** | Excellent | Paid | `XAI_API_KEY` |
 | **NeuTTS** | Good | Free | None needed |
 
 ### Platform Delivery
@@ -40,7 +41,7 @@ Convert text to speech with seven providers:
 ```yaml
 # In ~/.hermes/config.yaml
 tts:
-  provider: "edge"              # "edge" | "elevenlabs" | "openai" | "minimax" | "mistral" | "gemini" | "neutts"
+  provider: "edge"              # "edge" | "elevenlabs" | "openai" | "minimax" | "mistral" | "gemini" | "xai" | "neutts"
   speed: 1.0                    # Global speed multiplier (provider-specific settings override this)
   edge:
     voice: "en-US-AriaNeural"   # 322 voices, 74 languages
@@ -65,6 +66,12 @@ tts:
   gemini:
     model: "gemini-2.5-flash-preview-tts"  # or gemini-2.5-pro-preview-tts
     voice: "Kore"               # 30 prebuilt voices: Zephyr, Puck, Kore, Enceladus, Gacrux, etc.
+  xai:
+    voice_id: "eve"             # xAI TTS voice (see https://docs.x.ai/docs/api-reference#tts)
+    language: "en"              # ISO 639-1 code
+    sample_rate: 24000          # 22050 / 24000 (default) / 44100 / 48000
+    bit_rate: 128000            # MP3 bitrate; only applies when codec=mp3
+    # base_url: "https://api.x.ai/v1"   # Override via XAI_BASE_URL env var
   neutts:
     ref_audio: ''
     ref_text: ''
@@ -82,6 +89,7 @@ Telegram voice bubbles require Opus/OGG audio format:
 - **Edge TTS** (default) outputs MP3 and needs **ffmpeg** to convert:
 - **MiniMax TTS** outputs MP3 and needs **ffmpeg** to convert for Telegram voice bubbles
 - **Google Gemini TTS** outputs raw PCM and uses **ffmpeg** to encode Opus directly for Telegram voice bubbles
+- **xAI TTS** outputs MP3 and needs **ffmpeg** to convert for Telegram voice bubbles
 - **NeuTTS** outputs WAV and also needs **ffmpeg** to convert for Telegram voice bubbles
 
 ```bash
diff --git a/website/docs/user-guide/messaging/dingtalk.md b/website/docs/user-guide/messaging/dingtalk.md
index d88c1a952f6..9e8e74ee26f 100644
--- a/website/docs/user-guide/messaging/dingtalk.md
+++ b/website/docs/user-guide/messaging/dingtalk.md
@@ -100,7 +100,14 @@ Run the guided setup command:
 hermes gateway setup
 ```
 
-Select **DingTalk** when prompted, then paste your Client ID, Client Secret, and allowed user IDs when asked.
+Select **DingTalk** when prompted. The setup wizard can authorize via one of two paths:
+
+- **QR-code device flow (recommended).** Use the DingTalk mobile app to scan the QR code printed in your terminal — your Client ID and Client Secret are returned automatically and written to `~/.hermes/.env`. No trip to the developer console needed.
+- **Manual paste.** If you already have credentials (or QR scanning isn't convenient), paste your Client ID, Client Secret, and allowed user IDs when prompted.
+
+:::note openClaw branding disclosure
+Because DingTalk's `verification_uri_complete` is hardcoded to the openClaw identity at the API layer, the QR currently authorizes under an `openClaw` source string until Alibaba / DingTalk-Real-AI registers a Hermes-specific template server-side. This is purely how DingTalk presents the consent screen — the bot you create is fully yours and private to your tenant.
+:::
 
 ### Option B: Manual Configuration
 
diff --git a/website/docs/user-guide/messaging/discord.md b/website/docs/user-guide/messaging/discord.md
index 233f544d9c6..0efe909b0d1 100644
--- a/website/docs/user-guide/messaging/discord.md
+++ b/website/docs/user-guide/messaging/discord.md
@@ -271,7 +271,8 @@ Discord behavior is controlled through two files: **`~/.hermes/.env`** for crede
 | Variable | Required | Default | Description |
 |----------|----------|---------|-------------|
 | `DISCORD_BOT_TOKEN` | **Yes** | — | Bot token from the [Discord Developer Portal](https://discord.com/developers/applications). |
-| `DISCORD_ALLOWED_USERS` | **Yes** | — | Comma-separated Discord user IDs allowed to interact with the bot. Without this, the gateway denies all users. |
+| `DISCORD_ALLOWED_USERS` | **Yes** | — | Comma-separated Discord user IDs allowed to interact with the bot. If neither this nor `DISCORD_ALLOWED_ROLES` is set, the gateway denies all users. |
+| `DISCORD_ALLOWED_ROLES` | No | — | Comma-separated Discord role IDs. Any member with one of these roles is authorized — OR semantics with `DISCORD_ALLOWED_USERS`. Auto-enables the **Server Members Intent** on connect. Useful when moderation teams churn: new mods get access as soon as the role is granted, no config push needed. |
 | `DISCORD_HOME_CHANNEL` | No | — | Channel ID where the bot sends proactive messages (cron output, reminders, notifications). |
 | `DISCORD_HOME_CHANNEL_NAME` | No | `"Home"` | Display name for the home channel in logs and status output. |
 | `DISCORD_REQUIRE_MENTION` | No | `true` | When `true`, the bot only responds in server channels when `@mentioned`. Set to `false` to respond to all messages in every channel. |
@@ -281,12 +282,16 @@ Discord behavior is controlled through two files: **`~/.hermes/.env`** for crede
 | `DISCORD_ALLOW_BOTS` | No | `"none"` | Controls how the bot handles messages from other Discord bots. `"none"` — ignore all other bots. `"mentions"` — only accept bot messages that `@mention` Hermes. `"all"` — accept all bot messages. |
 | `DISCORD_REACTIONS` | No | `true` | When `true`, the bot adds emoji reactions to messages during processing (👀 when starting, ✅ on success, ❌ on error). Set to `false` to disable reactions entirely. |
 | `DISCORD_IGNORED_CHANNELS` | No | — | Comma-separated channel IDs where the bot **never** responds, even when `@mentioned`. Takes priority over all other channel settings. |
+| `DISCORD_ALLOWED_CHANNELS` | No | — | Comma-separated channel IDs. When set, the bot **only** responds in these channels (plus DMs if allowed). Overrides `config.yaml` `discord.allowed_channels`. Combine with `DISCORD_IGNORED_CHANNELS` to express allow/deny rules. |
 | `DISCORD_NO_THREAD_CHANNELS` | No | — | Comma-separated channel IDs where the bot responds directly in the channel instead of creating a thread. Only relevant when `DISCORD_AUTO_THREAD` is `true`. |
 | `DISCORD_REPLY_TO_MODE` | No | `"first"` | Controls reply-reference behavior: `"off"` — never reply to the original message, `"first"` — reply-reference on the first message chunk only (default), `"all"` — reply-reference on every chunk. |
 | `DISCORD_ALLOW_MENTION_EVERYONE` | No | `false` | When `false` (default), the bot cannot ping `@everyone` or `@here` even if its response contains those tokens. Set to `true` to opt back in. See [Mention Control](#mention-control) below. |
 | `DISCORD_ALLOW_MENTION_ROLES` | No | `false` | When `false` (default), the bot cannot ping `@role` mentions. Set to `true` to allow. |
 | `DISCORD_ALLOW_MENTION_USERS` | No | `true` | When `true` (default), the bot can ping individual users by ID. |
 | `DISCORD_ALLOW_MENTION_REPLIED_USER` | No | `true` | When `true` (default), replying to a message pings the original author. |
+| `DISCORD_PROXY` | No | — | Proxy URL for Discord connections (HTTP, WebSocket, REST). Overrides `HTTPS_PROXY`/`ALL_PROXY`. Supports `http://`, `https://`, and `socks5://` schemes. |
+| `HERMES_DISCORD_TEXT_BATCH_DELAY_SECONDS` | No | `0.6` | Grace window the adapter waits before flushing a queued text chunk. Useful for smoothing streamed output. |
+| `HERMES_DISCORD_TEXT_BATCH_SPLIT_DELAY_SECONDS` | No | `0.1` | Delay between split chunks when a single message exceeds Discord's length limit. |
 
 ### Config File (`config.yaml`)
 
@@ -569,9 +574,27 @@ If you intentionally want a shared room conversation, leave it off — just expe
 ## Security
 
 :::warning
-Always set `DISCORD_ALLOWED_USERS` to restrict who can interact with the bot. Without it, the gateway denies all users by default as a safety measure. Only add User IDs of people you trust — authorized users have full access to the agent's capabilities, including tool use and system access.
+Always set `DISCORD_ALLOWED_USERS` (or `DISCORD_ALLOWED_ROLES`) to restrict who can interact with the bot. Without either, the gateway denies all users by default as a safety measure. Only authorize people you trust — authorized users have full access to the agent's capabilities, including tool use and system access.
 :::
 
+### Role-Based Access Control
+
+For servers where access is managed by roles instead of individual user lists (moderator teams, support staff, internal tooling), use `DISCORD_ALLOWED_ROLES` — a comma-separated list of role IDs. Any member with one of those roles is authorized.
+
+```bash
+# ~/.hermes/.env — works alongside or instead of DISCORD_ALLOWED_USERS
+DISCORD_ALLOWED_ROLES=987654321098765432,876543210987654321
+```
+
+Semantics:
+
+- **OR with user allowlist.** A user is authorized if their ID is in `DISCORD_ALLOWED_USERS` **or** they have any role in `DISCORD_ALLOWED_ROLES`.
+- **Server Members Intent auto-enabled.** When `DISCORD_ALLOWED_ROLES` is set, the bot enables the Members intent on connect — required for Discord to send role information with member records.
+- **Role IDs, not names.** Grab them from Discord: **User Settings → Advanced → Developer Mode ON**, then right-click any role → **Copy Role ID**.
+- **DM fallback.** In DMs the role check scans mutual guilds; a user with an allowed role in any shared server is authorized in DMs too.
+
+This is the preferred pattern when the moderation team churns — new moderators get access the moment the role is granted, with no `.env` edit or gateway restart.
+
 ### Mention Control
 
 By default, Hermes blocks the bot from pinging `@everyone`, `@here`, and role mentions, even if its reply contains those tokens. This prevents a poorly-worded prompt or echoed user content from spamming a whole server. Individual `@user` pings and reply-reference pings (the little "replying to…" chip) stay enabled so normal conversation still works.
diff --git a/website/docs/user-guide/messaging/feishu.md b/website/docs/user-guide/messaging/feishu.md
index 4d9783d402b..6e9f1d0e7fb 100644
--- a/website/docs/user-guide/messaging/feishu.md
+++ b/website/docs/user-guide/messaging/feishu.md
@@ -244,6 +244,54 @@ Interactive cards require **three** configuration steps in the Feishu Developer
 Without all three steps, Feishu will successfully *send* interactive cards (sending only requires `im:message:send` permission), but clicking any button will return error 200340. The card appears to work — the error only surfaces when a user interacts with it.
 :::
 
+## Document Comment Intelligent Reply
+
+Beyond chat, the adapter can also answer `@`-mentions left on **Feishu/Lark documents**. When a user comments on a document (local text selection or whole-doc comment) and @-mentions the bot, Hermes reads the document plus the surrounding comment thread and posts an LLM reply inline on the thread.
+
+Powered by the `drive.notice.comment_add_v1` event, the handler:
+
+- Fetches the document content and comment timeline in parallel (20 messages for whole-doc threads, 12 for local-selection threads).
+- Runs the agent with the `feishu_doc` + `feishu_drive` toolsets scoped to that single comment session.
+- Chunks replies at 4000 chars and posts them back as threaded replies.
+- Caches per-document sessions for 1 hour with a 50-message cap so follow-up comments on the same doc keep context.
+
+### 3-Tier Access Control
+
+Document-comment replies are **explicit-grant only** — there is no implicit allow-all mode. Permissions resolve in this order (first match wins, per field):
+
+1. **Exact doc** — rule scoped to a specific document token.
+2. **Wildcard** — rule that matches a pattern of docs.
+3. **Top-level** — default rule for the workspace.
+
+Two policies are available per rule:
+
+- **`allowlist`** — a static list of users / tenants.
+- **`pairing`** — the static list combined with a store of runtime-approved grants. Useful for rollouts where moderators can grant access live.
+
+Rules live in `~/.hermes/feishu_comment_rules.json` (pairing grants in `~/.hermes/feishu_comment_pairing.json`) with mtime-cached hot-reload — edits take effect on the next comment event without restarting the gateway.
+
+CLI:
+
+```bash
+# Inspect current rules and pairing state
+python -m gateway.platforms.feishu_comment_rules status
+
+# Simulate an access check for a specific doc + user
+python -m gateway.platforms.feishu_comment_rules check <fileType:fileToken> <user_open_id>
+
+# Manage pairing grants at runtime
+python -m gateway.platforms.feishu_comment_rules pairing list
+python -m gateway.platforms.feishu_comment_rules pairing add <user_open_id>
+python -m gateway.platforms.feishu_comment_rules pairing remove <user_open_id>
+```
+
+### Required Feishu App Configuration
+
+On top of the chat/card permissions already granted, subscribe to the drive comment event and grant the document read scopes:
+
+- Subscribe to `drive.notice.comment_add_v1` in **Event Subscriptions**.
+- Grant the `docs:doc:readonly` and `drive:drive:readonly` scopes so the handler can read document content.
+
 ## Media Support
 
 ### Inbound (receiving)
diff --git a/website/docs/user-guide/messaging/matrix.md b/website/docs/user-guide/messaging/matrix.md
index ec77b5bc33e..255806c01ba 100644
--- a/website/docs/user-guide/messaging/matrix.md
+++ b/website/docs/user-guide/messaging/matrix.md
@@ -72,8 +72,13 @@ MATRIX_REQUIRE_MENTION=true
 MATRIX_FREE_RESPONSE_ROOMS=!abc123:matrix.org,!def456:matrix.org
 MATRIX_AUTO_THREAD=true
 MATRIX_DM_MENTION_THREADS=false
+MATRIX_REACTIONS=true          # default: true — emoji reactions during processing
 ```
 
+:::tip Disabling reactions
+`MATRIX_REACTIONS=false` turns off the processing-lifecycle emoji reactions (👀/✅/❌) the bot posts on inbound messages. Useful for rooms where reaction events are noisy or aren't supported by all participating clients.
+:::
+
 :::note
 If you are upgrading from a version that did not have `MATRIX_REQUIRE_MENTION`, the bot previously responded to all messages in rooms. To preserve that behavior, set `MATRIX_REQUIRE_MENTION=false`.
 :::
diff --git a/website/docs/user-guide/messaging/open-webui.md b/website/docs/user-guide/messaging/open-webui.md
index b26d23eddfd..efdf901371b 100644
--- a/website/docs/user-guide/messaging/open-webui.md
+++ b/website/docs/user-guide/messaging/open-webui.md
@@ -198,7 +198,7 @@ Make sure your `OPENAI_API_KEY` in Open WebUI matches the `API_SERVER_KEY` in He
 
 ## Multi-User Setup with Profiles
 
-To run separate Hermes instances per user — each with their own config, memory, and skills — use [profiles](/docs/user-guide/features/profiles). Each profile runs its own API server on a different port and automatically advertises the profile name as the model in Open WebUI.
+To run separate Hermes instances per user — each with their own config, memory, and skills — use [profiles](/docs/user-guide/profiles). Each profile runs its own API server on a different port and automatically advertises the profile name as the model in Open WebUI.
 
 ### 1. Create profiles and configure API servers
 
diff --git a/website/docs/user-guide/messaging/qqbot.md b/website/docs/user-guide/messaging/qqbot.md
index d9da90d5868..8da6f92def5 100644
--- a/website/docs/user-guide/messaging/qqbot.md
+++ b/website/docs/user-guide/messaging/qqbot.md
@@ -28,7 +28,7 @@ The QQ Bot adapter uses the [Official QQ Bot API](https://bot.q.qq.com/wiki/deve
 ### Interactive setup
 
 ```bash
-hermes setup gateway
+hermes gateway setup
 ```
 
 Select **QQ Bot** from the platform list and follow the prompts.
@@ -52,7 +52,7 @@ QQ_CLIENT_SECRET=your-app-secret
 | `QQBOT_HOME_CHANNEL_NAME` | Display name for home channel | `Home` |
 | `QQ_ALLOWED_USERS` | Comma-separated user OpenIDs for DM access | open (all users) |
 | `QQ_ALLOW_ALL_USERS` | Set to `true` to allow all DMs | `false` |
-| `QQ_MARKDOWN_SUPPORT` | Enable QQ markdown (msg_type 2) | `true` |
+| `QQ_SANDBOX` | Route requests to the QQ sandbox gateway for development testing | `false` |
 | `QQ_STT_API_KEY` | API key for voice-to-text provider | — |
 | `QQ_STT_BASE_URL` | Base URL for STT provider | `https://open.bigmodel.cn/api/coding/paas/v4` |
 | `QQ_STT_MODEL` | STT model name | `glm-asr` |
@@ -68,7 +68,7 @@ platforms:
     extra:
       app_id: "your-app-id"
       client_secret: "your-secret"
-      markdown_support: true
+      markdown_support: true       # enable QQ markdown (msg_type 2). Config-only; no env-var equivalent.
       dm_policy: "open"          # open | allowlist | disabled
       allow_from:
         - "user_openid_1"
diff --git a/website/docs/user-guide/messaging/slack.md b/website/docs/user-guide/messaging/slack.md
index 5f6492216a9..a7eff683da8 100644
--- a/website/docs/user-guide/messaging/slack.md
+++ b/website/docs/user-guide/messaging/slack.md
@@ -283,7 +283,7 @@ slack:
 ```
 
 :::info
-Unlike Discord and Telegram, Slack does not have a `free_response_channels` equivalent. The Slack adapter requires `@mention` to start a conversation in channels. However, once the bot has an active session in a thread, subsequent thread replies do not require a mention. In DMs, the bot always responds without needing a mention.
+By default, the Slack adapter requires an `@mention` to start a conversation in channels, but you can exempt specific channels via `SLACK_FREE_RESPONSE_CHANNELS` (comma-separated channel IDs) or `slack.free_response_channels` in `config.yaml`. Once the bot has an active session in a thread, subsequent thread replies do not require a mention. In DMs, the bot always responds without needing a mention.
 :::
 
 ### Unauthorized User Handling
diff --git a/website/docs/user-guide/messaging/telegram.md b/website/docs/user-guide/messaging/telegram.md
index 0fa2e830b9d..a92fc8d2232 100644
--- a/website/docs/user-guide/messaging/telegram.md
+++ b/website/docs/user-guide/messaging/telegram.md
@@ -112,6 +112,38 @@ hermes gateway
 
 The bot should come online within seconds. Send it a message on Telegram to verify.
 
+## Sending Generated Files from Docker-backed Terminals
+
+If your terminal backend is `docker`, keep in mind that Telegram attachments are
+sent by the **gateway process**, not from inside the container. That means the
+final `MEDIA:/...` path must be readable on the host where the gateway is
+running.
+
+Common pitfall:
+
+- the agent writes a file inside Docker to `/workspace/report.txt`
+- the model emits `MEDIA:/workspace/report.txt`
+- Telegram delivery fails because `/workspace/report.txt` only exists inside the
+  container, not on the host
+
+Recommended pattern:
+
+```yaml
+terminal:
+  backend: docker
+  docker_volumes:
+    - "/home/user/.hermes/cache/documents:/output"
+```
+
+Then:
+
+- write files inside Docker to `/output/...`
+- emit the **host-visible** path in `MEDIA:`, for example:
+  `MEDIA:/home/user/.hermes/cache/documents/report.txt`
+
+If you already have a `docker_volumes:` section, add the new mount to the same
+list instead of adding a second `docker_volumes:` key — duplicate YAML keys silently override earlier ones.
+
 ## Webhook Mode
 
 By default, Hermes connects to Telegram using **long polling** — the gateway makes outbound requests to Telegram's servers to fetch new updates. This works well for local and always-on deployments.
@@ -422,40 +454,6 @@ The current model and provider are displayed at the top. All navigation happens
 If you know the exact model name, type `/model <name>` directly to skip the picker. You can also type `/model <name> --global` to persist the change across sessions.
 :::
 
-## Webhook Mode
-
-By default, the Telegram adapter connects via **long polling** — the gateway makes outbound connections to Telegram's servers. This works everywhere but keeps a persistent connection open.
-
-**Webhook mode** is an alternative where Telegram pushes updates to your server over HTTPS. This is ideal for **serverless and cloud deployments** (Fly.io, Railway, etc.) where inbound HTTP can wake a suspended machine.
-
-### Configuration
-
-Set the `TELEGRAM_WEBHOOK_URL` environment variable to enable webhook mode:
-
-```bash
-# Required — your public HTTPS endpoint
-TELEGRAM_WEBHOOK_URL=https://app.fly.dev/telegram
-
-# Optional — local listen port (default: 8443)
-TELEGRAM_WEBHOOK_PORT=8443
-
-# Optional — secret token for update verification (auto-generated if not set)
-TELEGRAM_WEBHOOK_SECRET=my-secret-token
-```
-
-Or in `~/.hermes/config.yaml`:
-
-```yaml
-telegram:
-  webhook_mode: true
-```
-
-When `TELEGRAM_WEBHOOK_URL` is set, the gateway starts an HTTP server listening on `0.0.0.0:<port>` and registers the webhook URL with Telegram. The URL path is extracted from the webhook URL (defaults to `/telegram`).
-
-:::warning
-Telegram requires a **valid TLS certificate** on the webhook endpoint. Self-signed certificates will be rejected. Use a reverse proxy (nginx, Caddy) or a platform that provides TLS termination (Fly.io, Railway, Cloudflare Tunnel).
-:::
-
 ## DNS-over-HTTPS Fallback IPs
 
 In some restricted networks, `api.telegram.org` may resolve to an IP that is unreachable. The Telegram adapter includes a **fallback IP** mechanism that transparently retries connections against alternative IPs while preserving the correct TLS hostname and SNI.
diff --git a/website/docs/user-guide/messaging/webhooks.md b/website/docs/user-guide/messaging/webhooks.md
index bbf04bcb4f8..2c60624fb64 100644
--- a/website/docs/user-guide/messaging/webhooks.md
+++ b/website/docs/user-guide/messaging/webhooks.md
@@ -72,6 +72,7 @@ Routes define how different webhook sources are handled. Each route is a named e
 | `skills` | No | List of skill names to load for the agent run. |
 | `deliver` | No | Where to send the response: `github_comment`, `telegram`, `discord`, `slack`, `signal`, `sms`, `whatsapp`, `matrix`, `mattermost`, `homeassistant`, `email`, `dingtalk`, `feishu`, `wecom`, `weixin`, `bluebubbles`, `qqbot`, or `log` (default). |
 | `deliver_extra` | No | Additional delivery config — keys depend on `deliver` type (e.g. `repo`, `pr_number`, `chat_id`). Values support the same `{dot.notation}` templates as `prompt`. |
+| `deliver_only` | No | If `true`, skip the agent entirely — the rendered `prompt` template becomes the literal message that gets delivered. Zero LLM cost, sub-second delivery. See [Direct Delivery Mode](#direct-delivery-mode) for use cases. Requires `deliver` to be a real target (not `log`). |
 
 ### Full example
 
@@ -240,6 +241,80 @@ For cross-platform delivery, the target platform must also be enabled and connec
 
 ---
 
+## Direct Delivery Mode {#direct-delivery-mode}
+
+By default, every webhook POST triggers an agent run — the payload becomes a prompt, the agent processes it, and the agent's response is delivered. This costs LLM tokens on every event.
+
+For use cases where you just want to **push a plain notification** — no reasoning, no agent loop, just deliver the message — set `deliver_only: true` on the route. The rendered `prompt` template becomes the literal message body, and the adapter dispatches it directly to the configured delivery target.
+
+### When to use direct delivery
+
+- **External service push** — Supabase/Firebase webhook fires on a database change → notify a user in Telegram instantly
+- **Monitoring alerts** — Datadog/Grafana alert webhook → push to a Discord channel
+- **Inter-agent pings** — Agent A notifies Agent B's user that a long-running task finished
+- **Background job completion** — Cron job finishes → post result to Slack
+
+Benefits:
+
+- **Zero LLM tokens** — the agent is never invoked
+- **Sub-second delivery** — a single adapter call, no reasoning loop
+- **Same security as agent mode** — HMAC auth, rate limits, idempotency, and body-size limits all still apply
+- **Synchronous response** — the POST returns `200 OK` once delivery succeeds, or `502` if the target rejects it, so your upstream service can retry intelligently
+
+### Example: Telegram push from Supabase
+
+```yaml
+platforms:
+  webhook:
+    enabled: true
+    extra:
+      port: 8644
+      secret: "global-secret"
+      routes:
+        antenna-matches:
+          secret: "antenna-webhook-secret"
+          deliver: "telegram"
+          deliver_only: true
+          prompt: "🎉 New match: {match.user_name} matched with you!"
+          deliver_extra:
+            chat_id: "{match.telegram_chat_id}"
+```
+
+Your Supabase edge function signs the payload with HMAC-SHA256 and POSTs to `https://your-server:8644/webhooks/antenna-matches`. The webhook adapter validates the signature, renders the template from the payload, delivers to Telegram, and returns `200 OK`.
+
+### Example: Dynamic subscription via CLI
+
+```bash
+hermes webhook subscribe antenna-matches \
+  --deliver telegram \
+  --deliver-chat-id "123456789" \
+  --deliver-only \
+  --prompt "🎉 New match: {match.user_name} matched with you!" \
+  --description "Antenna match notifications"
+```
+
+### Response codes
+
+| Status | Meaning |
+|--------|---------|
+| `200 OK` | Delivered successfully. Body: `{"status": "delivered", "route": "...", "target": "...", "delivery_id": "..."}` |
+| `200 OK` (status=duplicate) | Duplicate `X-GitHub-Delivery` / `X-Request-ID` value within the idempotency TTL (1 hour). Not re-delivered. |
+| `401 Unauthorized` | HMAC signature invalid or missing. |
+| `400 Bad Request` | Malformed JSON body. |
+| `404 Not Found` | Unknown route name. |
+| `413 Payload Too Large` | Body exceeded `max_body_bytes`. |
+| `429 Too Many Requests` | Route rate limit exceeded. |
+| `502 Bad Gateway` | Target adapter rejected the message or raised. The error is logged server-side; the response body is a generic `Delivery failed` to avoid leaking adapter internals. |
+
+### Configuration gotchas
+
+- `deliver_only: true` requires `deliver` to be a real target. `deliver: log` (or omitting `deliver`) is rejected at startup — the adapter refuses to start if it finds a misconfigured route.
+- The `skills` field is ignored in direct delivery mode (no agent runs, so there's nothing to inject skills into).
+- Template rendering uses the same `{dot.notation}` syntax as agent mode, including the `{__raw__}` token.
+- Idempotency uses the same `X-GitHub-Delivery` / `X-Request-ID` headers as agent mode — retries with the same ID return `status=duplicate` and do NOT re-deliver.
+
+---
+
 ## Dynamic Subscriptions (CLI) {#dynamic-subscriptions}
 
 In addition to static routes in `config.yaml`, you can create webhook subscriptions dynamically using the `hermes webhook` CLI command. This is especially useful when the agent itself needs to set up event-driven triggers.
diff --git a/website/docs/user-guide/profiles.md b/website/docs/user-guide/profiles.md
index 67609564f7e..aef4d10b215 100644
--- a/website/docs/user-guide/profiles.md
+++ b/website/docs/user-guide/profiles.md
@@ -4,11 +4,11 @@ sidebar_position: 2
 
 # Profiles: Running Multiple Agents
 
-Run multiple independent Hermes agents on the same machine — each with its own config, API keys, memory, sessions, skills, and gateway.
+Run multiple independent Hermes agents on the same machine — each with its own config, API keys, memory, sessions, skills, and gateway state.
 
 ## What are profiles?
 
-A profile is a fully isolated Hermes environment. Each profile gets its own directory containing its own `config.yaml`, `.env`, `SOUL.md`, memories, sessions, skills, cron jobs, and state database. Profiles let you run separate agents for different purposes — a coding assistant, a personal bot, a research agent — without any cross-contamination.
+A profile is a separate Hermes home directory. Each profile gets its own directory containing its own `config.yaml`, `.env`, `SOUL.md`, memories, sessions, skills, cron jobs, and state database. Profiles let you run separate agents for different purposes — a coding assistant, a personal bot, a research agent — without mixing up Hermes state.
 
 When you create a profile, it automatically becomes its own command. Create a profile called `coder` and you immediately have `coder chat`, `coder setup`, `coder gateway start`, etc.
 
@@ -20,7 +20,7 @@ coder setup                       # configure API keys and model
 coder chat                        # start chatting
 ```
 
-That's it. `coder` is now a fully independent agent. It has its own config, its own memory, its own everything.
+That's it. `coder` is now its own Hermes profile with its own config, memory, and state.
 
 ## Creating a profile
 
@@ -104,6 +104,32 @@ The CLI always shows which profile is active:
 - **Banner**: Shows `Profile: coder` on startup
 - **`hermes profile`**: Shows current profile name, path, model, gateway status
 
+## Profiles vs workspaces vs sandboxing
+
+Profiles are often confused with workspaces or sandboxes, but they are different things:
+
+- A **profile** gives Hermes its own state directory: `config.yaml`, `.env`, `SOUL.md`, sessions, memory, logs, cron jobs, and gateway state.
+- A **workspace** or **working directory** is where terminal commands start. That is controlled separately by `terminal.cwd`.
+- A **sandbox** is what limits filesystem access. Profiles do **not** sandbox the agent.
+
+On the default `local` terminal backend, the agent still has the same filesystem access as your user account. A profile does not stop it from accessing folders outside the profile directory.
+
+If you want a profile to start in a specific project folder, set an explicit absolute `terminal.cwd` in that profile's `config.yaml`:
+
+```yaml
+terminal:
+  backend: local
+  cwd: /absolute/path/to/project
+```
+
+Using `cwd: "."` on the local backend means "the directory Hermes was launched from", not "the profile directory".
+
+Also note:
+
+- `SOUL.md` can guide the model, but it does not enforce a workspace boundary.
+- Changes to `SOUL.md` take effect cleanly on a new session. Existing sessions may still be using the old prompt state.
+- Asking the model "what directory are you in?" is not a reliable isolation test. If you need a predictable starting directory for tools, set `terminal.cwd` explicitly.
+
 ## Running gateways
 
 Each profile runs its own gateway as a separate process with its own bot token:
@@ -151,6 +177,12 @@ coder config set model.model anthropic/claude-sonnet-4
 echo "You are a focused coding assistant." > ~/.hermes/profiles/coder/SOUL.md
 ```
 
+If you want this profile to work in a specific project by default, also set its own `terminal.cwd`:
+
+```bash
+coder config set terminal.cwd /absolute/path/to/project
+```
+
 ## Updating
 
 `hermes update` pulls code once (shared) and syncs new bundled skills to **all** profiles automatically:
@@ -201,6 +233,8 @@ Add the line to your `~/.bashrc` or `~/.zshrc` for persistent completion. Comple
 
 ## How it works
 
-Profiles use the `HERMES_HOME` environment variable. When you run `coder chat`, the wrapper script sets `HERMES_HOME=~/.hermes/profiles/coder` before launching hermes. Since 119+ files in the codebase resolve paths via `get_hermes_home()`, everything automatically scopes to the profile's directory — config, sessions, memory, skills, state database, gateway PID, logs, and cron jobs.
+Profiles use the `HERMES_HOME` environment variable. When you run `coder chat`, the wrapper script sets `HERMES_HOME=~/.hermes/profiles/coder` before launching hermes. Since 119+ files in the codebase resolve paths via `get_hermes_home()`, Hermes state automatically scopes to the profile's directory — config, sessions, memory, skills, state database, gateway PID, logs, and cron jobs.
+
+This is separate from the terminal working directory. Tool execution starts from `terminal.cwd` (or from the launch directory when `cwd` is `"."` on the local backend), not automatically from `HERMES_HOME`.
 
 The default profile is simply `~/.hermes` itself. No migration needed — existing installs work identically.
diff --git a/website/sidebars.ts b/website/sidebars.ts
index c84184c4e67..d57a71dcc2c 100644
--- a/website/sidebars.ts
+++ b/website/sidebars.ts
@@ -162,6 +162,8 @@ const sidebars: SidebarsConfig = {
         'guides/cron-troubleshooting',
         'guides/work-with-skills',
         'guides/delegation-patterns',
+        'guides/github-pr-review-agent',
+        'guides/webhook-github-pr-review',
         'guides/migrate-from-openclaw',
         'guides/aws-bedrock',
       ],