diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4577454e4..b4c1ee09d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -88,7 +88,7 @@ cp cli-config.yaml.example ~/.hermes/config.yaml touch ~/.hermes/.env # Add at minimum an LLM provider key: -echo 'OPENROUTER_API_KEY=sk-or-v1-your-key' >> ~/.hermes/.env +echo "OPENROUTER_API_KEY=sk-or-v1-your-key" >> ~/.hermes/.env ``` ### Run diff --git a/Dockerfile b/Dockerfile index a684f9fb3..8904c4c74 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,7 +12,7 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright # Install system dependencies in one layer, clear APT cache RUN apt-get update && \ apt-get install -y --no-install-recommends \ - build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git && \ + build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli && \ rm -rf /var/lib/apt/lists/* # Non-root user for runtime; UID can be overridden via HERMES_UID at runtime @@ -50,5 +50,6 @@ RUN uv venv && \ # ---------- Runtime ---------- ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist ENV HERMES_HOME=/opt/data +ENV PATH="/opt/data/.local/bin:${PATH}" VOLUME [ "/opt/data" ] ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ] diff --git a/README.md b/README.md index 622910b3a..70b65debd 100644 --- a/README.md +++ b/README.md @@ -173,7 +173,6 @@ python -m pytest tests/ -q - 💬 [Discord](https://discord.gg/NousResearch) - 📚 [Skills Hub](https://agentskills.io) - 🐛 [Issues](https://github.com/NousResearch/hermes-agent/issues) -- 💡 [Discussions](https://github.com/NousResearch/hermes-agent/discussions) - 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — Community WeChat bridge: Run Hermes Agent and OpenClaw on the same WeChat account. 
--- diff --git a/acp_adapter/permissions.py b/acp_adapter/permissions.py index 68f61e340..c2e1a5982 100644 --- a/acp_adapter/permissions.py +++ b/acp_adapter/permissions.py @@ -63,6 +63,9 @@ def make_approval_callback( logger.warning("Permission request timed out or failed: %s", exc) return "deny" + if response is None: + return "deny" + outcome = response.outcome if isinstance(outcome, AllowedOutcome): option_id = outcome.option_id diff --git a/acp_adapter/server.py b/acp_adapter/server.py index 4685a68a8..d73c71157 100644 --- a/acp_adapter/server.py +++ b/acp_adapter/server.py @@ -4,6 +4,7 @@ from __future__ import annotations import asyncio import logging +import os from collections import defaultdict, deque from concurrent.futures import ThreadPoolExecutor from typing import Any, Deque, Optional @@ -51,7 +52,7 @@ try: except ImportError: from acp.schema import AuthMethod as AuthMethodAgent # type: ignore[attr-defined] -from acp_adapter.auth import detect_provider, has_provider +from acp_adapter.auth import detect_provider from acp_adapter.events import ( make_message_cb, make_step_cb, @@ -71,6 +72,11 @@ except Exception: # Thread pool for running AIAgent (synchronous) in parallel. _executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="acp-agent") +# Server-side page size for list_sessions. The ACP ListSessionsRequest schema +# does not expose a client-side limit, so this is a fixed cap that clients +# paginate against using `cursor` / `next_cursor`. +_LIST_SESSIONS_PAGE_SIZE = 50 + def _extract_text( prompt: list[ @@ -351,9 +357,18 @@ class HermesACPAgent(acp.Agent): ) async def authenticate(self, method_id: str, **kwargs: Any) -> AuthenticateResponse | None: - if has_provider(): - return AuthenticateResponse() - return None + # Only accept authenticate() calls whose method_id matches the + # provider we advertised in initialize(). 
Without this check, + # authenticate() would acknowledge any method_id as long as the + # server has provider credentials configured โ€” harmless under + # Hermes' threat model (ACP is stdio-only, local-trust), but poor + # API hygiene and confusing if ACP ever grows multi-method auth. + provider = detect_provider() + if not provider: + return None + if not isinstance(method_id, str) or method_id.strip().lower() != provider: + return None + return AuthenticateResponse() # ---- Session management ------------------------------------------------- @@ -437,7 +452,28 @@ class HermesACPAgent(acp.Agent): cwd: str | None = None, **kwargs: Any, ) -> ListSessionsResponse: + """List ACP sessions with optional ``cwd`` filtering and cursor pagination. + + ``cwd`` is passed through to ``SessionManager.list_sessions`` which already + normalizes and filters by working directory. ``cursor`` is a ``session_id`` + previously returned as ``next_cursor``; results resume after that entry. + Server-side page size is capped at ``_LIST_SESSIONS_PAGE_SIZE``; when more + results remain, ``next_cursor`` is set to the last returned ``session_id``. + """ infos = self.session_manager.list_sessions(cwd=cwd) + + if cursor: + for idx, s in enumerate(infos): + if s["session_id"] == cursor: + infos = infos[idx + 1:] + break + else: + # Unknown cursor -> empty page (do not fall back to full list). 
+ infos = [] + + has_more = len(infos) > _LIST_SESSIONS_PAGE_SIZE + infos = infos[:_LIST_SESSIONS_PAGE_SIZE] + sessions = [] for s in infos: updated_at = s.get("updated_at") @@ -451,7 +487,9 @@ class HermesACPAgent(acp.Agent): updated_at=updated_at, ) ) - return ListSessionsResponse(sessions=sessions) + + next_cursor = sessions[-1].session_id if has_more and sessions else None + return ListSessionsResponse(sessions=sessions, next_cursor=next_cursor) # ---- Prompt (core) ------------------------------------------------------ @@ -517,15 +555,32 @@ class HermesACPAgent(acp.Agent): agent.step_callback = step_cb agent.message_callback = message_cb - if approval_cb: - try: - from tools import terminal_tool as _terminal_tool - previous_approval_cb = getattr(_terminal_tool, "_approval_callback", None) - _terminal_tool.set_approval_callback(approval_cb) - except Exception: - logger.debug("Could not set ACP approval callback", exc_info=True) + # Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr). + # Set it INSIDE _run_agent so the TLS write happens in the executor + # thread โ€” setting it here would write to the event-loop thread's TLS, + # not the executor's. Also set HERMES_INTERACTIVE so approval.py + # takes the CLI-interactive path (which calls the registered + # callback via prompt_dangerous_approval) instead of the + # non-interactive auto-approve branch (GHSA-96vc-wcxf-jjff). + # ACP's conn.request_permission maps cleanly to the interactive + # callback shape โ€” not the gateway-queue HERMES_EXEC_ASK path, + # which requires a notify_cb registered in _gateway_notify_cbs. 
+ previous_approval_cb = None + previous_interactive = None def _run_agent() -> dict: + nonlocal previous_approval_cb, previous_interactive + if approval_cb: + try: + from tools import terminal_tool as _terminal_tool + previous_approval_cb = _terminal_tool._get_approval_callback() + _terminal_tool.set_approval_callback(approval_cb) + except Exception: + logger.debug("Could not set ACP approval callback", exc_info=True) + # Signal to tools.approval that we have an interactive callback + # and the non-interactive auto-approve path must not fire. + previous_interactive = os.environ.get("HERMES_INTERACTIVE") + os.environ["HERMES_INTERACTIVE"] = "1" try: result = agent.run_conversation( user_message=user_text, @@ -537,6 +592,11 @@ class HermesACPAgent(acp.Agent): logger.exception("Agent error in session %s", session_id) return {"final_response": f"Error: {e}", "messages": state.history} finally: + # Restore HERMES_INTERACTIVE. + if previous_interactive is None: + os.environ.pop("HERMES_INTERACTIVE", None) + else: + os.environ["HERMES_INTERACTIVE"] = previous_interactive if approval_cb: try: from tools import terminal_tool as _terminal_tool @@ -613,8 +673,8 @@ class HermesACPAgent(acp.Agent): await self._conn.session_update( session_id=session_id, update=AvailableCommandsUpdate( - sessionUpdate="available_commands_update", - availableCommands=self._available_commands(), + session_update="available_commands_update", + available_commands=self._available_commands(), ), ) except Exception: diff --git a/agent/account_usage.py b/agent/account_usage.py new file mode 100644 index 000000000..0e9562dcc --- /dev/null +++ b/agent/account_usage.py @@ -0,0 +1,326 @@ +from __future__ import annotations + +from dataclasses import dataclass +from datetime import datetime, timezone +from typing import Any, Optional + +import httpx + +from agent.anthropic_adapter import _is_oauth_token, resolve_anthropic_token +from hermes_cli.auth import _read_codex_tokens, 
resolve_codex_runtime_credentials +from hermes_cli.runtime_provider import resolve_runtime_provider + + +def _utc_now() -> datetime: + return datetime.now(timezone.utc) + + +@dataclass(frozen=True) +class AccountUsageWindow: + label: str + used_percent: Optional[float] = None + reset_at: Optional[datetime] = None + detail: Optional[str] = None + + +@dataclass(frozen=True) +class AccountUsageSnapshot: + provider: str + source: str + fetched_at: datetime + title: str = "Account limits" + plan: Optional[str] = None + windows: tuple[AccountUsageWindow, ...] = () + details: tuple[str, ...] = () + unavailable_reason: Optional[str] = None + + @property + def available(self) -> bool: + return bool(self.windows or self.details) and not self.unavailable_reason + + +def _title_case_slug(value: Optional[str]) -> Optional[str]: + cleaned = str(value or "").strip() + if not cleaned: + return None + return cleaned.replace("_", " ").replace("-", " ").title() + + +def _parse_dt(value: Any) -> Optional[datetime]: + if value in (None, ""): + return None + if isinstance(value, (int, float)): + return datetime.fromtimestamp(float(value), tz=timezone.utc) + if isinstance(value, str): + text = value.strip() + if not text: + return None + if text.endswith("Z"): + text = text[:-1] + "+00:00" + try: + dt = datetime.fromisoformat(text) + return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc) + except ValueError: + return None + return None + + +def _format_reset(dt: Optional[datetime]) -> str: + if not dt: + return "unknown" + local_dt = dt.astimezone() + delta = dt - _utc_now() + total_seconds = int(delta.total_seconds()) + if total_seconds <= 0: + return f"now ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})" + hours, rem = divmod(total_seconds, 3600) + minutes = rem // 60 + if hours >= 24: + days, hours = divmod(hours, 24) + rel = f"in {days}d {hours}h" + elif hours > 0: + rel = f"in {hours}h {minutes}m" + else: + rel = f"in {minutes}m" + return f"{rel} ({local_dt.strftime('%Y-%m-%d %H:%M 
%Z')})" + + +def render_account_usage_lines(snapshot: Optional[AccountUsageSnapshot], *, markdown: bool = False) -> list[str]: + if not snapshot: + return [] + header = f"๐Ÿ“ˆ {'**' if markdown else ''}{snapshot.title}{'**' if markdown else ''}" + lines = [header] + if snapshot.plan: + lines.append(f"Provider: {snapshot.provider} ({snapshot.plan})") + else: + lines.append(f"Provider: {snapshot.provider}") + for window in snapshot.windows: + if window.used_percent is None: + base = f"{window.label}: unavailable" + else: + remaining = max(0, round(100 - float(window.used_percent))) + used = max(0, round(float(window.used_percent))) + base = f"{window.label}: {remaining}% remaining ({used}% used)" + if window.reset_at: + base += f" โ€ข resets {_format_reset(window.reset_at)}" + elif window.detail: + base += f" โ€ข {window.detail}" + lines.append(base) + for detail in snapshot.details: + lines.append(detail) + if snapshot.unavailable_reason: + lines.append(f"Unavailable: {snapshot.unavailable_reason}") + return lines + + +def _resolve_codex_usage_url(base_url: str) -> str: + normalized = (base_url or "").strip().rstrip("/") + if not normalized: + normalized = "https://chatgpt.com/backend-api/codex" + if normalized.endswith("/codex"): + normalized = normalized[: -len("/codex")] + if "/backend-api" in normalized: + return normalized + "/wham/usage" + return normalized + "/api/codex/usage" + + +def _fetch_codex_account_usage() -> Optional[AccountUsageSnapshot]: + creds = resolve_codex_runtime_credentials(refresh_if_expiring=True) + token_data = _read_codex_tokens() + tokens = token_data.get("tokens") or {} + account_id = str(tokens.get("account_id", "") or "").strip() or None + headers = { + "Authorization": f"Bearer {creds['api_key']}", + "Accept": "application/json", + "User-Agent": "codex-cli", + } + if account_id: + headers["ChatGPT-Account-Id"] = account_id + with httpx.Client(timeout=15.0) as client: + response = 
client.get(_resolve_codex_usage_url(creds.get("base_url", "")), headers=headers) + response.raise_for_status() + payload = response.json() or {} + rate_limit = payload.get("rate_limit") or {} + windows: list[AccountUsageWindow] = [] + for key, label in (("primary_window", "Session"), ("secondary_window", "Weekly")): + window = rate_limit.get(key) or {} + used = window.get("used_percent") + if used is None: + continue + windows.append( + AccountUsageWindow( + label=label, + used_percent=float(used), + reset_at=_parse_dt(window.get("reset_at")), + ) + ) + details: list[str] = [] + credits = payload.get("credits") or {} + if credits.get("has_credits"): + balance = credits.get("balance") + if isinstance(balance, (int, float)): + details.append(f"Credits balance: ${float(balance):.2f}") + elif credits.get("unlimited"): + details.append("Credits balance: unlimited") + return AccountUsageSnapshot( + provider="openai-codex", + source="usage_api", + fetched_at=_utc_now(), + plan=_title_case_slug(payload.get("plan_type")), + windows=tuple(windows), + details=tuple(details), + ) + + +def _fetch_anthropic_account_usage() -> Optional[AccountUsageSnapshot]: + token = (resolve_anthropic_token() or "").strip() + if not token: + return None + if not _is_oauth_token(token): + return AccountUsageSnapshot( + provider="anthropic", + source="oauth_usage_api", + fetched_at=_utc_now(), + unavailable_reason="Anthropic account limits are only available for OAuth-backed Claude accounts.", + ) + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/json", + "Content-Type": "application/json", + "anthropic-beta": "oauth-2025-04-20", + "User-Agent": "claude-code/2.1.0", + } + with httpx.Client(timeout=15.0) as client: + response = client.get("https://api.anthropic.com/api/oauth/usage", headers=headers) + response.raise_for_status() + payload = response.json() or {} + windows: list[AccountUsageWindow] = [] + mapping = ( + ("five_hour", "Current session"), + ("seven_day", 
"Current week"), + ("seven_day_opus", "Opus week"), + ("seven_day_sonnet", "Sonnet week"), + ) + for key, label in mapping: + window = payload.get(key) or {} + util = window.get("utilization") + if util is None: + continue + used = float(util) * 100 if float(util) <= 1 else float(util) + windows.append( + AccountUsageWindow( + label=label, + used_percent=used, + reset_at=_parse_dt(window.get("resets_at")), + ) + ) + details: list[str] = [] + extra = payload.get("extra_usage") or {} + if extra.get("is_enabled"): + used_credits = extra.get("used_credits") + monthly_limit = extra.get("monthly_limit") + currency = extra.get("currency") or "USD" + if isinstance(used_credits, (int, float)) and isinstance(monthly_limit, (int, float)): + details.append( + f"Extra usage: {used_credits:.2f} / {monthly_limit:.2f} {currency}" + ) + return AccountUsageSnapshot( + provider="anthropic", + source="oauth_usage_api", + fetched_at=_utc_now(), + windows=tuple(windows), + details=tuple(details), + ) + + +def _fetch_openrouter_account_usage(base_url: Optional[str], api_key: Optional[str]) -> Optional[AccountUsageSnapshot]: + runtime = resolve_runtime_provider( + requested="openrouter", + explicit_base_url=base_url, + explicit_api_key=api_key, + ) + token = str(runtime.get("api_key", "") or "").strip() + if not token: + return None + normalized = str(runtime.get("base_url", "") or "").rstrip("/") + credits_url = f"{normalized}/credits" + key_url = f"{normalized}/key" + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/json", + } + with httpx.Client(timeout=10.0) as client: + credits_resp = client.get(credits_url, headers=headers) + credits_resp.raise_for_status() + credits = (credits_resp.json() or {}).get("data") or {} + try: + key_resp = client.get(key_url, headers=headers) + key_resp.raise_for_status() + key_data = (key_resp.json() or {}).get("data") or {} + except Exception: + key_data = {} + total_credits = float(credits.get("total_credits") or 0.0) + 
total_usage = float(credits.get("total_usage") or 0.0) + details = [f"Credits balance: ${max(0.0, total_credits - total_usage):.2f}"] + windows: list[AccountUsageWindow] = [] + limit = key_data.get("limit") + limit_remaining = key_data.get("limit_remaining") + limit_reset = str(key_data.get("limit_reset") or "").strip() + usage = key_data.get("usage") + if ( + isinstance(limit, (int, float)) + and float(limit) > 0 + and isinstance(limit_remaining, (int, float)) + and 0 <= float(limit_remaining) <= float(limit) + ): + limit_value = float(limit) + remaining_value = float(limit_remaining) + used_percent = ((limit_value - remaining_value) / limit_value) * 100 + detail_parts = [f"${remaining_value:.2f} of ${limit_value:.2f} remaining"] + if limit_reset: + detail_parts.append(f"resets {limit_reset}") + windows.append( + AccountUsageWindow( + label="API key quota", + used_percent=used_percent, + detail=" โ€ข ".join(detail_parts), + ) + ) + if isinstance(usage, (int, float)): + usage_parts = [f"API key usage: ${float(usage):.2f} total"] + for value, label in ( + (key_data.get("usage_daily"), "today"), + (key_data.get("usage_weekly"), "this week"), + (key_data.get("usage_monthly"), "this month"), + ): + if isinstance(value, (int, float)) and float(value) > 0: + usage_parts.append(f"${float(value):.2f} {label}") + details.append(" โ€ข ".join(usage_parts)) + return AccountUsageSnapshot( + provider="openrouter", + source="credits_api", + fetched_at=_utc_now(), + windows=tuple(windows), + details=tuple(details), + ) + + +def fetch_account_usage( + provider: Optional[str], + *, + base_url: Optional[str] = None, + api_key: Optional[str] = None, +) -> Optional[AccountUsageSnapshot]: + normalized = str(provider or "").strip().lower() + if normalized in {"", "auto", "custom"}: + return None + try: + if normalized == "openai-codex": + return _fetch_codex_account_usage() + if normalized == "anthropic": + return _fetch_anthropic_account_usage() + if normalized == "openrouter": + return 
_fetch_openrouter_account_usage(base_url, api_key) + except Exception: + return None + return None diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index d8d181cc1..fb2408525 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -19,6 +19,7 @@ from pathlib import Path from hermes_constants import get_hermes_home from types import SimpleNamespace from typing import Any, Dict, List, Optional, Tuple +from utils import normalize_proxy_env_vars try: import anthropic as _anthropic_sdk @@ -116,6 +117,63 @@ def _get_anthropic_max_output(model: str) -> int: return best_val +def _resolve_positive_anthropic_max_tokens(value) -> Optional[int]: + """Return ``value`` floored to a positive int, or ``None`` if it is not a + finite positive number. Ported from openclaw/openclaw#66664. + + Anthropic's Messages API rejects ``max_tokens`` values that are 0, + negative, non-integer, or non-finite with HTTP 400. Python's ``or`` + idiom (``max_tokens or fallback``) correctly catches ``0`` but lets + negative ints and fractional floats (``-1``, ``0.5``) through to the + API, producing a user-visible failure instead of a local error. + """ + # Booleans are a subclass of int โ€” exclude explicitly so ``True`` doesn't + # silently become 1 and ``False`` doesn't become 0. + if isinstance(value, bool): + return None + if not isinstance(value, (int, float)): + return None + try: + import math + if not math.isfinite(value): + return None + except Exception: + return None + floored = int(value) # truncates toward zero for floats + return floored if floored > 0 else None + + +def _resolve_anthropic_messages_max_tokens( + requested, + model: str, + context_length: Optional[int] = None, +) -> int: + """Resolve the ``max_tokens`` budget for an Anthropic Messages call. + + Prefers ``requested`` when it is a positive finite number; otherwise + falls back to the model's output ceiling. 
Raises ``ValueError`` if no + positive budget can be resolved (should not happen with current model + table defaults, but guards against a future regression where + ``_get_anthropic_max_output`` could return ``0``). + + Separately, callers apply a context-window clamp โ€” this resolver does + not, to keep the positive-value contract independent of endpoint + specifics. + + Ported from openclaw/openclaw#66664 (resolveAnthropicMessagesMaxTokens). + """ + resolved = _resolve_positive_anthropic_max_tokens(requested) + if resolved is not None: + return resolved + fallback = _get_anthropic_max_output(model) + if fallback > 0: + return fallback + raise ValueError( + f"Anthropic Messages adapter requires a positive max_tokens value for " + f"model {model!r}; got {requested!r} and no model default resolved." + ) + + def _supports_adaptive_thinking(model: str) -> bool: """Return True for Claude 4.6+ models that support adaptive thinking.""" return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS) @@ -265,6 +323,14 @@ def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool: return True # Any other endpoint is a third-party proxy +def _is_kimi_coding_endpoint(base_url: str | None) -> bool: + """Return True for Kimi's /coding endpoint that requires claude-code UA.""" + normalized = _normalize_base_url_text(base_url) + if not normalized: + return False + return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding") + + def _requires_bearer_auth(base_url: str | None) -> bool: """Return True for Anthropic-compatible providers that require Bearer auth. @@ -308,6 +374,9 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = "The 'anthropic' package is required for the Anthropic provider. 
" "Install it with: pip install 'anthropic>=0.39.0'" ) + + normalize_proxy_env_vars() + from httpx import Timeout normalized_base_url = _normalize_base_url_text(base_url) @@ -319,9 +388,18 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = kwargs["base_url"] = normalized_base_url common_betas = _common_betas_for_base_url(normalized_base_url) - if _requires_bearer_auth(normalized_base_url): + if _is_kimi_coding_endpoint(base_url): + # Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0 + # to be recognized as a valid Coding Agent. Without it, returns 403. + # Check this BEFORE _requires_bearer_auth since both match api.kimi.com/coding. + kwargs["api_key"] = api_key + kwargs["default_headers"] = { + "User-Agent": "claude-code/0.1.0", + **( {"anthropic-beta": ",".join(common_betas)} if common_betas else {} ) + } + elif _requires_bearer_auth(normalized_base_url): # Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in - # Authorization: Bearer even for regular API keys. Route those endpoints + # Authorization: Bearer *** for regular API keys. Route those endpoints # through auth_token so the SDK sends Bearer auth instead of x-api-key. # Check this before OAuth token shape detection because MiniMax secrets do # not use Anthropic's sk-ant-api prefix and would otherwise be misread as @@ -1062,6 +1140,31 @@ def convert_messages_to_anthropic( "name": fn.get("name", ""), "input": parsed_args, }) + # Kimi's /coding endpoint (Anthropic protocol) requires assistant + # tool-call messages to carry reasoning_content when thinking is + # enabled server-side. Preserve it as a thinking block so Kimi + # can validate the message history. See hermes-agent#13848. + # + # Accept empty string "" โ€” _copy_reasoning_content_for_api() + # injects "" as a tier-3 fallback for Kimi tool-call messages + # that had no reasoning. Kimi requires the field to exist, even + # if empty. 
+ # + # Prepend (not append): Anthropic protocol requires thinking + # blocks before text and tool_use blocks. + # + # Guard: only add when reasoning_details didn't already contribute + # thinking blocks. On native Anthropic, reasoning_details produces + # signed thinking blocks โ€” adding another unsigned one from + # reasoning_content would create a duplicate (same text) that gets + # downgraded to a spurious text block on the last assistant message. + reasoning_content = m.get("reasoning_content") + _already_has_thinking = any( + isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking") + for b in blocks + ) + if isinstance(reasoning_content, str) and not _already_has_thinking: + blocks.insert(0, {"type": "thinking", "thinking": reasoning_content}) # Anthropic rejects empty assistant content effective = blocks or content if not effective or effective == "": @@ -1217,6 +1320,7 @@ def convert_messages_to_anthropic( # cache markers can interfere with signature validation. _THINKING_TYPES = frozenset(("thinking", "redacted_thinking")) _is_third_party = _is_third_party_anthropic_endpoint(base_url) + _is_kimi = _is_kimi_coding_endpoint(base_url) last_assistant_idx = None for i in range(len(result) - 1, -1, -1): @@ -1228,7 +1332,25 @@ def convert_messages_to_anthropic( if m.get("role") != "assistant" or not isinstance(m.get("content"), list): continue - if _is_third_party or idx != last_assistant_idx: + if _is_kimi: + # Kimi's /coding endpoint enables thinking server-side and + # requires unsigned thinking blocks on replayed assistant + # tool-call messages. Strip signed Anthropic blocks (Kimi + # can't validate signatures) but preserve the unsigned ones + # we synthesised from reasoning_content above. 
+ new_content = [] + for b in m["content"]: + if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES: + new_content.append(b) + continue + if b.get("signature") or b.get("data"): + # Anthropic-signed block โ€” Kimi can't validate, strip + continue + # Unsigned thinking (synthesised from reasoning_content) โ€” + # keep it: Kimi needs it for message-history validation. + new_content.append(b) + m["content"] = new_content or [{"type": "text", "text": "(empty)"}] + elif _is_third_party or idx != last_assistant_idx: # Third-party endpoint: strip ALL thinking blocks from every # assistant message โ€” signatures are Anthropic-proprietary. # Direct Anthropic: strip from non-latest assistant messages only. @@ -1326,7 +1448,12 @@ def build_anthropic_kwargs( model = normalize_model_name(model, preserve_dots=preserve_dots) # effective_max_tokens = output cap for this call (โ‰  total context window) - effective_max_tokens = max_tokens or _get_anthropic_max_output(model) + # Use the resolver helper so non-positive values (negative ints, + # fractional floats, NaN, non-numeric) fail locally with a clear error + # rather than 400-ing at the Anthropic API. See openclaw/openclaw#66664. + effective_max_tokens = _resolve_anthropic_messages_max_tokens( + max_tokens, model, context_length=context_length + ) # Clamp output cap to fit inside the total context window. # Only matters for small custom endpoints where context_length < native @@ -1405,11 +1532,25 @@ def build_anthropic_kwargs( # MiniMax Anthropic-compat endpoints support thinking (manual mode only, # not adaptive). Haiku does NOT support extended thinking โ€” skip entirely. # + # Kimi's /coding endpoint speaks the Anthropic Messages protocol but has + # its own thinking semantics: when ``thinking.enabled`` is sent, Kimi + # validates the message history and requires every prior assistant + # tool-call message to carry OpenAI-style ``reasoning_content``. 
The + # Anthropic path never populates that field, and + # ``convert_messages_to_anthropic`` strips all Anthropic thinking blocks + # on third-party endpoints โ€” so the request fails with HTTP 400 + # "thinking is enabled but reasoning_content is missing in assistant + # tool call message at index N". Kimi's reasoning is driven server-side + # on the /coding route, so skip Anthropic's thinking parameter entirely + # for that host. (Kimi on chat_completions enables thinking via + # extra_body in the ChatCompletionsTransport โ€” see #13503.) + # # On 4.7+ the `thinking.display` field defaults to "omitted", which # silently hides reasoning text that Hermes surfaces in its CLI. We # request "summarized" so the reasoning blocks stay populated โ€” matching # 4.6 behavior and preserving the activity-feed UX during long tool runs. - if reasoning_config and isinstance(reasoning_config, dict): + _is_kimi_coding = _is_kimi_coding_endpoint(base_url) + if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding: if reasoning_config.get("enabled") is not False and "haiku" not in model.lower(): effort = str(reasoning_config.get("effort", "medium")).lower() budget = THINKING_BUDGET.get(effort, 8000) @@ -1525,42 +1666,3 @@ def normalize_anthropic_response( ), finish_reason, ) - - -def normalize_anthropic_response_v2( - response, - strip_tool_prefix: bool = False, -) -> "NormalizedResponse": - """Normalize Anthropic response to NormalizedResponse. - - Wraps the existing normalize_anthropic_response() and maps its output - to the shared transport types. This allows incremental migration โ€” - one call site at a time โ€” without changing the original function. 
- """ - from agent.transports.types import NormalizedResponse, build_tool_call - - assistant_msg, finish_reason = normalize_anthropic_response(response, strip_tool_prefix) - - tool_calls = None - if assistant_msg.tool_calls: - tool_calls = [ - build_tool_call( - id=tc.id, - name=tc.function.name, - arguments=tc.function.arguments, - ) - for tc in assistant_msg.tool_calls - ] - - provider_data = {} - if getattr(assistant_msg, "reasoning_details", None): - provider_data["reasoning_details"] = assistant_msg.reasoning_details - - return NormalizedResponse( - content=assistant_msg.content, - tool_calls=tool_calls, - finish_reason=finish_reason, - reasoning=getattr(assistant_msg, "reasoning", None), - usage=None, # Anthropic usage is on the raw response, not the normaliser - provider_data=provider_data or None, - ) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 50d4d86af..4f8c9a0a4 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -48,7 +48,7 @@ from openai import OpenAI from agent.credential_pool import load_pool from hermes_cli.config import get_hermes_home from hermes_constants import OPENROUTER_BASE_URL -from utils import base_url_host_matches, base_url_hostname +from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_vars logger = logging.getLogger(__name__) @@ -134,6 +134,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = { "gemini": "gemini-3-flash-preview", "zai": "glm-4.5-flash", "kimi-coding": "kimi-k2-turbo-preview", + "stepfun": "step-3.5-flash", "kimi-coding-cn": "kimi-k2-turbo-preview", "minimax": "MiniMax-M2.7", "minimax-cn": "MiniMax-M2.7", @@ -182,8 +183,6 @@ auxiliary_is_nous: bool = False # Default auxiliary models per provider _OPENROUTER_MODEL = "google/gemini-3-flash-preview" _NOUS_MODEL = "google/gemini-3-flash-preview" -_NOUS_FREE_TIER_VISION_MODEL = "xiaomi/mimo-v2-omni" -_NOUS_FREE_TIER_AUX_MODEL = "xiaomi/mimo-v2-pro" _NOUS_DEFAULT_BASE_URL = 
"https://inference-api.nousresearch.com/v1" _ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com" _AUTH_JSON_PATH = get_hermes_home() / "auth.json" @@ -728,6 +727,33 @@ def _nous_base_url() -> str: return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL) +def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[str, str]]: + """Return fresh Nous runtime credentials when available. + + This mirrors the main agent's 401 recovery path and keeps auxiliary + clients aligned with the singleton auth store + mint flow instead of + relying only on whatever raw tokens happen to be sitting in auth.json + or the credential pool. + """ + try: + from hermes_cli.auth import resolve_nous_runtime_credentials + + creds = resolve_nous_runtime_credentials( + min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))), + timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")), + force_mint=force_refresh, + ) + except Exception as exc: + logger.debug("Auxiliary Nous runtime credential resolution failed: %s", exc) + return None + + api_key = str(creds.get("api_key") or "").strip() + base_url = str(creds.get("base_url") or "").strip().rstrip("/") + if not api_key or not base_url: + return None + return api_key, base_url + + def _read_codex_access_token() -> Optional[str]: """Read a valid, non-expired Codex OAuth access token from Hermes auth store. 
@@ -818,7 +844,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: return GeminiNativeClient(api_key=api_key, base_url=base_url), model extra = {} if base_url_host_matches(base_url, "api.kimi.com"): - extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} + extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"} elif base_url_host_matches(base_url, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers @@ -844,7 +870,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: return GeminiNativeClient(api_key=api_key, base_url=base_url), model extra = {} if base_url_host_matches(base_url, "api.kimi.com"): - extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} + extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"} elif base_url_host_matches(base_url, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers @@ -894,29 +920,50 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]: pass nous = _read_nous_auth() - if not nous: + runtime = _resolve_nous_runtime_api(force_refresh=False) + if runtime is None and not nous: return None, None global auxiliary_is_nous auxiliary_is_nous = True logger.debug("Auxiliary client: Nous Portal") - if nous.get("source") == "pool": - model = "gemini-3-flash" - else: - model = _NOUS_MODEL - # Free-tier users can't use paid auxiliary models โ€” use the free - # models instead: mimo-v2-omni for vision, mimo-v2-pro for text tasks. + + # Ask the Portal which model it currently recommends for this task type. + # The /api/nous/recommended-models endpoint is the authoritative source: + # it distinguishes paid vs free tier recommendations, and get_nous_recommended_aux_model + # auto-detects the caller's tier via check_nous_free_tier(). Fall back to + # _NOUS_MODEL (google/gemini-3-flash-preview) when the Portal is unreachable + # or returns a null recommendation for this task type. 
+ model = _NOUS_MODEL try: - from hermes_cli.models import check_nous_free_tier - if check_nous_free_tier(): - model = _NOUS_FREE_TIER_VISION_MODEL if vision else _NOUS_FREE_TIER_AUX_MODEL - logger.debug("Free-tier Nous account โ€” using %s for auxiliary/%s", - model, "vision" if vision else "text") - except Exception: - pass + from hermes_cli.models import get_nous_recommended_aux_model + recommended = get_nous_recommended_aux_model(vision=vision) + if recommended: + model = recommended + logger.debug( + "Auxiliary/%s: using Portal-recommended model %s", + "vision" if vision else "text", model, + ) + else: + logger.debug( + "Auxiliary/%s: no Portal recommendation, falling back to %s", + "vision" if vision else "text", model, + ) + except Exception as exc: + logger.debug( + "Auxiliary/%s: recommended-models lookup failed (%s); " + "falling back to %s", + "vision" if vision else "text", exc, model, + ) + + if runtime is not None: + api_key, base_url = runtime + else: + api_key = _nous_api_key(nous or {}) + base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/") return ( OpenAI( - api_key=_nous_api_key(nous), - base_url=str(nous.get("inference_base_url") or _nous_base_url()).rstrip("/"), + api_key=api_key, + base_url=base_url, ), model, ) @@ -1028,6 +1075,8 @@ def _validate_proxy_env_urls() -> None: """ from urllib.parse import urlparse + normalize_proxy_env_vars() + for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy"): value = str(os.environ.get(key) or "").strip() @@ -1258,6 +1307,15 @@ def _is_connection_error(exc: Exception) -> bool: return False +def _is_auth_error(exc: Exception) -> bool: + """Detect auth failures that should trigger provider-specific refresh.""" + status = getattr(exc, "status_code", None) + if status == 401: + return True + err_lower = str(exc).lower() + return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower() + + def 
_try_payment_fallback( failed_provider: str, task: str = None, @@ -1441,7 +1499,7 @@ def _to_async_client(sync_client, model: str): async_kwargs["default_headers"] = copilot_default_headers() elif base_url_host_matches(sync_base_url, "api.kimi.com"): - async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} + async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"} return AsyncOpenAI(**async_kwargs), model @@ -1565,7 +1623,13 @@ def resolve_provider_client( # โ”€โ”€ Nous Portal (OAuth) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ if provider == "nous": - client, default = _try_nous() + # Detect vision tasks: either explicit model override from + # _PROVIDER_VISION_MODELS, or caller passed a known vision model. + _is_vision = ( + model in _PROVIDER_VISION_MODELS.values() + or (model or "").strip().lower() == "mimo-v2-omni" + ) + client, default = _try_nous(vision=_is_vision) if client is None: logger.warning("resolve_provider_client: nous requested " "but Nous Portal not configured (run: hermes auth)") @@ -1622,7 +1686,7 @@ def resolve_provider_client( ) extra = {} if base_url_host_matches(custom_base, "api.kimi.com"): - extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} + extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"} elif base_url_host_matches(custom_base, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers extra["default_headers"] = copilot_default_headers() @@ -1729,7 +1793,7 @@ def resolve_provider_client( # Provider-specific headers headers = {} if base_url_host_matches(base_url, "api.kimi.com"): - headers["User-Agent"] = "KimiCLI/1.30.0" + headers["User-Agent"] = "claude-code/0.1.0" elif base_url_host_matches(base_url, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers @@ -1961,24 +2025,35 @@ def resolve_vision_provider_client( # _PROVIDER_VISION_MODELS provides per-provider 
vision model # overrides when the provider has a dedicated multimodal model # that differs from the chat model (e.g. xiaomi โ†’ mimo-v2-omni, - # zai โ†’ glm-5v-turbo). + # zai โ†’ glm-5v-turbo). Nous is the exception: it has a dedicated + # strict vision backend with tier-aware defaults, so it must not + # fall through to the user's text chat model here. # 2. OpenRouter (vision-capable aggregator fallback) # 3. Nous Portal (vision-capable aggregator fallback) # 4. Stop main_provider = _read_main_provider() main_model = _read_main_model() if main_provider and main_provider not in ("auto", ""): - vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model) - rpc_client, rpc_model = resolve_provider_client( - main_provider, vision_model, - api_mode=resolved_api_mode) - if rpc_client is not None: - logger.info( - "Vision auto-detect: using main provider %s (%s)", - main_provider, rpc_model or vision_model, - ) - return _finalize( - main_provider, rpc_client, rpc_model or vision_model) + if main_provider == "nous": + sync_client, default_model = _resolve_strict_vision_backend(main_provider) + if sync_client is not None: + logger.info( + "Vision auto-detect: using main provider %s (%s)", + main_provider, default_model or resolved_model or main_model, + ) + return _finalize(main_provider, sync_client, default_model) + else: + vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model) + rpc_client, rpc_model = resolve_provider_client( + main_provider, vision_model, + api_mode=resolved_api_mode) + if rpc_client is not None: + logger.info( + "Vision auto-detect: using main provider %s (%s)", + main_provider, rpc_model or vision_model, + ) + return _finalize( + main_provider, rpc_client, rpc_model or vision_model) # Fall back through aggregators (uses their dedicated vision model, # not the user's main model) when main provider has no client. 
@@ -2053,6 +2128,76 @@ _client_cache_lock = threading.Lock() _CLIENT_CACHE_MAX_SIZE = 64 # safety belt โ€” evict oldest when exceeded +def _client_cache_key( + provider: str, + *, + async_mode: bool, + base_url: Optional[str] = None, + api_key: Optional[str] = None, + api_mode: Optional[str] = None, + main_runtime: Optional[Dict[str, Any]] = None, +) -> tuple: + runtime = _normalize_main_runtime(main_runtime) + runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else () + return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key) + + +def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None: + with _client_cache_lock: + old_entry = _client_cache.get(cache_key) + if old_entry is not None and old_entry[0] is not client: + _force_close_async_httpx(old_entry[0]) + try: + close_fn = getattr(old_entry[0], "close", None) + if callable(close_fn): + close_fn() + except Exception: + pass + _client_cache[cache_key] = (client, default_model, bound_loop) + + +def _refresh_nous_auxiliary_client( + *, + cache_provider: str, + model: Optional[str], + async_mode: bool, + base_url: Optional[str] = None, + api_key: Optional[str] = None, + api_mode: Optional[str] = None, + main_runtime: Optional[Dict[str, Any]] = None, +) -> Tuple[Optional[Any], Optional[str]]: + """Refresh Nous runtime creds, rebuild the client, and replace the cache entry.""" + runtime = _resolve_nous_runtime_api(force_refresh=True) + if runtime is None: + return None, model + + fresh_key, fresh_base_url = runtime + sync_client = OpenAI(api_key=fresh_key, base_url=fresh_base_url) + final_model = model + + current_loop = None + if async_mode: + try: + import asyncio as _aio + current_loop = _aio.get_event_loop() + except RuntimeError: + pass + client, final_model = _to_async_client(sync_client, final_model or "") + else: + client = sync_client + + cache_key = 
_client_cache_key( + cache_provider, + async_mode=async_mode, + base_url=base_url, + api_key=api_key, + api_mode=api_mode, + main_runtime=main_runtime, + ) + _store_cached_client(cache_key, client, final_model, bound_loop=current_loop) + return client, final_model + + def neuter_async_httpx_del() -> None: """Monkey-patch ``AsyncHttpxClientWrapper.__del__`` to be a no-op. @@ -2206,8 +2351,14 @@ def _get_cached_client( except RuntimeError: pass runtime = _normalize_main_runtime(main_runtime) - runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else () - cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key) + cache_key = _client_cache_key( + provider, + async_mode=async_mode, + base_url=base_url, + api_key=api_key, + api_mode=api_mode, + main_runtime=main_runtime, + ) with _client_cache_lock: if cache_key in _client_cache: cached_client, cached_default, cached_loop = _client_cache[cache_key] @@ -2655,6 +2806,29 @@ def call_llm( raise first_err = retry_err + # โ”€โ”€ Nous auth refresh parity with main agent โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + client_is_nous = ( + resolved_provider == "nous" + or base_url_host_matches(_base_info, "inference-api.nousresearch.com") + ) + if _is_auth_error(first_err) and client_is_nous: + refreshed_client, refreshed_model = _refresh_nous_auxiliary_client( + cache_provider=resolved_provider or "nous", + model=final_model, + async_mode=False, + base_url=resolved_base_url, + api_key=resolved_api_key, + api_mode=resolved_api_mode, + main_runtime=main_runtime, + ) + if refreshed_client is not None: + logger.info("Auxiliary %s: refreshed Nous runtime credentials after 401, retrying", + task or "call") + if refreshed_model and refreshed_model != kwargs.get("model"): + kwargs["model"] = refreshed_model + return _validate_llm_response( + refreshed_client.chat.completions.create(**kwargs), task) + # โ”€โ”€ Payment / credit exhaustion 
fallback โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ # When the resolved provider returns 402 or a credit-related error, # try alternative providers instead of giving up. This handles the @@ -2853,6 +3027,28 @@ async def async_call_llm( raise first_err = retry_err + # โ”€โ”€ Nous auth refresh parity with main agent โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + client_is_nous = ( + resolved_provider == "nous" + or base_url_host_matches(_client_base, "inference-api.nousresearch.com") + ) + if _is_auth_error(first_err) and client_is_nous: + refreshed_client, refreshed_model = _refresh_nous_auxiliary_client( + cache_provider=resolved_provider or "nous", + model=final_model, + async_mode=True, + base_url=resolved_base_url, + api_key=resolved_api_key, + api_mode=resolved_api_mode, + ) + if refreshed_client is not None: + logger.info("Auxiliary %s (async): refreshed Nous runtime credentials after 401, retrying", + task or "call") + if refreshed_model and refreshed_model != kwargs.get("model"): + kwargs["model"] = refreshed_model + return _validate_llm_response( + await refreshed_client.chat.completions.create(**kwargs), task) + # โ”€โ”€ Payment / connection fallback (mirrors sync call_llm) โ”€โ”€โ”€โ”€โ”€ should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err) is_auto = resolved_provider in ("auto", "", None) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index f56515dab..f8036851f 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -64,6 +64,47 @@ _CHARS_PER_TOKEN = 4 _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600 +def _content_text_for_contains(content: Any) -> str: + """Return a best-effort text view of message content. + + Used only for substring checks when we need to know whether we've already + appended a note to a message. Keeps multimodal lists intact elsewhere. 
+ """ + if content is None: + return "" + if isinstance(content, str): + return content + if isinstance(content, list): + parts: list[str] = [] + for item in content: + if isinstance(item, str): + parts.append(item) + elif isinstance(item, dict): + text = item.get("text") + if isinstance(text, str): + parts.append(text) + return "\n".join(part for part in parts if part) + return str(content) + + +def _append_text_to_content(content: Any, text: str, *, prepend: bool = False) -> Any: + """Append or prepend plain text to message content safely. + + Compression sometimes needs to add a note or merge a summary into an + existing message. Message content may be plain text or a multimodal list of + blocks, so direct string concatenation is not always safe. + """ + if content is None: + return text + if isinstance(content, str): + return text + content if prepend else content + text + if isinstance(content, list): + text_block = {"type": "text", "text": text} + return [text_block, *content] if prepend else [*content, text_block] + rendered = str(content) + return text + rendered if prepend else rendered + text + + def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str: """Shrink long string values inside a tool-call arguments JSON blob while preserving JSON validity. 
@@ -807,7 +848,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio ) self.summary_model = "" # empty = use main model self._summary_failure_cooldown_until = 0.0 # no cooldown - return self._generate_summary(messages, summary_budget) # retry immediately + return self._generate_summary(turns_to_summarize, focus_topic=focus_topic) # retry immediately # Transient errors (timeout, rate limit, network) โ€” shorter cooldown _transient_cooldown = 60 @@ -1144,10 +1185,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio for i in range(compress_start): msg = messages[i].copy() if i == 0 and msg.get("role") == "system": - existing = msg.get("content") or "" + existing = msg.get("content") _compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]" - if _compression_note not in existing: - msg["content"] = existing + "\n\n" + _compression_note + if _compression_note not in _content_text_for_contains(existing): + msg["content"] = _append_text_to_content( + existing, + "\n\n" + _compression_note if isinstance(existing, str) and existing else _compression_note, + ) compressed.append(msg) # If LLM summary failed, insert a static fallback so the model @@ -1191,12 +1235,15 @@ The user has requested that this compaction PRIORITISE preserving all informatio for i in range(compress_end, n_messages): msg = messages[i].copy() if _merge_summary_into_tail and i == compress_end: - original = msg.get("content") or "" - msg["content"] = ( + merged_prefix = ( summary + "\n\n--- END OF CONTEXT SUMMARY โ€” " "respond to the message below, not the summary above ---\n\n" - + original + ) + msg["content"] = _append_text_to_content( + msg.get("content"), + merged_prefix, + prepend=True, ) _merge_summary_into_tail = False compressed.append(msg) 
diff --git a/agent/copilot_acp_client.py b/agent/copilot_acp_client.py index 031c58d70..783f94956 100644 --- a/agent/copilot_acp_client.py +++ b/agent/copilot_acp_client.py @@ -21,6 +21,9 @@ from pathlib import Path from types import SimpleNamespace from typing import Any +from agent.file_safety import get_read_block_error, is_write_denied +from agent.redact import redact_sensitive_text + ACP_MARKER_BASE_URL = "acp://copilot" _DEFAULT_TIMEOUT_SECONDS = 900.0 @@ -54,6 +57,18 @@ def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]: } +def _permission_denied(message_id: Any) -> dict[str, Any]: + return { + "jsonrpc": "2.0", + "id": message_id, + "result": { + "outcome": { + "outcome": "cancelled", + } + }, + } + + def _format_messages_as_prompt( messages: list[dict[str, Any]], model: str | None = None, @@ -386,6 +401,8 @@ class CopilotACPClient: stderr_tail: deque[str] = deque(maxlen=40) def _stdout_reader() -> None: + if proc.stdout is None: + return for line in proc.stdout: try: inbox.put(json.loads(line)) @@ -533,18 +550,13 @@ class CopilotACPClient: params = msg.get("params") or {} if method == "session/request_permission": - response = { - "jsonrpc": "2.0", - "id": message_id, - "result": { - "outcome": { - "outcome": "allow_once", - } - }, - } + response = _permission_denied(message_id) elif method == "fs/read_text_file": try: path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd) + block_error = get_read_block_error(str(path)) + if block_error: + raise PermissionError(block_error) content = path.read_text() if path.exists() else "" line = params.get("line") limit = params.get("limit") @@ -553,6 +565,8 @@ class CopilotACPClient: start = line - 1 end = start + limit if isinstance(limit, int) and limit > 0 else None content = "".join(lines[start:end]) + if content: + content = redact_sensitive_text(content) response = { "jsonrpc": "2.0", "id": message_id, @@ -565,6 +579,10 @@ class CopilotACPClient: elif method == 
"fs/write_text_file": try: path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd) + if is_write_denied(str(path)): + raise PermissionError( + f"Write denied: '{path}' is a protected system/credential file." + ) path.parent.mkdir(parents=True, exist_ok=True) path.write_text(str(params.get("content") or "")) response = { diff --git a/agent/credential_pool.py b/agent/credential_pool.py index b02514e99..de8d03185 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -983,6 +983,14 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup active_sources: Set[str] = set() auth_store = _load_auth_store() + # Shared suppression gate โ€” used at every upsert site so + # `hermes auth remove ` is stable across all source types. + try: + from hermes_cli.auth import is_source_suppressed as _is_suppressed + except ImportError: + def _is_suppressed(_p, _s): # type: ignore[misc] + return False + if provider == "anthropic": # Only auto-discover external credentials (Claude Code, Hermes PKCE) # when the user has explicitly configured anthropic as their provider. 
@@ -1002,13 +1010,8 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup ("claude_code", read_claude_code_credentials()), ): if creds and creds.get("accessToken"): - # Check if user explicitly removed this source - try: - from hermes_cli.auth import is_source_suppressed - if is_source_suppressed(provider, source_name): - continue - except ImportError: - pass + if _is_suppressed(provider, source_name): + continue active_sources.add(source_name) changed |= _upsert_entry( entries, @@ -1026,7 +1029,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup elif provider == "nous": state = _load_provider_state(auth_store, "nous") - if state: + if state and not _is_suppressed(provider, "device_code"): active_sources.add("device_code") # Prefer a user-supplied label embedded in the singleton state # (set by persist_nous_credentials(label=...) when the user ran @@ -1067,20 +1070,21 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup token, source = resolve_copilot_token() if token: source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}" - active_sources.add(source_name) - pconfig = PROVIDER_REGISTRY.get(provider) - changed |= _upsert_entry( - entries, - provider, - source_name, - { - "source": source_name, - "auth_type": AUTH_TYPE_API_KEY, - "access_token": token, - "base_url": pconfig.inference_base_url if pconfig else "", - "label": source, - }, - ) + if not _is_suppressed(provider, source_name): + active_sources.add(source_name) + pconfig = PROVIDER_REGISTRY.get(provider) + changed |= _upsert_entry( + entries, + provider, + source_name, + { + "source": source_name, + "auth_type": AUTH_TYPE_API_KEY, + "access_token": token, + "base_url": pconfig.inference_base_url if pconfig else "", + "label": source, + }, + ) except Exception as exc: logger.debug("Copilot token seed failed: %s", exc) @@ -1096,20 +1100,21 @@ def _seed_from_singletons(provider: str, entries: 
List[PooledCredential]) -> Tup token = creds.get("api_key", "") if token: source_name = creds.get("source", "qwen-cli") - active_sources.add(source_name) - changed |= _upsert_entry( - entries, - provider, - source_name, - { - "source": source_name, - "auth_type": AUTH_TYPE_OAUTH, - "access_token": token, - "expires_at_ms": creds.get("expires_at_ms"), - "base_url": creds.get("base_url", ""), - "label": creds.get("auth_file", source_name), - }, - ) + if not _is_suppressed(provider, source_name): + active_sources.add(source_name) + changed |= _upsert_entry( + entries, + provider, + source_name, + { + "source": source_name, + "auth_type": AUTH_TYPE_OAUTH, + "access_token": token, + "expires_at_ms": creds.get("expires_at_ms"), + "base_url": creds.get("base_url", ""), + "label": creds.get("auth_file", source_name), + }, + ) except Exception as exc: logger.debug("Qwen OAuth token seed failed: %s", exc) @@ -1118,13 +1123,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup # the device_code source as suppressed so it won't be re-seeded from # the Hermes auth store. Without this gate the removal is instantly # undone on the next load_pool() call. - codex_suppressed = False - try: - from hermes_cli.auth import is_source_suppressed - codex_suppressed = is_source_suppressed(provider, "device_code") - except ImportError: - pass - if codex_suppressed: + if _is_suppressed(provider, "device_code"): return changed, active_sources state = _load_provider_state(auth_store, "openai-codex") @@ -1158,10 +1157,22 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]: changed = False active_sources: Set[str] = set() + # Honour user suppression โ€” `hermes auth remove ` for an + # env-seeded credential marks the env: source as suppressed so it + # won't be re-seeded from the user's shell environment or ~/.hermes/.env. 
+ # Without this gate the removal is silently undone on the next + # load_pool() call whenever the var is still exported by the shell. + try: + from hermes_cli.auth import is_source_suppressed as _is_source_suppressed + except ImportError: + def _is_source_suppressed(_p, _s): # type: ignore[misc] + return False if provider == "openrouter": token = os.getenv("OPENROUTER_API_KEY", "").strip() if token: source = "env:OPENROUTER_API_KEY" + if _is_source_suppressed(provider, source): + return changed, active_sources active_sources.add(source) changed |= _upsert_entry( entries, @@ -1198,6 +1209,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool if not token: continue source = f"env:{env_var}" + if _is_source_suppressed(provider, source): + continue active_sources.add(source) auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY base_url = env_url or pconfig.inference_base_url @@ -1242,6 +1255,13 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b changed = False active_sources: Set[str] = set() + # Shared suppression gate โ€” same pattern as _seed_from_env/_seed_from_singletons. 
+ try: + from hermes_cli.auth import is_source_suppressed as _is_suppressed + except ImportError: + def _is_suppressed(_p, _s): # type: ignore[misc] + return False + # Seed from the custom_providers config entry's api_key field cp_config = _get_custom_provider_config(pool_key) if cp_config: @@ -1250,19 +1270,20 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b name = str(cp_config.get("name") or "").strip() if api_key: source = f"config:{name}" - active_sources.add(source) - changed |= _upsert_entry( - entries, - pool_key, - source, - { - "source": source, - "auth_type": AUTH_TYPE_API_KEY, - "access_token": api_key, - "base_url": base_url, - "label": name or source, - }, - ) + if not _is_suppressed(pool_key, source): + active_sources.add(source) + changed |= _upsert_entry( + entries, + pool_key, + source, + { + "source": source, + "auth_type": AUTH_TYPE_API_KEY, + "access_token": api_key, + "base_url": base_url, + "label": name or source, + }, + ) # Seed from model.api_key if model.provider=='custom' and model.base_url matches try: @@ -1282,19 +1303,20 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b matched_key = get_custom_provider_pool_key(model_base_url) if matched_key == pool_key: source = "model_config" - active_sources.add(source) - changed |= _upsert_entry( - entries, - pool_key, - source, - { - "source": source, - "auth_type": AUTH_TYPE_API_KEY, - "access_token": model_api_key, - "base_url": model_base_url, - "label": "model_config", - }, - ) + if not _is_suppressed(pool_key, source): + active_sources.add(source) + changed |= _upsert_entry( + entries, + pool_key, + source, + { + "source": source, + "auth_type": AUTH_TYPE_API_KEY, + "access_token": model_api_key, + "base_url": model_base_url, + "label": "model_config", + }, + ) except Exception: pass diff --git a/agent/credential_sources.py b/agent/credential_sources.py new file mode 100644 index 000000000..8ad2fade0 --- /dev/null +++ 
b/agent/credential_sources.py @@ -0,0 +1,401 @@ +"""Unified removal contract for every credential source Hermes reads from. + +Hermes seeds its credential pool from many places: + + env: โ€” os.environ / ~/.hermes/.env + claude_code โ€” ~/.claude/.credentials.json + hermes_pkce โ€” ~/.hermes/.anthropic_oauth.json + device_code โ€” auth.json providers. (nous, openai-codex, ...) + qwen-cli โ€” ~/.qwen/oauth_creds.json + gh_cli โ€” gh auth token + config: โ€” custom_providers config entry + model_config โ€” model.api_key when model.provider == "custom" + manual โ€” user ran `hermes auth add` + +Each source has its own reader inside ``agent.credential_pool._seed_from_*`` +(which keep their existing shape โ€” we haven't restructured them). What we +unify here is **removal**: + + ``hermes auth remove `` must make the pool entry stay gone. + +Before this module, every source had an ad-hoc removal branch in +``auth_remove_command``, and several sources had no branch at all โ€” so +``auth remove`` silently reverted on the next ``load_pool()`` call for +qwen-cli, nous device_code (partial), hermes_pkce, copilot gh_cli, and +custom-config sources. + +Now every source registers a ``RemovalStep`` that does exactly three things +in the same shape: + + 1. Clean up whatever externally-readable state the source reads from + (.env line, auth.json block, OAuth file, etc.) + 2. Suppress the ``(provider, source_id)`` in auth.json so the + corresponding ``_seed_from_*`` branch skips the upsert on re-load + 3. Return ``RemovalResult`` describing what was cleaned and any + diagnostic hints the user should see (shell-exported env vars, + external credential files we deliberately don't delete, etc.) + +Adding a new credential source is: + - wire up a reader branch in ``_seed_from_*`` (existing pattern) + - gate that reader behind ``is_source_suppressed(provider, source_id)`` + - register a ``RemovalStep`` here + +No more per-source if/elif chain in ``auth_remove_command``. 
+""" + +from __future__ import annotations + +import os +from dataclasses import dataclass, field +from pathlib import Path +from typing import Callable, List, Optional + + +@dataclass +class RemovalResult: + """Outcome of removing a credential source. + + Attributes: + cleaned: Short strings describing external state that was actually + mutated (``"Cleared XAI_API_KEY from .env"``, + ``"Cleared openai-codex OAuth tokens from auth store"``). + Printed as plain lines to the user. + hints: Diagnostic lines ABOUT state the user may need to clean up + themselves or is deliberately left intact (shell-exported env + var, Claude Code credential file we don't delete, etc.). + Printed as plain lines to the user. Always non-destructive. + suppress: Whether to call ``suppress_credential_source`` after + cleanup so future ``load_pool`` calls skip this source. + Default True โ€” almost every source needs this to stay sticky. + The only legitimate False is ``manual`` entries, which aren't + seeded from anywhere external. + """ + + cleaned: List[str] = field(default_factory=list) + hints: List[str] = field(default_factory=list) + suppress: bool = True + + +@dataclass +class RemovalStep: + """How to remove one specific credential source cleanly. + + Attributes: + provider: Provider pool key (``"xai"``, ``"anthropic"``, ``"nous"``, ...). + Special value ``"*"`` means "matches any provider" โ€” used for + sources like ``manual`` that aren't provider-specific. + source_id: Source identifier as it appears in + ``PooledCredential.source``. May be a literal (``"claude_code"``) + or a prefix pattern matched via ``match_fn``. + match_fn: Optional predicate overriding literal ``source_id`` + matching. Gets the removed entry's source string. Used for + ``env:*`` (any env-seeded key), ``config:*`` (any custom + pool), and ``manual:*`` (any manual-source variant). + remove_fn: ``(provider, removed_entry) -> RemovalResult``. Does the + actual cleanup and returns what happened for the user. 
+ description: One-line human-readable description for docs / tests. + """ + + provider: str + source_id: str + remove_fn: Callable[..., RemovalResult] + match_fn: Optional[Callable[[str], bool]] = None + description: str = "" + + def matches(self, provider: str, source: str) -> bool: + if self.provider != "*" and self.provider != provider: + return False + if self.match_fn is not None: + return self.match_fn(source) + return source == self.source_id + + +_REGISTRY: List[RemovalStep] = [] + + +def register(step: RemovalStep) -> RemovalStep: + _REGISTRY.append(step) + return step + + +def find_removal_step(provider: str, source: str) -> Optional[RemovalStep]: + """Return the first matching RemovalStep, or None if unregistered. + + Unregistered sources fall through to the default remove path in + ``auth_remove_command``: the pool entry is already gone (that happens + before dispatch), no external cleanup, no suppression. This is the + correct behaviour for ``manual`` entries โ€” they were only ever stored + in the pool, nothing external to clean up. + """ + for step in _REGISTRY: + if step.matches(provider, source): + return step + return None + + +# --------------------------------------------------------------------------- +# Individual RemovalStep implementations โ€” one per source. +# --------------------------------------------------------------------------- +# Each remove_fn is intentionally small and single-purpose. Adding a new +# credential source means adding ONE entry here โ€” no other changes to +# auth_remove_command. + + +def _remove_env_source(provider: str, removed) -> RemovalResult: + """env: โ€” the most common case. + + Handles three user situations: + 1. Var lives only in ~/.hermes/.env โ†’ clear it + 2. Var lives only in the user's shell (shell profile, systemd + EnvironmentFile, launchd plist) โ†’ hint them where to unset it + 3. 
Var lives in both โ†’ clear from .env, hint about shell + """ + from hermes_cli.config import get_env_path, remove_env_value + + result = RemovalResult() + env_var = removed.source[len("env:"):] + if not env_var: + return result + + # Detect shell vs .env BEFORE remove_env_value pops os.environ. + env_in_process = bool(os.getenv(env_var)) + env_in_dotenv = False + try: + env_path = get_env_path() + if env_path.exists(): + env_in_dotenv = any( + line.strip().startswith(f"{env_var}=") + for line in env_path.read_text(errors="replace").splitlines() + ) + except OSError: + pass + shell_exported = env_in_process and not env_in_dotenv + + cleared = remove_env_value(env_var) + if cleared: + result.cleaned.append(f"Cleared {env_var} from .env") + + if shell_exported: + result.hints.extend([ + f"Note: {env_var} is still set in your shell environment " + f"(not in ~/.hermes/.env).", + " Unset it there (shell profile, systemd EnvironmentFile, " + "launchd plist, etc.) or it will keep being visible to Hermes.", + f" The pool entry is now suppressed โ€” Hermes will ignore " + f"{env_var} until you run `hermes auth add {provider}`.", + ]) + else: + result.hints.append( + f"Suppressed env:{env_var} โ€” it will not be re-seeded even " + f"if the variable is re-exported later." + ) + return result + + +def _remove_claude_code(provider: str, removed) -> RemovalResult: + """~/.claude/.credentials.json is owned by Claude Code itself. + + We don't delete it โ€” the user's Claude Code install still needs to + work. We just suppress it so Hermes stops reading it. 
+ """ + return RemovalResult(hints=[ + "Suppressed claude_code credential โ€” it will not be re-seeded.", + "Note: Claude Code credentials still live in ~/.claude/.credentials.json", + "Run `hermes auth add anthropic` to re-enable if needed.", + ]) + + +def _remove_hermes_pkce(provider: str, removed) -> RemovalResult: + """~/.hermes/.anthropic_oauth.json is ours โ€” delete it outright.""" + from hermes_constants import get_hermes_home + + result = RemovalResult() + oauth_file = get_hermes_home() / ".anthropic_oauth.json" + if oauth_file.exists(): + try: + oauth_file.unlink() + result.cleaned.append("Cleared Hermes Anthropic OAuth credentials") + except OSError as exc: + result.hints.append(f"Could not delete {oauth_file}: {exc}") + return result + + +def _clear_auth_store_provider(provider: str) -> bool: + """Delete auth_store.providers[provider]. Returns True if deleted.""" + from hermes_cli.auth import ( + _auth_store_lock, + _load_auth_store, + _save_auth_store, + ) + + with _auth_store_lock(): + auth_store = _load_auth_store() + providers_dict = auth_store.get("providers") + if isinstance(providers_dict, dict) and provider in providers_dict: + del providers_dict[provider] + _save_auth_store(auth_store) + return True + return False + + +def _remove_nous_device_code(provider: str, removed) -> RemovalResult: + """Nous OAuth lives in auth.json providers.nous โ€” clear it and suppress. + + We suppress in addition to clearing because nothing else stops the + user's next `hermes login` run from writing providers.nous again + before they decide to. Suppression forces them to go through + `hermes auth add nous` to re-engage, which is the documented re-add + path and clears the suppression atomically. 
+ """ + result = RemovalResult() + if _clear_auth_store_provider(provider): + result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store") + return result + + +def _remove_codex_device_code(provider: str, removed) -> RemovalResult: + """Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json. + + refresh_codex_oauth_pure() writes both every time, so clearing only + the Hermes auth store is not enough โ€” _seed_from_singletons() would + re-import from ~/.codex/auth.json on the next load_pool() call and + the removal would be instantly undone. We suppress instead of + deleting Codex CLI's file, so the Codex CLI itself keeps working. + + The canonical source name in ``_seed_from_singletons`` is + ``"device_code"`` (no prefix). Entries may show up in the pool as + either ``"device_code"`` (seeded) or ``"manual:device_code"`` (added + via ``hermes auth add openai-codex``), but in both cases the re-seed + gate lives at the ``"device_code"`` suppression key. We suppress + that canonical key here; the central dispatcher also suppresses + ``removed.source`` which is fine โ€” belt-and-suspenders, idempotent. + """ + from hermes_cli.auth import suppress_credential_source + + result = RemovalResult() + if _clear_auth_store_provider(provider): + result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store") + # Suppress the canonical re-seed source, not just whatever source the + # removed entry had. Otherwise `manual:device_code` removals wouldn't + # block the `device_code` re-seed path. + suppress_credential_source(provider, "device_code") + result.hints.extend([ + "Suppressed openai-codex device_code source โ€” it will not be re-seeded.", + "Note: Codex CLI credentials still live in ~/.codex/auth.json", + "Run `hermes auth add openai-codex` to re-enable if needed.", + ]) + return result + + +def _remove_qwen_cli(provider: str, removed) -> RemovalResult: + """~/.qwen/oauth_creds.json is owned by the Qwen CLI. 
+ + Same pattern as claude_code โ€” suppress, don't delete. The user's + Qwen CLI install still reads from that file. + """ + return RemovalResult(hints=[ + "Suppressed qwen-cli credential โ€” it will not be re-seeded.", + "Note: Qwen CLI credentials still live in ~/.qwen/oauth_creds.json", + "Run `hermes auth add qwen-oauth` to re-enable if needed.", + ]) + + +def _remove_copilot_gh(provider: str, removed) -> RemovalResult: + """Copilot token comes from `gh auth token` or COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN. + + Copilot is special: the same token can be seeded as multiple source + entries (gh_cli from ``_seed_from_singletons`` plus env: from + ``_seed_from_env``), so removing one entry without suppressing the + others lets the duplicates resurrect. We suppress ALL known copilot + sources here so removal is stable regardless of which entry the + user clicked. + + We don't touch the user's gh CLI or shell state โ€” just suppress so + Hermes stops picking the token up. + """ + # Suppress ALL copilot source variants up-front so no path resurrects + # the pool entry. The central dispatcher in auth_remove_command will + # ALSO suppress removed.source, but it's idempotent so double-calling + # is harmless. + from hermes_cli.auth import suppress_credential_source + suppress_credential_source(provider, "gh_cli") + for env_var in ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"): + suppress_credential_source(provider, f"env:{env_var}") + + return RemovalResult(hints=[ + "Suppressed all copilot token sources (gh_cli + env vars) โ€” they will not be re-seeded.", + "Note: Your gh CLI / shell environment is unchanged.", + "Run `hermes auth add copilot` to re-enable if needed.", + ]) + + +def _remove_custom_config(provider: str, removed) -> RemovalResult: + """Custom provider pools are seeded from custom_providers config or + model.api_key. Both are in config.yaml โ€” modifying that from here + is more invasive than suppression. 
We suppress; the user can edit + config.yaml if they want to remove the key from disk entirely. + """ + source_label = removed.source + return RemovalResult(hints=[ + f"Suppressed {source_label} โ€” it will not be re-seeded.", + "Note: The underlying value in config.yaml is unchanged. Edit it " + "directly if you want to remove the credential from disk.", + ]) + + +def _register_all_sources() -> None: + """Called once on module import. + + ORDER MATTERS โ€” ``find_removal_step`` returns the first match. Put + provider-specific steps before the generic ``env:*`` step so that e.g. + copilot's ``env:GH_TOKEN`` goes through the copilot removal (which + doesn't touch the user's shell), not the generic env-var removal + (which would try to clear .env). + """ + register(RemovalStep( + provider="copilot", source_id="gh_cli", + match_fn=lambda src: src == "gh_cli" or src.startswith("env:"), + remove_fn=_remove_copilot_gh, + description="gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN", + )) + register(RemovalStep( + provider="*", source_id="env:", + match_fn=lambda src: src.startswith("env:"), + remove_fn=_remove_env_source, + description="Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)", + )) + register(RemovalStep( + provider="anthropic", source_id="claude_code", + remove_fn=_remove_claude_code, + description="~/.claude/.credentials.json", + )) + register(RemovalStep( + provider="anthropic", source_id="hermes_pkce", + remove_fn=_remove_hermes_pkce, + description="~/.hermes/.anthropic_oauth.json", + )) + register(RemovalStep( + provider="nous", source_id="device_code", + remove_fn=_remove_nous_device_code, + description="auth.json providers.nous", + )) + register(RemovalStep( + provider="openai-codex", source_id="device_code", + match_fn=lambda src: src == "device_code" or src.endswith(":device_code"), + remove_fn=_remove_codex_device_code, + description="auth.json providers.openai-codex + ~/.codex/auth.json", + )) + register(RemovalStep( + 
provider="qwen-oauth", source_id="qwen-cli", + remove_fn=_remove_qwen_cli, + description="~/.qwen/oauth_creds.json", + )) + register(RemovalStep( + provider="*", source_id="config:", + match_fn=lambda src: src.startswith("config:") or src == "model_config", + remove_fn=_remove_custom_config, + description="Custom provider config.yaml api_key field", + )) + + +_register_all_sources() diff --git a/agent/error_classifier.py b/agent/error_classifier.py index fcdb8ba67..04875b6a5 100644 --- a/agent/error_classifier.py +++ b/agent/error_classifier.py @@ -220,12 +220,25 @@ _TRANSPORT_ERROR_TYPES = frozenset({ "ConnectionAbortedError", "BrokenPipeError", "TimeoutError", "ReadError", "ServerDisconnectedError", + # SSL/TLS transport errors โ€” transient mid-stream handshake/record + # failures that should retry rather than surface as a stalled session. + # ssl.SSLError subclasses OSError (caught by isinstance) but we list + # the type names here so provider-wrapped SSL errors (e.g. when the + # SDK re-raises without preserving the exception chain) still classify + # as transport rather than falling through to the unknown bucket. + "SSLError", "SSLZeroReturnError", "SSLWantReadError", + "SSLWantWriteError", "SSLEOFError", "SSLSyscallError", # OpenAI SDK errors (not subclasses of Python builtins) "APIConnectionError", "APITimeoutError", }) -# Server disconnect patterns (no status code, but transport-level) +# Server disconnect patterns (no status code, but transport-level). +# These are the "ambiguous" patterns โ€” a plain connection close could be +# transient transport hiccup OR server-side context overflow rejection +# (common when the API gateway disconnects instead of returning an HTTP +# error for oversized requests). A large session + one of these patterns +# triggers the context-overflow-with-compression recovery path. 
_SERVER_DISCONNECT_PATTERNS = [ "server disconnected", "peer closed connection", @@ -236,6 +249,40 @@ _SERVER_DISCONNECT_PATTERNS = [ "incomplete chunked read", ] +# SSL/TLS transient failure patterns โ€” intentionally distinct from +# _SERVER_DISCONNECT_PATTERNS above. +# +# An SSL alert mid-stream is almost always a transport-layer hiccup +# (flaky network, mid-session TLS renegotiation failure, load balancer +# dropping the connection) โ€” NOT a server-side context overflow signal. +# So we want the retry path but NOT the compression path; lumping these +# into _SERVER_DISCONNECT_PATTERNS would trigger unnecessary (and +# expensive) context compression on any large-session SSL hiccup. +# +# The OpenSSL library constructs error codes by prepending a format string +# to the uppercased alert reason; OpenSSL 3.x changed the separator +# (e.g. `SSLV3_ALERT_BAD_RECORD_MAC` โ†’ `SSL/TLS_ALERT_BAD_RECORD_MAC`), +# which silently stopped matching anything explicit. Matching on the +# stable substrings (`bad record mac`, `ssl alert`, `tls alert`, etc.) +# survives future OpenSSL format churn without code changes. +_SSL_TRANSIENT_PATTERNS = [ + # Space-separated (human-readable form, Python ssl module, most SDKs) + "bad record mac", + "ssl alert", + "tls alert", + "ssl handshake failure", + "tlsv1 alert", + "sslv3 alert", + # Underscore-separated (OpenSSL error code tokens, e.g. + # `ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC`, `SSLV3_ALERT_BAD_RECORD_MAC`) + "bad_record_mac", + "ssl_alert", + "tls_alert", + "tls_alert_internal_error", + # Python ssl module prefix, e.g. "[SSL: BAD_RECORD_MAC]" + "[ssl:", +] + # โ”€โ”€ Classification pipeline โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ @@ -255,9 +302,10 @@ def classify_api_error( 2. HTTP status code + message-aware refinement 3. Error code classification (from body) 4. 
Message pattern matching (billing vs rate_limit vs context vs auth) - 5. Transport error heuristics + 5. SSL/TLS transient alert patterns โ†’ retry as timeout 6. Server disconnect + large session โ†’ context overflow - 7. Fallback: unknown (retryable with backoff) + 7. Transport error heuristics + 8. Fallback: unknown (retryable with backoff) Args: error: The exception from the API call. @@ -388,7 +436,18 @@ def classify_api_error( if classified is not None: return classified - # โ”€โ”€ 5. Server disconnect + large session โ†’ context overflow โ”€โ”€โ”€โ”€โ”€ + # โ”€โ”€ 5. SSL/TLS transient errors โ†’ retry as timeout (not compression) โ”€โ”€ + # SSL alerts mid-stream are transport hiccups, not server-side context + # overflow signals. Classify before the disconnect check so a large + # session doesn't incorrectly trigger context compression when the real + # cause is a flaky TLS handshake. Also matches when the error is + # wrapped in a generic exception whose message string carries the SSL + # alert text but the type isn't ssl.SSLError (happens with some SDKs + # that re-raise without chaining). + if any(p in error_msg for p in _SSL_TRANSIENT_PATTERNS): + return _result(FailoverReason.timeout, retryable=True) + + # โ”€โ”€ 6. Server disconnect + large session โ†’ context overflow โ”€โ”€โ”€โ”€โ”€ # Must come BEFORE generic transport error catch โ€” a disconnect on # a large session is more likely context overflow than a transient # transport hiccup. Without this ordering, RemoteProtocolError @@ -405,12 +464,12 @@ def classify_api_error( ) return _result(FailoverReason.timeout, retryable=True) - # โ”€โ”€ 6. Transport / timeout heuristics โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + # โ”€โ”€ 7. 
Transport / timeout heuristics โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ if error_type in _TRANSPORT_ERROR_TYPES or isinstance(error, (TimeoutError, ConnectionError, OSError)): return _result(FailoverReason.timeout, retryable=True) - # โ”€โ”€ 7. Fallback: unknown โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + # โ”€โ”€ 8. Fallback: unknown โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ return _result(FailoverReason.unknown, retryable=True) @@ -470,11 +529,16 @@ def _classify_by_status( retryable=False, should_fallback=True, ) - # Generic 404 โ€” could be model or endpoint + # Generic 404 with no "model not found" signal โ€” could be a wrong + # endpoint path (common with local llama.cpp / Ollama / vLLM when + # the URL is slightly misconfigured), a proxy routing glitch, or + # a transient backend issue. Classifying these as model_not_found + # silently falls back to a different provider and tells the model + # the model is missing, which is wrong and wastes a turn. Treat + # as unknown so the retry loop surfaces the real error instead. 
return result_fn( - FailoverReason.model_not_found, - retryable=False, - should_fallback=True, + FailoverReason.unknown, + retryable=True, ) if status_code == 413: diff --git a/agent/file_safety.py b/agent/file_safety.py new file mode 100644 index 000000000..09da46caf --- /dev/null +++ b/agent/file_safety.py @@ -0,0 +1,111 @@ +"""Shared file safety rules used by both tools and ACP shims.""" + +from __future__ import annotations + +import os +from pathlib import Path +from typing import Optional + + +def _hermes_home_path() -> Path: + """Resolve the active HERMES_HOME (profile-aware) without circular imports.""" + try: + from hermes_constants import get_hermes_home # local import to avoid cycles + return get_hermes_home() + except Exception: + return Path(os.path.expanduser("~/.hermes")) + + +def build_write_denied_paths(home: str) -> set[str]: + """Return exact sensitive paths that must never be written.""" + hermes_home = _hermes_home_path() + return { + os.path.realpath(p) + for p in [ + os.path.join(home, ".ssh", "authorized_keys"), + os.path.join(home, ".ssh", "id_rsa"), + os.path.join(home, ".ssh", "id_ed25519"), + os.path.join(home, ".ssh", "config"), + str(hermes_home / ".env"), + os.path.join(home, ".bashrc"), + os.path.join(home, ".zshrc"), + os.path.join(home, ".profile"), + os.path.join(home, ".bash_profile"), + os.path.join(home, ".zprofile"), + os.path.join(home, ".netrc"), + os.path.join(home, ".pgpass"), + os.path.join(home, ".npmrc"), + os.path.join(home, ".pypirc"), + "/etc/sudoers", + "/etc/passwd", + "/etc/shadow", + ] + } + + +def build_write_denied_prefixes(home: str) -> list[str]: + """Return sensitive directory prefixes that must never be written.""" + return [ + os.path.realpath(p) + os.sep + for p in [ + os.path.join(home, ".ssh"), + os.path.join(home, ".aws"), + os.path.join(home, ".gnupg"), + os.path.join(home, ".kube"), + "/etc/sudoers.d", + "/etc/systemd", + os.path.join(home, ".docker"), + os.path.join(home, ".azure"), + 
os.path.join(home, ".config", "gh"), + ] + ] + + +def get_safe_write_root() -> Optional[str]: + """Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset.""" + root = os.getenv("HERMES_WRITE_SAFE_ROOT", "") + if not root: + return None + try: + return os.path.realpath(os.path.expanduser(root)) + except Exception: + return None + + +def is_write_denied(path: str) -> bool: + """Return True if path is blocked by the write denylist or safe root.""" + home = os.path.realpath(os.path.expanduser("~")) + resolved = os.path.realpath(os.path.expanduser(str(path))) + + if resolved in build_write_denied_paths(home): + return True + for prefix in build_write_denied_prefixes(home): + if resolved.startswith(prefix): + return True + + safe_root = get_safe_write_root() + if safe_root and not (resolved == safe_root or resolved.startswith(safe_root + os.sep)): + return True + + return False + + +def get_read_block_error(path: str) -> Optional[str]: + """Return an error message when a read targets internal Hermes cache files.""" + resolved = Path(path).expanduser().resolve() + hermes_home = _hermes_home_path().resolve() + blocked_dirs = [ + hermes_home / "skills" / ".hub" / "index-cache", + hermes_home / "skills" / ".hub", + ] + for blocked in blocked_dirs: + try: + resolved.relative_to(blocked) + except ValueError: + continue + return ( + f"Access denied: {path} is an internal Hermes cache file " + "and cannot be read directly to prevent prompt injection. " + "Use the skills_list or skill_view tools instead." 
+ ) + return None diff --git a/agent/gemini_cloudcode_adapter.py b/agent/gemini_cloudcode_adapter.py index b5a8fb927..24866c3a5 100644 --- a/agent/gemini_cloudcode_adapter.py +++ b/agent/gemini_cloudcode_adapter.py @@ -799,7 +799,8 @@ def _gemini_http_error(response: httpx.Response) -> CodeAssistError: err_obj = {} err_status = str(err_obj.get("status") or "").strip() err_message = str(err_obj.get("message") or "").strip() - err_details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else [] + _raw_details = err_obj.get("details") + err_details_list = _raw_details if isinstance(_raw_details, list) else [] # Extract google.rpc.ErrorInfo reason + metadata. There may be more # than one ErrorInfo (rare), so we pick the first one with a reason. diff --git a/agent/gemini_native_adapter.py b/agent/gemini_native_adapter.py index 8418cec98..406e4a19b 100644 --- a/agent/gemini_native_adapter.py +++ b/agent/gemini_native_adapter.py @@ -613,7 +613,8 @@ def gemini_http_error(response: httpx.Response) -> GeminiAPIError: err_obj = {} err_status = str(err_obj.get("status") or "").strip() err_message = str(err_obj.get("message") or "").strip() - details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else [] + _raw_details = err_obj.get("details") + details_list = _raw_details if isinstance(_raw_details, list) else [] reason = "" retry_after: Optional[float] = None diff --git a/agent/image_gen_provider.py b/agent/image_gen_provider.py new file mode 100644 index 000000000..47f65c1b3 --- /dev/null +++ b/agent/image_gen_provider.py @@ -0,0 +1,242 @@ +""" +Image Generation Provider ABC +============================= + +Defines the pluggable-backend interface for image generation. Providers register +instances via ``PluginContext.register_image_gen_provider()``; the active one +(selected via ``image_gen.provider`` in ``config.yaml``) services every +``image_generate`` tool call. 
+
+Providers live in ``<repo>/plugins/image_gen/<name>/`` (built-in, auto-loaded
+as ``kind: backend``) or ``~/.hermes/plugins/image_gen/<name>/`` (user, opt-in
+via ``plugins.enabled``).
+
+Response shape
+--------------
+All providers return a dict that :func:`success_response` / :func:`error_response`
+produce. The tool wrapper JSON-serializes it. Keys:
+
+    success       bool
+    image         str | None   URL or absolute file path
+    model         str          provider-specific model identifier
+    prompt        str          echoed prompt
+    aspect_ratio  str          "landscape" | "square" | "portrait"
+    provider      str          provider name (for diagnostics)
+    error         str          only when success=False
+    error_type    str          only when success=False
+"""
+
+from __future__ import annotations
+
+import abc
+import base64
+import datetime
+import logging
+import uuid
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+logger = logging.getLogger(__name__)
+
+
+VALID_ASPECT_RATIOS: Tuple[str, ...] = ("landscape", "square", "portrait")
+DEFAULT_ASPECT_RATIO = "landscape"
+
+
+# ---------------------------------------------------------------------------
+# ABC
+# ---------------------------------------------------------------------------
+
+
+class ImageGenProvider(abc.ABC):
+    """Abstract base class for an image generation backend.
+
+    Subclasses must implement :meth:`generate`. Everything else has sane
+    defaults — override only what your provider needs.
+    """
+
+    @property
+    @abc.abstractmethod
+    def name(self) -> str:
+        """Stable short identifier used in ``image_gen.provider`` config.
+
+        Lowercase, no spaces. Examples: ``fal``, ``openai``, ``replicate``.
+        """
+
+    @property
+    def display_name(self) -> str:
+        """Human-readable label shown in ``hermes tools``. Defaults to ``name.title()``."""
+        return self.name.title()
+
+    def is_available(self) -> bool:
+        """Return True when this provider can service calls.
+
+        Typically checks for a required API key. Default: True
+        (providers with no external dependencies are always available).
+ """ + return True + + def list_models(self) -> List[Dict[str, Any]]: + """Return catalog entries for ``hermes tools`` model picker. + + Each entry:: + + { + "id": "gpt-image-1.5", # required + "display": "GPT Image 1.5", # optional; defaults to id + "speed": "~10s", # optional + "strengths": "...", # optional + "price": "$...", # optional + } + + Default: empty list (provider has no user-selectable models). + """ + return [] + + def get_setup_schema(self) -> Dict[str, Any]: + """Return provider metadata for the ``hermes tools`` picker. + + Used by ``tools_config.py`` to inject this provider as a row in + the Image Generation provider list. Shape:: + + { + "name": "OpenAI", # picker label + "badge": "paid", # optional short tag + "tag": "One-line description...", # optional subtitle + "env_vars": [ # keys to prompt for + {"key": "OPENAI_API_KEY", + "prompt": "OpenAI API key", + "url": "https://platform.openai.com/api-keys"}, + ], + } + + Default: minimal entry derived from ``display_name``. Override to + expose API key prompts and custom badges. + """ + return { + "name": self.display_name, + "badge": "", + "tag": "", + "env_vars": [], + } + + def default_model(self) -> Optional[str]: + """Return the default model id, or None if not applicable.""" + models = self.list_models() + if models: + return models[0].get("id") + return None + + @abc.abstractmethod + def generate( + self, + prompt: str, + aspect_ratio: str = DEFAULT_ASPECT_RATIO, + **kwargs: Any, + ) -> Dict[str, Any]: + """Generate an image. + + Implementations should return the dict from :func:`success_response` + or :func:`error_response`. ``kwargs`` may contain forward-compat + parameters future versions of the schema will expose โ€” implementations + should ignore unknown keys. 
+        """
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def resolve_aspect_ratio(value: Optional[str]) -> str:
+    """Clamp an aspect_ratio value to the valid set, defaulting to landscape.
+
+    Invalid values are coerced rather than rejected so the tool surface is
+    forgiving of agent mistakes.
+    """
+    if not isinstance(value, str):
+        return DEFAULT_ASPECT_RATIO
+    v = value.strip().lower()
+    if v in VALID_ASPECT_RATIOS:
+        return v
+    return DEFAULT_ASPECT_RATIO
+
+
+def _images_cache_dir() -> Path:
+    """Return ``$HERMES_HOME/cache/images/``, creating parents as needed."""
+    from hermes_constants import get_hermes_home
+
+    path = get_hermes_home() / "cache" / "images"
+    path.mkdir(parents=True, exist_ok=True)
+    return path
+
+
+def save_b64_image(
+    b64_data: str,
+    *,
+    prefix: str = "image",
+    extension: str = "png",
+) -> Path:
+    """Decode base64 image data and write it under ``$HERMES_HOME/cache/images/``.
+
+    Returns the absolute :class:`Path` to the saved file.
+
+    Filename format: ``<prefix>_<timestamp>_<short>.<extension>``.
+    """
+    raw = base64.b64decode(b64_data)
+    ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    short = uuid.uuid4().hex[:8]
+    path = _images_cache_dir() / f"{prefix}_{ts}_{short}.{extension}"
+    path.write_bytes(raw)
+    return path
+
+
+def success_response(
+    *,
+    image: str,
+    model: str,
+    prompt: str,
+    aspect_ratio: str,
+    provider: str,
+    extra: Optional[Dict[str, Any]] = None,
+) -> Dict[str, Any]:
+    """Build a uniform success response dict.
+
+    ``image`` may be an HTTP URL or an absolute filesystem path (for b64
+    providers like OpenAI). Callers that need to pass through additional
+    backend-specific fields can supply ``extra``.
+ """ + payload: Dict[str, Any] = { + "success": True, + "image": image, + "model": model, + "prompt": prompt, + "aspect_ratio": aspect_ratio, + "provider": provider, + } + if extra: + for k, v in extra.items(): + payload.setdefault(k, v) + return payload + + +def error_response( + *, + error: str, + error_type: str = "provider_error", + provider: str = "", + model: str = "", + prompt: str = "", + aspect_ratio: str = DEFAULT_ASPECT_RATIO, +) -> Dict[str, Any]: + """Build a uniform error response dict.""" + return { + "success": False, + "image": None, + "error": error, + "error_type": error_type, + "model": model, + "prompt": prompt, + "aspect_ratio": aspect_ratio, + "provider": provider, + } diff --git a/agent/image_gen_registry.py b/agent/image_gen_registry.py new file mode 100644 index 000000000..715133231 --- /dev/null +++ b/agent/image_gen_registry.py @@ -0,0 +1,120 @@ +""" +Image Generation Provider Registry +================================== + +Central map of registered providers. Populated by plugins at import-time via +``PluginContext.register_image_gen_provider()``; consumed by the +``image_generate`` tool to dispatch each call to the active backend. + +Active selection +---------------- +The active provider is chosen by ``image_gen.provider`` in ``config.yaml``. +If unset, :func:`get_active_provider` applies fallback logic: + +1. If exactly one provider is registered, use it. +2. Otherwise if a provider named ``fal`` is registered, use it (legacy + default โ€” matches pre-plugin behavior). +3. Otherwise return ``None`` (the tool surfaces a helpful error pointing + the user at ``hermes tools``). 
+""" + +from __future__ import annotations + +import logging +import threading +from typing import Dict, List, Optional + +from agent.image_gen_provider import ImageGenProvider + +logger = logging.getLogger(__name__) + + +_providers: Dict[str, ImageGenProvider] = {} +_lock = threading.Lock() + + +def register_provider(provider: ImageGenProvider) -> None: + """Register an image generation provider. + + Re-registration (same ``name``) overwrites the previous entry and logs + a debug message โ€” this makes hot-reload scenarios (tests, dev loops) + behave predictably. + """ + if not isinstance(provider, ImageGenProvider): + raise TypeError( + f"register_provider() expects an ImageGenProvider instance, " + f"got {type(provider).__name__}" + ) + name = provider.name + if not isinstance(name, str) or not name.strip(): + raise ValueError("Image gen provider .name must be a non-empty string") + with _lock: + existing = _providers.get(name) + _providers[name] = provider + if existing is not None: + logger.debug("Image gen provider '%s' re-registered (was %r)", name, type(existing).__name__) + else: + logger.debug("Registered image gen provider '%s' (%s)", name, type(provider).__name__) + + +def list_providers() -> List[ImageGenProvider]: + """Return all registered providers, sorted by name.""" + with _lock: + items = list(_providers.values()) + return sorted(items, key=lambda p: p.name) + + +def get_provider(name: str) -> Optional[ImageGenProvider]: + """Return the provider registered under *name*, or None.""" + if not isinstance(name, str): + return None + with _lock: + return _providers.get(name.strip()) + + +def get_active_provider() -> Optional[ImageGenProvider]: + """Resolve the currently-active provider. + + Reads ``image_gen.provider`` from config.yaml; falls back per the + module docstring. 
+ """ + configured: Optional[str] = None + try: + from hermes_cli.config import load_config + + cfg = load_config() + section = cfg.get("image_gen") if isinstance(cfg, dict) else None + if isinstance(section, dict): + raw = section.get("provider") + if isinstance(raw, str) and raw.strip(): + configured = raw.strip() + except Exception as exc: + logger.debug("Could not read image_gen.provider from config: %s", exc) + + with _lock: + snapshot = dict(_providers) + + if configured: + provider = snapshot.get(configured) + if provider is not None: + return provider + logger.debug( + "image_gen.provider='%s' configured but not registered; falling back", + configured, + ) + + # Fallback: single-provider case + if len(snapshot) == 1: + return next(iter(snapshot.values())) + + # Fallback: prefer legacy FAL for backward compat + if "fal" in snapshot: + return snapshot["fal"] + + return None + + +def _reset_for_tests() -> None: + """Clear the registry. **Test-only.**""" + with _lock: + _providers.clear() diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 6506bffe6..e3c07684c 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -4,6 +4,7 @@ Pure utility functions with no AIAgent dependency. Used by ContextCompressor and run_agent.py for pre-flight context checks. """ +import ipaddress import logging import re import time @@ -25,7 +26,7 @@ logger = logging.getLogger(__name__) # are preserved so the full model name reaches cache lookups and server queries. 
_PROVIDER_PREFIXES: frozenset[str] = frozenset({ "openrouter", "nous", "openai-codex", "copilot", "copilot-acp", - "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek", + "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-cn", "anthropic", "deepseek", "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba", "qwen-oauth", "xiaomi", @@ -36,7 +37,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({ "glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot", "github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek", "ollama", - "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen", + "stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen", "mimo", "xiaomi-mimo", "arcee-ai", "arceeai", "xai", "x-ai", "x.ai", "grok", @@ -51,6 +52,13 @@ _OLLAMA_TAG_PATTERN = re.compile( ) +# Tailscale's CGNAT range (RFC 6598). `ipaddress.is_private` excludes this +# block, so without an explicit check Ollama reached over Tailscale (e.g. +# `http://100.77.243.5:11434`) wouldn't be treated as local and its stream +# read / stale timeouts wouldn't get auto-bumped. Built once at import time. +_TAILSCALE_CGNAT = ipaddress.IPv4Network("100.64.0.0/10") + + def _strip_provider_prefix(model: str) -> str: """Strip a recognised provider prefix from a model string. @@ -125,6 +133,8 @@ DEFAULT_CONTEXT_LENGTHS = { # Google "gemini": 1048576, # Gemma (open models served via AI Studio) + "gemma-4": 256000, # Gemma 4 family + "gemma4": 256000, # Ollama-style naming (e.g. 
gemma4:31b-cloud) "gemma-4-31b": 256000, "gemma-3": 131072, "gemma": 8192, # fallback for older gemma models @@ -177,6 +187,8 @@ DEFAULT_CONTEXT_LENGTHS = { "mimo-v2-pro": 1000000, "mimo-v2-omni": 256000, "mimo-v2-flash": 256000, + "mimo-v2.5-pro": 1000000, + "mimo-v2.5": 1000000, "zai-org/GLM-5": 202752, } @@ -191,6 +203,7 @@ _CONTEXT_LENGTH_KEYS = ( "max_seq_len", "n_ctx_train", "n_ctx", + "ctx_size", ) _MAX_COMPLETION_KEYS = ( @@ -234,9 +247,12 @@ _URL_TO_PROVIDER: Dict[str, str] = { "chatgpt.com": "openai", "api.anthropic.com": "anthropic", "api.z.ai": "zai", + "open.bigmodel.cn": "zai", "api.moonshot.ai": "kimi-coding", "api.moonshot.cn": "kimi-coding-cn", "api.kimi.com": "kimi-coding", + "api.stepfun.ai": "stepfun", + "api.stepfun.com": "stepfun", "api.arcee.ai": "arcee", "api.minimax": "minimax", "dashscope.aliyuncs.com": "alibaba", @@ -281,7 +297,15 @@ def _is_known_provider_base_url(base_url: str) -> bool: def is_local_endpoint(base_url: str) -> bool: - """Return True if base_url points to a local machine (localhost / RFC-1918 / WSL).""" + """Return True if base_url points to a local machine. + + Recognises loopback (``localhost``, ``127.0.0.0/8``, ``::1``), + container-internal DNS names (``host.docker.internal`` et al.), + RFC-1918 private ranges (``10/8``, ``172.16/12``, ``192.168/16``), + link-local, and Tailscale CGNAT (``100.64.0.0/10``). Tailscale CGNAT + is included so remote-but-trusted Ollama boxes reached over a + Tailscale mesh get the same timeout auto-bumps as localhost Ollama. + """ normalized = _normalize_base_url(base_url) if not normalized: return False @@ -296,14 +320,17 @@ def is_local_endpoint(base_url: str) -> bool: # Docker / Podman / Lima internal DNS names (e.g. 
host.docker.internal) if any(host.endswith(suffix) for suffix in _CONTAINER_LOCAL_SUFFIXES): return True - # RFC-1918 private ranges and link-local - import ipaddress + # RFC-1918 private ranges, link-local, and Tailscale CGNAT try: addr = ipaddress.ip_address(host) - return addr.is_private or addr.is_loopback or addr.is_link_local + if addr.is_private or addr.is_loopback or addr.is_link_local: + return True + if isinstance(addr, ipaddress.IPv4Address) and addr in _TAILSCALE_CGNAT: + return True except ValueError: pass # Bare IP that looks like a private range (e.g. 172.26.x.x for WSL) + # or Tailscale CGNAT (100.64.x.xโ€“100.127.x.x). parts = host.split(".") if len(parts) == 4: try: @@ -314,6 +341,8 @@ def is_local_endpoint(base_url: str) -> bool: return True if first == 192 and second == 168: return True + if first == 100 and 64 <= second <= 127: + return True except ValueError: pass return False diff --git a/agent/models_dev.py b/agent/models_dev.py index 3e5c911e7..2f06a75d8 100644 --- a/agent/models_dev.py +++ b/agent/models_dev.py @@ -146,6 +146,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = { "openai-codex": "openai", "zai": "zai", "kimi-coding": "kimi-for-coding", + "stepfun": "stepfun", "kimi-coding-cn": "kimi-for-coding", "minimax": "minimax", "minimax-cn": "minimax-cn", diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index 2a2104349..8e061f831 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -350,7 +350,13 @@ PLATFORM_HINTS = { ), "cli": ( "You are a CLI AI Agent. Try not to use markdown but simple text " - "renderable inside a terminal." + "renderable inside a terminal. " + "File delivery: there is no attachment channel โ€” the user reads your " + "response directly in their terminal. Do NOT emit MEDIA:/path tags " + "(those are only intercepted on messaging platforms like Telegram, " + "Discord, Slack, etc.; on the CLI they render as literal text). 
" + "When referring to a file you created or changed, just state its " + "absolute path in plain text; the user can open it from there." ), "sms": ( "You are communicating via SMS. Keep responses concise and use plain text " diff --git a/agent/skill_commands.py b/agent/skill_commands.py index 280105dac..a4345ca8c 100644 --- a/agent/skill_commands.py +++ b/agent/skill_commands.py @@ -8,6 +8,7 @@ can invoke skills via /skill-name commands and prompt-only built-ins like import json import logging import re +import subprocess from datetime import datetime from pathlib import Path from typing import Any, Dict, Optional @@ -22,6 +23,110 @@ _PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+") _SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]") _SKILL_MULTI_HYPHEN = re.compile(r"-{2,}") +# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md. +# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are +# left as-is so the user can debug them. +_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}") + +# Matches inline shell snippets like: !`date +%Y-%m-%d` +# Non-greedy, single-line only โ€” no newlines inside the backticks. +_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`") + +# Cap inline-shell output so a runaway command can't blow out the context. +_INLINE_SHELL_MAX_OUTPUT = 4000 + + +def _load_skills_config() -> dict: + """Load the ``skills`` section of config.yaml (best-effort).""" + try: + from hermes_cli.config import load_config + + cfg = load_config() or {} + skills_cfg = cfg.get("skills") + if isinstance(skills_cfg, dict): + return skills_cfg + except Exception: + logger.debug("Could not read skills config", exc_info=True) + return {} + + +def _substitute_template_vars( + content: str, + skill_dir: Path | None, + session_id: str | None, +) -> str: + """Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content. 
+ + Only substitutes tokens for which a concrete value is available โ€” + unresolved tokens are left in place so the author can spot them. + """ + if not content: + return content + + skill_dir_str = str(skill_dir) if skill_dir else None + + def _replace(match: re.Match) -> str: + token = match.group(1) + if token == "HERMES_SKILL_DIR" and skill_dir_str: + return skill_dir_str + if token == "HERMES_SESSION_ID" and session_id: + return str(session_id) + return match.group(0) + + return _SKILL_TEMPLATE_RE.sub(_replace, content) + + +def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str: + """Execute a single inline-shell snippet and return its stdout (trimmed). + + Failures return a short ``[inline-shell error: ...]`` marker instead of + raising, so one bad snippet can't wreck the whole skill message. + """ + try: + completed = subprocess.run( + ["bash", "-c", command], + cwd=str(cwd) if cwd else None, + capture_output=True, + text=True, + timeout=max(1, int(timeout)), + check=False, + ) + except subprocess.TimeoutExpired: + return f"[inline-shell timeout after {timeout}s: {command}]" + except FileNotFoundError: + return f"[inline-shell error: bash not found]" + except Exception as exc: + return f"[inline-shell error: {exc}]" + + output = (completed.stdout or "").rstrip("\n") + if not output and completed.stderr: + output = completed.stderr.rstrip("\n") + if len(output) > _INLINE_SHELL_MAX_OUTPUT: + output = output[:_INLINE_SHELL_MAX_OUTPUT] + "โ€ฆ[truncated]" + return output + + +def _expand_inline_shell( + content: str, + skill_dir: Path | None, + timeout: int, +) -> str: + """Replace every !`cmd` snippet in ``content`` with its stdout. + + Runs each snippet with the skill directory as CWD so relative paths in + the snippet work the way the author expects. 
+ """ + if "!`" not in content: + return content + + def _replace(match: re.Match) -> str: + cmd = match.group(1).strip() + if not cmd: + return "" + return _run_inline_shell(cmd, skill_dir, timeout) + + return _INLINE_SHELL_RE.sub(_replace, content) + def build_plan_path( user_instruction: str = "", @@ -133,14 +238,36 @@ def _build_skill_message( activation_note: str, user_instruction: str = "", runtime_note: str = "", + session_id: str | None = None, ) -> str: """Format a loaded skill into a user/system message payload.""" from tools.skills_tool import SKILLS_DIR content = str(loaded_skill.get("content") or "") + # โ”€โ”€ Template substitution and inline-shell expansion โ”€โ”€ + # Done before anything else so downstream blocks (setup notes, + # supporting-file hints) see the expanded content. + skills_cfg = _load_skills_config() + if skills_cfg.get("template_vars", True): + content = _substitute_template_vars(content, skill_dir, session_id) + if skills_cfg.get("inline_shell", False): + timeout = int(skills_cfg.get("inline_shell_timeout", 10) or 10) + content = _expand_inline_shell(content, skill_dir, timeout) + parts = [activation_note, "", content.strip()] + # โ”€โ”€ Inject the absolute skill directory so the agent can reference + # bundled scripts without an extra skill_view() round-trip. โ”€โ”€ + if skill_dir: + parts.append("") + parts.append(f"[Skill directory: {skill_dir}]") + parts.append( + "Resolve any relative paths in this skill (e.g. `scripts/foo.js`, " + "`templates/config.yaml`) against that directory, then run them " + "with the terminal tool using the absolute path." 
+ ) + # โ”€โ”€ Inject resolved skill config values โ”€โ”€ _inject_skill_config(loaded_skill, parts) @@ -188,11 +315,13 @@ def _build_skill_message( # Skill is from an external dir โ€” use the skill name instead skill_view_target = skill_dir.name parts.append("") - parts.append("[This skill has supporting files you can load with the skill_view tool:]") + parts.append("[This skill has supporting files:]") for sf in supporting: - parts.append(f"- {sf}") + parts.append(f"- {sf} -> {skill_dir / sf}") parts.append( - f'\nTo view any of these, use: skill_view(name="{skill_view_target}", file_path="")' + f'\nLoad any of these with skill_view(name="{skill_view_target}", ' + f'file_path=""), or run scripts directly by absolute path ' + f"(e.g. `node {skill_dir}/scripts/foo.js`)." ) if user_instruction: @@ -332,6 +461,7 @@ def build_skill_invocation_message( activation_note, user_instruction=user_instruction, runtime_note=runtime_note, + session_id=task_id, ) @@ -370,6 +500,7 @@ def build_preloaded_skills_prompt( loaded_skill, skill_dir, activation_note, + session_id=task_id, ) ) loaded_names.append(skill_name) diff --git a/agent/skill_utils.py b/agent/skill_utils.py index f7979122e..d4d94f7e2 100644 --- a/agent/skill_utils.py +++ b/agent/skill_utils.py @@ -435,7 +435,7 @@ def iter_skill_index_files(skills_dir: Path, filename: str): Excludes ``.git``, ``.github``, ``.hub`` directories. """ matches = [] - for root, dirs, files in os.walk(skills_dir): + for root, dirs, files in os.walk(skills_dir, followlinks=True): dirs[:] = [d for d in dirs if d not in EXCLUDED_SKILL_DIRS] if filename in files: matches.append(Path(root) / filename) diff --git a/agent/transports/__init__.py b/agent/transports/__init__.py index 6ee1c5117..575211332 100644 --- a/agent/transports/__init__.py +++ b/agent/transports/__init__.py @@ -1 +1,51 @@ -"""Transport layer types for provider response normalization.""" +"""Transport layer types and registry for provider response normalization. 
+ +Usage: + from agent.transports import get_transport + transport = get_transport("anthropic_messages") + result = transport.normalize_response(raw_response) +""" + +from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason # noqa: F401 + +_REGISTRY: dict = {} + + +def register_transport(api_mode: str, transport_cls: type) -> None: + """Register a transport class for an api_mode string.""" + _REGISTRY[api_mode] = transport_cls + + +def get_transport(api_mode: str): + """Get a transport instance for the given api_mode. + + Returns None if no transport is registered for this api_mode. + This allows gradual migration โ€” call sites can check for None + and fall back to the legacy code path. + """ + if not _REGISTRY: + _discover_transports() + cls = _REGISTRY.get(api_mode) + if cls is None: + return None + return cls() + + +def _discover_transports() -> None: + """Import all transport modules to trigger auto-registration.""" + try: + import agent.transports.anthropic # noqa: F401 + except ImportError: + pass + try: + import agent.transports.codex # noqa: F401 + except ImportError: + pass + try: + import agent.transports.chat_completions # noqa: F401 + except ImportError: + pass + try: + import agent.transports.bedrock # noqa: F401 + except ImportError: + pass diff --git a/agent/transports/anthropic.py b/agent/transports/anthropic.py new file mode 100644 index 000000000..6e7943aed --- /dev/null +++ b/agent/transports/anthropic.py @@ -0,0 +1,156 @@ +"""Anthropic Messages API transport. + +Delegates to the existing adapter functions in agent/anthropic_adapter.py. +This transport owns format conversion and normalization โ€” NOT client lifecycle. +""" + +from typing import Any, Dict, List, Optional + +from agent.transports.base import ProviderTransport +from agent.transports.types import NormalizedResponse + + +class AnthropicTransport(ProviderTransport): + """Transport for api_mode='anthropic_messages'. 
+ + Wraps the existing functions in anthropic_adapter.py behind the + ProviderTransport ABC. Each method delegates โ€” no logic is duplicated. + """ + + @property + def api_mode(self) -> str: + return "anthropic_messages" + + def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any: + """Convert OpenAI messages to Anthropic (system, messages) tuple. + + kwargs: + base_url: Optional[str] โ€” affects thinking signature handling. + """ + from agent.anthropic_adapter import convert_messages_to_anthropic + + base_url = kwargs.get("base_url") + return convert_messages_to_anthropic(messages, base_url=base_url) + + def convert_tools(self, tools: List[Dict[str, Any]]) -> Any: + """Convert OpenAI tool schemas to Anthropic input_schema format.""" + from agent.anthropic_adapter import convert_tools_to_anthropic + + return convert_tools_to_anthropic(tools) + + def build_kwargs( + self, + model: str, + messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None, + **params, + ) -> Dict[str, Any]: + """Build Anthropic messages.create() kwargs. + + Calls convert_messages and convert_tools internally. 
+ + params (all optional): + max_tokens: int + reasoning_config: dict | None + tool_choice: str | None + is_oauth: bool + preserve_dots: bool + context_length: int | None + base_url: str | None + fast_mode: bool + """ + from agent.anthropic_adapter import build_anthropic_kwargs + + return build_anthropic_kwargs( + model=model, + messages=messages, + tools=tools, + max_tokens=params.get("max_tokens", 16384), + reasoning_config=params.get("reasoning_config"), + tool_choice=params.get("tool_choice"), + is_oauth=params.get("is_oauth", False), + preserve_dots=params.get("preserve_dots", False), + context_length=params.get("context_length"), + base_url=params.get("base_url"), + fast_mode=params.get("fast_mode", False), + ) + + def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: + """Normalize Anthropic response to NormalizedResponse. + + Calls the adapter's v1 normalize and maps the (SimpleNamespace, finish_reason) + tuple to the shared NormalizedResponse type. + """ + from agent.anthropic_adapter import normalize_anthropic_response + from agent.transports.types import build_tool_call + + strip_tool_prefix = kwargs.get("strip_tool_prefix", False) + assistant_msg, finish_reason = normalize_anthropic_response(response, strip_tool_prefix) + + tool_calls = None + if assistant_msg.tool_calls: + tool_calls = [ + build_tool_call(id=tc.id, name=tc.function.name, arguments=tc.function.arguments) + for tc in assistant_msg.tool_calls + ] + + provider_data = {} + if getattr(assistant_msg, "reasoning_details", None): + provider_data["reasoning_details"] = assistant_msg.reasoning_details + + return NormalizedResponse( + content=assistant_msg.content, + tool_calls=tool_calls, + finish_reason=finish_reason, + reasoning=getattr(assistant_msg, "reasoning", None), + usage=None, + provider_data=provider_data or None, + ) + + def validate_response(self, response: Any) -> bool: + """Check Anthropic response structure is valid. 
+ + An empty content list is legitimate when ``stop_reason == "end_turn"`` + โ€” the model's canonical way of signalling "nothing more to add" after + a tool turn that already delivered the user-facing text. Treating it + as invalid falsely retries a completed response. + """ + if response is None: + return False + content_blocks = getattr(response, "content", None) + if not isinstance(content_blocks, list): + return False + if not content_blocks: + return getattr(response, "stop_reason", None) == "end_turn" + return True + + def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]: + """Extract Anthropic cache_read and cache_creation token counts.""" + usage = getattr(response, "usage", None) + if usage is None: + return None + cached = getattr(usage, "cache_read_input_tokens", 0) or 0 + written = getattr(usage, "cache_creation_input_tokens", 0) or 0 + if cached or written: + return {"cached_tokens": cached, "creation_tokens": written} + return None + + # Promote the adapter's canonical mapping to module level so it's shared + _STOP_REASON_MAP = { + "end_turn": "stop", + "tool_use": "tool_calls", + "max_tokens": "length", + "stop_sequence": "stop", + "refusal": "content_filter", + "model_context_window_exceeded": "length", + } + + def map_finish_reason(self, raw_reason: str) -> str: + """Map Anthropic stop_reason to OpenAI finish_reason.""" + return self._STOP_REASON_MAP.get(raw_reason, "stop") + + +# Auto-register on import +from agent.transports import register_transport # noqa: E402 + +register_transport("anthropic_messages", AnthropicTransport) diff --git a/agent/transports/base.py b/agent/transports/base.py new file mode 100644 index 000000000..b516967b6 --- /dev/null +++ b/agent/transports/base.py @@ -0,0 +1,89 @@ +"""Abstract base for provider transports. 
+ +A transport owns the data path for one api_mode: + convert_messages โ†’ convert_tools โ†’ build_kwargs โ†’ normalize_response + +It does NOT own: client construction, streaming, credential refresh, +prompt caching, interrupt handling, or retry logic. Those stay on AIAgent. +""" + +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional + +from agent.transports.types import NormalizedResponse + + +class ProviderTransport(ABC): + """Base class for provider-specific format conversion and normalization.""" + + @property + @abstractmethod + def api_mode(self) -> str: + """The api_mode string this transport handles (e.g. 'anthropic_messages').""" + ... + + @abstractmethod + def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any: + """Convert OpenAI-format messages to provider-native format. + + Returns provider-specific structure (e.g. (system, messages) for Anthropic, + or the messages list unchanged for chat_completions). + """ + ... + + @abstractmethod + def convert_tools(self, tools: List[Dict[str, Any]]) -> Any: + """Convert OpenAI-format tool definitions to provider-native format. + + Returns provider-specific tool list (e.g. Anthropic input_schema format). + """ + ... + + @abstractmethod + def build_kwargs( + self, + model: str, + messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None, + **params, + ) -> Dict[str, Any]: + """Build the complete API call kwargs dict. + + This is the primary entry point โ€” it typically calls convert_messages() + and convert_tools() internally, then adds model-specific config. + + Returns a dict ready to be passed to the provider's SDK client. + """ + ... + + @abstractmethod + def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: + """Normalize a raw provider response to the shared NormalizedResponse type. + + This is the only method that returns a transport-layer type. + """ + ... 
+ + def validate_response(self, response: Any) -> bool: + """Optional: check if the raw response is structurally valid. + + Returns True if valid, False if the response should be treated as invalid. + Default implementation always returns True. + """ + return True + + def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]: + """Optional: extract provider-specific cache hit/creation stats. + + Returns dict with 'cached_tokens' and 'creation_tokens', or None. + Default returns None. + """ + return None + + def map_finish_reason(self, raw_reason: str) -> str: + """Optional: map provider-specific stop reason to OpenAI equivalent. + + Default returns the raw reason unchanged. Override for providers + with different stop reason vocabularies. + """ + return raw_reason diff --git a/agent/transports/bedrock.py b/agent/transports/bedrock.py new file mode 100644 index 000000000..af549e7ea --- /dev/null +++ b/agent/transports/bedrock.py @@ -0,0 +1,154 @@ +"""AWS Bedrock Converse API transport. + +Delegates to the existing adapter functions in agent/bedrock_adapter.py. +Bedrock uses its own boto3 client (not the OpenAI SDK), so the transport +owns format conversion and normalization, while client construction and +boto3 calls stay on AIAgent. 
+""" + +from typing import Any, Dict, List, Optional + +from agent.transports.base import ProviderTransport +from agent.transports.types import NormalizedResponse, ToolCall, Usage + + +class BedrockTransport(ProviderTransport): + """Transport for api_mode='bedrock_converse'.""" + + @property + def api_mode(self) -> str: + return "bedrock_converse" + + def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any: + """Convert OpenAI messages to Bedrock Converse format.""" + from agent.bedrock_adapter import convert_messages_to_converse + return convert_messages_to_converse(messages) + + def convert_tools(self, tools: List[Dict[str, Any]]) -> Any: + """Convert OpenAI tool schemas to Bedrock Converse toolConfig.""" + from agent.bedrock_adapter import convert_tools_to_converse + return convert_tools_to_converse(tools) + + def build_kwargs( + self, + model: str, + messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None, + **params, + ) -> Dict[str, Any]: + """Build Bedrock converse() kwargs. + + Calls convert_messages and convert_tools internally. + + params: + max_tokens: int โ€” output token limit (default 4096) + temperature: float | None + guardrail_config: dict | None โ€” Bedrock guardrails + region: str โ€” AWS region (default 'us-east-1') + """ + from agent.bedrock_adapter import build_converse_kwargs + + region = params.get("region", "us-east-1") + guardrail = params.get("guardrail_config") + + kwargs = build_converse_kwargs( + model=model, + messages=messages, + tools=tools, + max_tokens=params.get("max_tokens", 4096), + temperature=params.get("temperature"), + guardrail_config=guardrail, + ) + # Sentinel keys for dispatch โ€” agent pops these before the boto3 call + kwargs["__bedrock_converse__"] = True + kwargs["__bedrock_region__"] = region + return kwargs + + def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: + """Normalize Bedrock response to NormalizedResponse. + + Handles two shapes: + 1. 
Raw boto3 dict (from direct converse() calls) + 2. Already-normalized SimpleNamespace with .choices (from dispatch site) + """ + from agent.bedrock_adapter import normalize_converse_response + + # Normalize to OpenAI-compatible SimpleNamespace + if hasattr(response, "choices") and response.choices: + # Already normalized at dispatch site + ns = response + else: + # Raw boto3 dict + ns = normalize_converse_response(response) + + choice = ns.choices[0] + msg = choice.message + finish_reason = choice.finish_reason or "stop" + + tool_calls = None + if msg.tool_calls: + tool_calls = [ + ToolCall( + id=tc.id, + name=tc.function.name, + arguments=tc.function.arguments, + ) + for tc in msg.tool_calls + ] + + usage = None + if hasattr(ns, "usage") and ns.usage: + u = ns.usage + usage = Usage( + prompt_tokens=getattr(u, "prompt_tokens", 0) or 0, + completion_tokens=getattr(u, "completion_tokens", 0) or 0, + total_tokens=getattr(u, "total_tokens", 0) or 0, + ) + + reasoning = getattr(msg, "reasoning", None) or getattr(msg, "reasoning_content", None) + + return NormalizedResponse( + content=msg.content, + tool_calls=tool_calls, + finish_reason=finish_reason, + reasoning=reasoning, + usage=usage, + ) + + def validate_response(self, response: Any) -> bool: + """Check Bedrock response structure. + + After normalize_converse_response, the response has OpenAI-compatible + .choices โ€” same check as chat_completions. + """ + if response is None: + return False + # Raw Bedrock dict response โ€” check for 'output' key + if isinstance(response, dict): + return "output" in response + # Already-normalized SimpleNamespace + if hasattr(response, "choices"): + return bool(response.choices) + return False + + def map_finish_reason(self, raw_reason: str) -> str: + """Map Bedrock stop reason to OpenAI finish_reason. + + The adapter already does this mapping inside normalize_converse_response, + so this is only used for direct access to raw responses. 
+ """ + _MAP = { + "end_turn": "stop", + "tool_use": "tool_calls", + "max_tokens": "length", + "stop_sequence": "stop", + "guardrail_intervened": "content_filter", + "content_filtered": "content_filter", + } + return _MAP.get(raw_reason, "stop") + + +# Auto-register on import +from agent.transports import register_transport # noqa: E402 + +register_transport("bedrock_converse", BedrockTransport) diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py new file mode 100644 index 000000000..900f59dcf --- /dev/null +++ b/agent/transports/chat_completions.py @@ -0,0 +1,387 @@ +"""OpenAI Chat Completions transport. + +Handles the default api_mode ('chat_completions') used by ~16 OpenAI-compatible +providers (OpenRouter, Nous, NVIDIA, Qwen, Ollama, DeepSeek, xAI, Kimi, etc.). + +Messages and tools are already in OpenAI format โ€” convert_messages and +convert_tools are near-identity. The complexity lives in build_kwargs +which has provider-specific conditionals for max_tokens defaults, +reasoning configuration, temperature handling, and extra_body assembly. +""" + +import copy +from typing import Any, Dict, List, Optional + +from agent.prompt_builder import DEVELOPER_ROLE_MODELS +from agent.transports.base import ProviderTransport +from agent.transports.types import NormalizedResponse, ToolCall, Usage + + +class ChatCompletionsTransport(ProviderTransport): + """Transport for api_mode='chat_completions'. + + The default path for OpenAI-compatible providers. + """ + + @property + def api_mode(self) -> str: + return "chat_completions" + + def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]: + """Messages are already in OpenAI format โ€” sanitize Codex leaks only. + + Strips Codex Responses API fields (``codex_reasoning_items`` on the + message, ``call_id``/``response_item_id`` on tool_calls) that strict + chat-completions providers reject with 400/422. 
+ """ + needs_sanitize = False + for msg in messages: + if not isinstance(msg, dict): + continue + if "codex_reasoning_items" in msg: + needs_sanitize = True + break + tool_calls = msg.get("tool_calls") + if isinstance(tool_calls, list): + for tc in tool_calls: + if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc): + needs_sanitize = True + break + if needs_sanitize: + break + + if not needs_sanitize: + return messages + + sanitized = copy.deepcopy(messages) + for msg in sanitized: + if not isinstance(msg, dict): + continue + msg.pop("codex_reasoning_items", None) + tool_calls = msg.get("tool_calls") + if isinstance(tool_calls, list): + for tc in tool_calls: + if isinstance(tc, dict): + tc.pop("call_id", None) + tc.pop("response_item_id", None) + return sanitized + + def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Tools are already in OpenAI format โ€” identity.""" + return tools + + def build_kwargs( + self, + model: str, + messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None, + **params, + ) -> Dict[str, Any]: + """Build chat.completions.create() kwargs. + + This is the most complex transport method โ€” it handles ~16 providers + via params rather than subclasses. 
+ + params: + timeout: float โ€” API call timeout + max_tokens: int | None โ€” user-configured max tokens + ephemeral_max_output_tokens: int | None โ€” one-shot override (error recovery) + max_tokens_param_fn: callable โ€” returns {max_tokens: N} or {max_completion_tokens: N} + reasoning_config: dict | None + request_overrides: dict | None + session_id: str | None + qwen_session_metadata: dict | None โ€” {sessionId, promptId} precomputed + model_lower: str โ€” lowercase model name for pattern matching + # Provider detection flags (all optional, default False) + is_openrouter: bool + is_nous: bool + is_qwen_portal: bool + is_github_models: bool + is_nvidia_nim: bool + is_kimi: bool + is_custom_provider: bool + ollama_num_ctx: int | None + # Provider routing + provider_preferences: dict | None + # Qwen-specific + qwen_prepare_fn: callable | None โ€” runs AFTER codex sanitization + qwen_prepare_inplace_fn: callable | None โ€” in-place variant for deepcopied lists + # Temperature + fixed_temperature: Any โ€” from _fixed_temperature_for_model() + omit_temperature: bool + # Reasoning + supports_reasoning: bool + github_reasoning_extra: dict | None + # Claude on OpenRouter/Nous max output + anthropic_max_output: int | None + # Extra + extra_body_additions: dict | None โ€” pre-built extra_body entries + """ + # Codex sanitization: drop reasoning_items / call_id / response_item_id + sanitized = self.convert_messages(messages) + + # Qwen portal prep AFTER codex sanitization. If sanitize already + # deepcopied, reuse that copy via the in-place variant to avoid a + # second deepcopy. 
+ is_qwen = params.get("is_qwen_portal", False) + if is_qwen: + qwen_prep = params.get("qwen_prepare_fn") + qwen_prep_inplace = params.get("qwen_prepare_inplace_fn") + if sanitized is messages: + if qwen_prep is not None: + sanitized = qwen_prep(sanitized) + else: + # Already deepcopied โ€” transform in place + if qwen_prep_inplace is not None: + qwen_prep_inplace(sanitized) + elif qwen_prep is not None: + sanitized = qwen_prep(sanitized) + + # Developer role swap for GPT-5/Codex models + model_lower = params.get("model_lower", (model or "").lower()) + if ( + sanitized + and isinstance(sanitized[0], dict) + and sanitized[0].get("role") == "system" + and any(p in model_lower for p in DEVELOPER_ROLE_MODELS) + ): + sanitized = list(sanitized) + sanitized[0] = {**sanitized[0], "role": "developer"} + + api_kwargs: Dict[str, Any] = { + "model": model, + "messages": sanitized, + } + + timeout = params.get("timeout") + if timeout is not None: + api_kwargs["timeout"] = timeout + + # Temperature + fixed_temp = params.get("fixed_temperature") + omit_temp = params.get("omit_temperature", False) + if omit_temp: + api_kwargs.pop("temperature", None) + elif fixed_temp is not None: + api_kwargs["temperature"] = fixed_temp + + # Qwen metadata (caller precomputes {sessionId, promptId}) + qwen_meta = params.get("qwen_session_metadata") + if qwen_meta and is_qwen: + api_kwargs["metadata"] = qwen_meta + + # Tools + if tools: + api_kwargs["tools"] = tools + + # max_tokens resolution โ€” priority: ephemeral > user > provider default + max_tokens_fn = params.get("max_tokens_param_fn") + ephemeral = params.get("ephemeral_max_output_tokens") + max_tokens = params.get("max_tokens") + anthropic_max_out = params.get("anthropic_max_output") + is_nvidia_nim = params.get("is_nvidia_nim", False) + is_kimi = params.get("is_kimi", False) + reasoning_config = params.get("reasoning_config") + + if ephemeral is not None and max_tokens_fn: + api_kwargs.update(max_tokens_fn(ephemeral)) + elif max_tokens 
is not None and max_tokens_fn: + api_kwargs.update(max_tokens_fn(max_tokens)) + elif is_nvidia_nim and max_tokens_fn: + api_kwargs.update(max_tokens_fn(16384)) + elif is_qwen and max_tokens_fn: + api_kwargs.update(max_tokens_fn(65536)) + elif is_kimi and max_tokens_fn: + # Kimi/Moonshot: 32000 matches Kimi CLI's default + api_kwargs.update(max_tokens_fn(32000)) + elif anthropic_max_out is not None: + api_kwargs["max_tokens"] = anthropic_max_out + + # Kimi: top-level reasoning_effort (unless thinking disabled) + if is_kimi: + _kimi_thinking_off = bool( + reasoning_config + and isinstance(reasoning_config, dict) + and reasoning_config.get("enabled") is False + ) + if not _kimi_thinking_off: + _kimi_effort = "medium" + if reasoning_config and isinstance(reasoning_config, dict): + _e = (reasoning_config.get("effort") or "").strip().lower() + if _e in ("low", "medium", "high"): + _kimi_effort = _e + api_kwargs["reasoning_effort"] = _kimi_effort + + # extra_body assembly + extra_body: Dict[str, Any] = {} + + is_openrouter = params.get("is_openrouter", False) + is_nous = params.get("is_nous", False) + is_github_models = params.get("is_github_models", False) + + provider_prefs = params.get("provider_preferences") + if provider_prefs and is_openrouter: + extra_body["provider"] = provider_prefs + + # Kimi extra_body.thinking + if is_kimi: + _kimi_thinking_enabled = True + if reasoning_config and isinstance(reasoning_config, dict): + if reasoning_config.get("enabled") is False: + _kimi_thinking_enabled = False + extra_body["thinking"] = { + "type": "enabled" if _kimi_thinking_enabled else "disabled", + } + + # Reasoning + if params.get("supports_reasoning", False): + if is_github_models: + gh_reasoning = params.get("github_reasoning_extra") + if gh_reasoning is not None: + extra_body["reasoning"] = gh_reasoning + else: + if reasoning_config is not None: + rc = dict(reasoning_config) + if is_nous and rc.get("enabled") is False: + pass # omit for Nous when disabled + else: + 
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
    """Normalize an OpenAI ChatCompletion into a NormalizedResponse.

    The chat_completions transport is near-identity: the payload is already
    OpenAI-shaped. Provider extras that must survive into the next turn are
    copied verbatim:

    * ``extra_content`` on tool calls (Gemini thinking models attach a
      thought_signature there; dropping it makes the follow-up request
      fail with a 400),
    * ``reasoning_content`` (DeepSeek/Moonshot) and ``reasoning_details``
      (OpenRouter unified format) — kept as distinct provider_data keys
      because downstream consumers read them separately.
    """

    def _call_extras(tc: Any) -> Optional[Dict[str, Any]]:
        # Pull provider-specific payload off a tool call, checking both the
        # direct attribute and pydantic's model_extra spill-over.
        extra = getattr(tc, "extra_content", None)
        if extra is None and hasattr(tc, "model_extra"):
            extra = (tc.model_extra or {}).get("extra_content")
        if extra is None:
            return None
        if hasattr(extra, "model_dump"):
            try:
                extra = extra.model_dump()
            except Exception:
                pass
        return {"extra_content": extra}

    first_choice = response.choices[0]
    message = first_choice.message

    normalized_calls = None
    if message.tool_calls:
        normalized_calls = [
            ToolCall(
                id=tc.id,
                name=tc.function.name,
                arguments=tc.function.arguments,
                provider_data=_call_extras(tc),
            )
            for tc in message.tool_calls
        ]

    token_usage = None
    raw_usage = getattr(response, "usage", None)
    if raw_usage:
        token_usage = Usage(
            prompt_tokens=getattr(raw_usage, "prompt_tokens", 0) or 0,
            completion_tokens=getattr(raw_usage, "completion_tokens", 0) or 0,
            total_tokens=getattr(raw_usage, "total_tokens", 0) or 0,
        )

    # Keep the two reasoning spellings apart — _extract_reasoning and the
    # thinking-prefill retry read them distinctly.
    extras: Dict[str, Any] = {}
    deepseek_reasoning = getattr(message, "reasoning_content", None)
    if deepseek_reasoning:
        extras["reasoning_content"] = deepseek_reasoning
    unified_details = getattr(message, "reasoning_details", None)
    if unified_details:
        extras["reasoning_details"] = unified_details

    return NormalizedResponse(
        content=message.content,
        tool_calls=normalized_calls,
        finish_reason=first_choice.finish_reason or "stop",
        reasoning=getattr(message, "reasoning", None),
        usage=token_usage,
        provider_data=extras or None,
    )
def build_kwargs(
    self,
    model: str,
    messages: List[Dict[str, Any]],
    tools: Optional[List[Dict[str, Any]]] = None,
    **params,
) -> Dict[str, Any]:
    """Assemble keyword arguments for a Responses API call.

    Message/tool conversion is delegated to the codex adapter helpers;
    this method only decides which optional fields the request carries.

    params (all optional):
        instructions: system prompt; when absent it is lifted from a
            leading ``system`` message, falling back to
            ``DEFAULT_AGENT_IDENTITY``.
        reasoning_config: dict with ``enabled`` / ``effort`` keys.
        session_id: used for ``prompt_cache_key`` and the xAI
            conversation header.
        max_tokens: mapped to ``max_output_tokens`` (skipped for the
            ChatGPT Codex backend, which rejects the field).
        request_overrides: merged in before the max-token / xAI-header
            fixups.
        is_github_responses / is_codex_backend / is_xai_responses:
            backend switches.
        github_reasoning_extra: Copilot-specific reasoning params.
    """
    from agent.codex_responses_adapter import (
        _chat_messages_to_responses_input,
        _responses_tools,
    )

    from run_agent import DEFAULT_AGENT_IDENTITY

    github_backend = params.get("is_github_responses", False)
    codex_backend = params.get("is_codex_backend", False)
    xai_backend = params.get("is_xai_responses", False)

    # --- system prompt -------------------------------------------------
    system_prompt = params.get("instructions", "")
    body_messages = messages
    if not system_prompt:
        if messages and messages[0].get("role") == "system":
            system_prompt = str(messages[0].get("content") or "").strip()
            body_messages = messages[1:]
        if not system_prompt:
            system_prompt = DEFAULT_AGENT_IDENTITY

    # --- reasoning resolution ------------------------------------------
    effort = "medium"
    reasoning_on = True
    rc = params.get("reasoning_config")
    if rc and isinstance(rc, dict):
        if rc.get("enabled") is False:
            reasoning_on = False
        elif rc.get("effort"):
            effort = rc["effort"]
    if effort == "minimal":
        # The Responses API has no "minimal" tier; clamp up to "low".
        effort = "low"

    request: Dict[str, Any] = {
        "model": model,
        "instructions": system_prompt,
        "input": _chat_messages_to_responses_input(body_messages),
        "tools": _responses_tools(tools),
        "tool_choice": "auto",
        "parallel_tool_calls": True,
        "store": False,
    }

    session_id = params.get("session_id")
    if session_id and not github_backend:
        request["prompt_cache_key"] = session_id

    # --- reasoning / include wiring (backend-specific) ------------------
    if reasoning_on and xai_backend:
        request["include"] = ["reasoning.encrypted_content"]
    elif reasoning_on and github_backend:
        copilot_reasoning = params.get("github_reasoning_extra")
        if copilot_reasoning is not None:
            request["reasoning"] = copilot_reasoning
    elif reasoning_on:
        request["reasoning"] = {"effort": effort, "summary": "auto"}
        request["include"] = ["reasoning.encrypted_content"]
    elif not github_backend and not xai_backend:
        request["include"] = []

    overrides = params.get("request_overrides")
    if overrides:
        request.update(overrides)

    # Applied after overrides so an explicit max_tokens param wins.
    max_out = params.get("max_tokens")
    if max_out is not None and not codex_backend:
        request["max_output_tokens"] = max_out

    if xai_backend and session_id:
        request["extra_headers"] = {"x-grok-conv-id": session_id}

    return request
def map_finish_reason(self, raw_reason: str) -> str:
    """Translate a Codex ``response.status`` into an OpenAI finish_reason.

    Only ``incomplete`` maps to ``length``; ``completed``, ``failed``,
    ``cancelled`` and any unknown status all fall back to ``stop``.
    Granular truncation info (``incomplete_details.reason ==
    'max_output_tokens'``) remains the caller's responsibility — this
    method maps the plain status string only.
    """
    return "length" if raw_reason == "incomplete" else "stop"
cache_read_tokens = _to_int(getattr(details, "cached_tokens", 0) if details else 0) + if not cache_read_tokens: + cache_read_tokens = _to_int(getattr(response_usage, "cache_read_input_tokens", 0)) cache_write_tokens = _to_int( getattr(details, "cache_write_tokens", 0) if details else 0 ) + if not cache_write_tokens: + cache_write_tokens = _to_int( + getattr(response_usage, "cache_creation_input_tokens", 0) + ) input_tokens = max(0, prompt_total - cache_read_tokens - cache_write_tokens) reasoning_tokens = 0 diff --git a/batch_runner.py b/batch_runner.py index c8f275a14..7413ad59f 100644 --- a/batch_runner.py +++ b/batch_runner.py @@ -1190,12 +1190,12 @@ def main( """ # Handle list distributions if list_distributions: - from toolset_distributions import list_distributions as get_all_dists, print_distribution_info - + from toolset_distributions import print_distribution_info + print("๐Ÿ“Š Available Toolset Distributions") print("=" * 70) - - all_dists = get_all_dists() + + all_dists = list_distributions() for dist_name in sorted(all_dists.keys()): print_distribution_info(dist_name) diff --git a/cli-config.yaml.example b/cli-config.yaml.example index a4a5ffda7..64927c2b6 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -770,10 +770,13 @@ code_execution: # Subagent Delegation # ============================================================================= # The delegate_task tool spawns child agents with isolated context. -# Supports single tasks and batch mode (up to 3 parallel). +# Supports single tasks and batch mode (default 3 parallel, configurable). delegation: max_iterations: 50 # Max tool-calling turns per child (default: 50) - default_toolsets: ["terminal", "file", "web"] # Default toolsets for subagents + # max_concurrent_children: 3 # Max parallel child agents (default: 3) + # max_spawn_depth: 1 # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers. 
+ # orchestrator_enabled: true # Kill switch for role="orchestrator" children (default: true). + # inherit_mcp_toolsets: true # When explicit child toolsets are narrowed, also keep the parent's MCP toolsets (default: true). Set false for strict intersection. # model: "google/gemini-3-flash-preview" # Override model for subagents (empty = inherit parent) # provider: "openrouter" # Override provider for subagents (empty = inherit parent) # # Resolves full credentials (base_url, api_key) automatically. diff --git a/cli.py b/cli.py index 4b315f9b6..159d77079 100644 --- a/cli.py +++ b/cli.py @@ -19,12 +19,14 @@ import shutil import sys import json import re +import concurrent.futures import base64 import atexit import tempfile import time import uuid import textwrap +from urllib.parse import unquote, urlparse from contextlib import contextmanager from pathlib import Path from datetime import datetime @@ -65,6 +67,7 @@ from agent.usage_pricing import ( format_duration_compact, format_token_count_compact, ) +from agent.account_usage import fetch_account_usage, render_account_usage_lines from hermes_cli.banner import _format_context_length, format_banner_version_label _COMMAND_SPINNER_FRAMES = ("โ ‹", "โ ™", "โ น", "โ ธ", "โ ผ", "โ ด", "โ ฆ", "โ ง", "โ ‡", "โ ") @@ -105,6 +108,11 @@ def _strip_reasoning_tags(text: str) -> str: ```` (Gemma 4). Must stay in sync with ``run_agent.py::_strip_think_blocks`` and the stream consumer's ``_OPEN_THINK_TAGS`` / ``_CLOSE_THINK_TAGS`` tuples. + + Also strips tool-call XML blocks some open models leak into visible + content (````, ````, Gemma-style + ``โ€ฆ``). Ported from + openclaw/openclaw#67318. """ cleaned = text for tag in _REASONING_TAGS: @@ -129,6 +137,31 @@ def _strip_reasoning_tags(text: str) -> str: cleaned, flags=re.IGNORECASE, ) + # Tool-call XML blocks (openclaw/openclaw#67318). 
+ for tc_tag in ("tool_call", "tool_calls", "tool_result", + "function_call", "function_calls"): + cleaned = re.sub( + rf"<{tc_tag}\b[^>]*>.*?\s*", + "", + cleaned, + flags=re.DOTALL | re.IGNORECASE, + ) + # โ€” boundary + attribute gated to avoid prose FPs. + cleaned = re.sub( + r'(?:(?<=^)|(?<=[\n\r.!?:]))[ \t]*' + r']*\bname\s*=[^>]*>' + r'(?:(?:(?!).)*)\s*', + '', + cleaned, + flags=re.DOTALL | re.IGNORECASE, + ) + # Stray tool-call close tags. + cleaned = re.sub( + r'\s*', + '', + cleaned, + flags=re.IGNORECASE, + ) return cleaned.strip() @@ -368,7 +401,6 @@ def load_cli_config() -> Dict[str, Any]: }, "delegation": { "max_iterations": 45, # Max tool-calling turns per child agent - "default_toolsets": ["terminal", "file", "web"], # Default toolsets for subagents "model": "", # Subagent model override (empty = inherit parent model) "provider": "", # Subagent provider override (empty = inherit parent provider) "base_url": "", # Direct OpenAI-compatible endpoint for subagents @@ -529,7 +561,6 @@ def load_cli_config() -> Dict[str, Any]: if _file_has_terminal_config or env_var not in os.environ: val = terminal_config[config_key] if isinstance(val, list): - import json os.environ[env_var] = json.dumps(val) else: os.environ[env_var] = str(val) @@ -913,6 +944,32 @@ def _cleanup_worktree(info: Dict[str, str] = None) -> None: print(f"\033[32mโœ“ Worktree cleaned up: {wt_path}\033[0m") +def _run_state_db_auto_maintenance(session_db) -> None: + """Call ``SessionDB.maybe_auto_prune_and_vacuum`` using current config. + + Reads the ``sessions:`` section from config.yaml via + :func:`hermes_cli.config.load_config` (the authoritative loader that + deep-merges DEFAULT_CONFIG, so unmigrated configs still get default + values). Honours ``auto_prune`` / ``retention_days`` / + ``vacuum_after_prune`` / ``min_interval_hours``, and delegates to the + DB. Never raises โ€” maintenance must never block interactive startup. 
+ """ + if session_db is None: + return + try: + from hermes_cli.config import load_config as _load_full_config + cfg = (_load_full_config().get("sessions") or {}) + if not cfg.get("auto_prune", False): + return + session_db.maybe_auto_prune_and_vacuum( + retention_days=int(cfg.get("retention_days", 90)), + min_interval_hours=int(cfg.get("min_interval_hours", 24)), + vacuum=bool(cfg.get("vacuum_after_prune", True)), + ) + except Exception as exc: + logger.debug("state.db auto-maintenance skipped: %s", exc) + + def _prune_stale_worktrees(repo_root: str, max_age_hours: int = 24) -> None: """Remove stale worktrees and orphaned branches on startup. @@ -1144,8 +1201,6 @@ def _rich_text_from_ansi(text: str) -> _RichText: def _strip_markdown_syntax(text: str) -> str: """Best-effort markdown marker removal for plain-text display.""" - import re - plain = _rich_text_from_ansi(text or "").plain plain = re.sub(r"^\s{0,3}(?:[-*_]\s*){3,}$", "", plain, flags=re.MULTILINE) plain = re.sub(r"^\s{0,3}#{1,6}\s+", "", plain, flags=re.MULTILINE) @@ -1155,11 +1210,11 @@ def _strip_markdown_syntax(text: str) -> str: plain = re.sub(r"!\[([^\]]*)\]\([^\)]*\)", r"\1", plain) plain = re.sub(r"\[([^\]]+)\]\([^\)]*\)", r"\1", plain) plain = re.sub(r"\*\*\*([^*]+)\*\*\*", r"\1", plain) - plain = re.sub(r"___([^_]+)___", r"\1", plain) + plain = re.sub(r"(? 
Path | None: if (token.startswith('"') and token.endswith('"')) or (token.startswith("'") and token.endswith("'")): token = token[1:-1].strip() + token = token.replace('\\ ', ' ') if not token: return None - expanded = os.path.expandvars(os.path.expanduser(token)) + expanded = token + if token.startswith("file://"): + try: + parsed = urlparse(token) + if parsed.scheme == "file": + expanded = unquote(parsed.path or "") + if parsed.netloc and os.name == "nt": + expanded = f"//{parsed.netloc}{expanded}" + except Exception: + expanded = token + expanded = os.path.expandvars(os.path.expanduser(expanded)) if os.name != "nt": normalized = expanded.replace("\\", "/") if len(normalized) >= 3 and normalized[1] == ":" and normalized[2] == "/" and normalized[0].isalpha(): @@ -1362,6 +1428,7 @@ def _detect_file_drop(user_input: str) -> "dict | None": or stripped.startswith("~") or stripped.startswith("./") or stripped.startswith("../") + or stripped.startswith("file://") or (len(stripped) >= 3 and stripped[1] == ":" and stripped[2] in ("\\", "/") and stripped[0].isalpha()) or stripped.startswith('"/') or stripped.startswith('"~') @@ -1372,8 +1439,25 @@ def _detect_file_drop(user_input: str) -> "dict | None": if not starts_like_path: return None + direct_path = _resolve_attachment_path(stripped) + if direct_path is not None: + return { + "path": direct_path, + "is_image": direct_path.suffix.lower() in _IMAGE_EXTENSIONS, + "remainder": "", + } + first_token, remainder = _split_path_input(stripped) drop_path = _resolve_attachment_path(first_token) + if drop_path is None and " " in stripped and stripped[0] not in {"'", '"'}: + space_positions = [idx for idx, ch in enumerate(stripped) if ch == " "] + for pos in reversed(space_positions): + candidate = stripped[:pos].rstrip() + resolved = _resolve_attachment_path(candidate) + if resolved is not None: + drop_path = resolved + remainder = stripped[pos + 1 :].strip() + break if drop_path is None: return None @@ -1933,7 +2017,13 @@ class 
HermesCLI: self._session_db = SessionDB() except Exception as e: logger.warning("Failed to initialize SessionDB โ€” session will NOT be indexed for search: %s", e) - + + # Opportunistic state.db maintenance โ€” runs at most once per + # min_interval_hours, tracked via state_meta in state.db itself so + # it's shared across all Hermes processes for this HERMES_HOME. + # Never blocks startup on failure. + _run_state_db_auto_maintenance(self._session_db) + # Deferred title: stored in memory until the session is created in the DB self._pending_title: Optional[str] = None @@ -2002,8 +2092,7 @@ class HermesCLI: def _invalidate(self, min_interval: float = 0.25) -> None: """Throttled UI repaint โ€” prevents terminal blinking on slow/SSH connections.""" - import time as _time - now = _time.monotonic() + now = time.monotonic() if hasattr(self, "_app") and self._app and (now - self._last_invalidate) >= min_interval: self._last_invalidate = now self._app.invalidate() @@ -2221,8 +2310,7 @@ class HermesCLI: return "" t0 = getattr(self, "_tool_start_time", 0) or 0 if t0 > 0: - import time as _time - elapsed = _time.monotonic() - t0 + elapsed = time.monotonic() - t0 if elapsed >= 60: _m, _s = int(elapsed // 60), int(elapsed % 60) elapsed_str = f"{_m}m {_s}s" @@ -2477,9 +2565,6 @@ class HermesCLI: def _emit_reasoning_preview(self, reasoning_text: str) -> None: """Render a buffered reasoning preview as a single [thinking] block.""" - import re - import textwrap - preview_text = reasoning_text.strip() if not preview_text: return @@ -2598,9 +2683,7 @@ class HermesCLI: """Expand [Pasted text #N -> file] placeholders into file contents.""" if not isinstance(text, str) or "[Pasted text #" not in text: return text or "" - import re as _re - - paste_ref_re = _re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]') + paste_ref_re = re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]') def _expand_ref(match): path = Path(match.group(1)) @@ -2923,9 +3006,7 @@ class HermesCLI: def 
_command_spinner_frame(self) -> str: """Return the current spinner frame for slow slash commands.""" - import time as _time - - frame_idx = int(_time.monotonic() * 10) % len(_COMMAND_SPINNER_FRAMES) + frame_idx = int(time.monotonic() * 10) % len(_COMMAND_SPINNER_FRAMES) return _COMMAND_SPINNER_FRAMES[frame_idx] @contextmanager @@ -3936,7 +4017,6 @@ class HermesCLI: image later with ``vision_analyze`` if needed. """ import asyncio as _asyncio - import json as _json from tools.vision_tools import vision_analyze_tool analysis_prompt = ( @@ -3956,7 +4036,7 @@ class HermesCLI: result_json = _asyncio.run( vision_analyze_tool(image_url=str(img_path), user_prompt=analysis_prompt) ) - result = _json.loads(result_json) + result = json.loads(result_json) if result.get("success"): description = result.get("analysis", "") enriched_parts.append( @@ -6282,8 +6362,7 @@ class HermesCLI: # with the output (fixes #2718). if self._app: self._app.invalidate() - import time as _tmod - _tmod.sleep(0.05) # brief pause for refresh + time.sleep(0.05) # brief pause for refresh print() ChatConsole().print(f"[{_accent_hex()}]{'โ”€' * 40}[/]") _cprint(f" โœ… Background task #{task_num} complete") @@ -6323,8 +6402,7 @@ class HermesCLI: # Same TUI refresh pattern as success path (#2718) if self._app: self._app.invalidate() - import time as _tmod - _tmod.sleep(0.05) + time.sleep(0.05) print() _cprint(f" โŒ Background task #{task_num} failed: {e}") finally: @@ -6544,7 +6622,6 @@ class HermesCLI: _launched = self._try_launch_chrome_debug(_port, _plat.system()) if _launched: # Wait for the port to come up - import time as _time for _wait in range(10): try: s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) @@ -6554,7 +6631,7 @@ class HermesCLI: _already_open = True break except (OSError, socket.timeout): - _time.sleep(0.5) + time.sleep(0.5) if _already_open: print(f" โœ“ Chrome launched and listening on port {_port}") else: @@ -7034,6 +7111,27 @@ class HermesCLI: if cost_result.status == 
"unknown": print(f" Note: Pricing unknown for {agent.model}") + # Account limits -- fetched off-thread with a hard timeout so slow + # provider APIs don't hang the prompt. + provider = getattr(agent, "provider", None) or getattr(self, "provider", None) + base_url = getattr(agent, "base_url", None) or getattr(self, "base_url", None) + api_key = getattr(agent, "api_key", None) or getattr(self, "api_key", None) + account_snapshot = None + if provider: + with concurrent.futures.ThreadPoolExecutor(max_workers=1) as _pool: + try: + account_snapshot = _pool.submit( + fetch_account_usage, provider, + base_url=base_url, api_key=api_key, + ).result(timeout=10.0) + except (concurrent.futures.TimeoutError, Exception): + account_snapshot = None + account_lines = [f" {line}" for line in render_account_usage_lines(account_snapshot)] + if account_lines: + print() + for line in account_lines: + print(line) + if self.verbose: logging.getLogger().setLevel(logging.DEBUG) for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'): @@ -7084,7 +7182,6 @@ class HermesCLI: known state. When a change is detected, triggers _reload_mcp() and informs the user so they know the tool list has been refreshed. """ - import time import yaml as _yaml CONFIG_WATCH_INTERVAL = 5.0 # seconds between config.yaml stat() calls @@ -7176,7 +7273,6 @@ class HermesCLI: # Refresh the agent's tool list so the model can call new tools if self.agent is not None: - from model_tools import get_tool_definitions self.agent.tools = get_tool_definitions( enabled_toolsets=self.agent.enabled_toolsets if hasattr(self.agent, "enabled_toolsets") else None, @@ -7259,7 +7355,6 @@ class HermesCLI: full history of tool calls (not just the current one in the spinner). 
""" if event_type == "tool.completed": - import time as _time self._tool_start_time = 0.0 # Print stacked scrollback line for "all" / "new" modes if function_name and self.tool_progress_mode in ("all", "new"): @@ -7288,7 +7383,6 @@ class HermesCLI: if event_type != "tool.started": return if function_name and not function_name.startswith("_"): - import time as _time from agent.display import get_tool_emoji emoji = get_tool_emoji(function_name) label = preview or function_name @@ -7297,7 +7391,7 @@ class HermesCLI: if _pl > 0 and len(label) > _pl: label = label[:_pl - 3] + "..." self._spinner_text = f"{emoji} {label}" - self._tool_start_time = _time.monotonic() + self._tool_start_time = time.monotonic() # Store args for stacked scrollback line on completion self._pending_tool_info.setdefault(function_name, []).append( function_args if function_args is not None else {} @@ -7414,11 +7508,12 @@ class HermesCLI: self._voice_stop_and_transcribe() # Audio cue: single beep BEFORE starting stream (avoid CoreAudio conflict) - try: - from tools.voice_mode import play_beep - play_beep(frequency=880, count=1) - except Exception: - pass + if self._voice_beeps_enabled(): + try: + from tools.voice_mode import play_beep + play_beep(frequency=880, count=1) + except Exception: + pass try: self._voice_recorder.start(on_silence_stop=_on_silence) @@ -7466,11 +7561,12 @@ class HermesCLI: wav_path = self._voice_recorder.stop() # Audio cue: double beep after stream stopped (no CoreAudio conflict) - try: - from tools.voice_mode import play_beep - play_beep(frequency=660, count=2) - except Exception: - pass + if self._voice_beeps_enabled(): + try: + from tools.voice_mode import play_beep + play_beep(frequency=660, count=2) + except Exception: + pass if wav_path is None: _cprint(f"{_DIM}No speech detected.{_RST}") @@ -7553,7 +7649,6 @@ class HermesCLI: try: from tools.tts_tool import text_to_speech_tool from tools.voice_mode import play_audio_file - import re # Strip markdown and non-speech 
content for cleaner TTS tts_text = text[:4000] if len(text) > 4000 else text @@ -7621,6 +7716,17 @@ class HermesCLI: _cprint(f"Unknown voice subcommand: {subcommand}") _cprint("Usage: /voice [on|off|tts|status]") + def _voice_beeps_enabled(self) -> bool: + """Return whether CLI voice mode should play record start/stop beeps.""" + try: + from hermes_cli.config import load_config + voice_cfg = load_config().get("voice", {}) + if isinstance(voice_cfg, dict): + return bool(voice_cfg.get("beep_enabled", True)) + except Exception: + pass + return True + def _enable_voice_mode(self): """Enable voice mode after checking requirements.""" if self._voice_mode: @@ -7930,7 +8036,9 @@ class HermesCLI: return selected = state.get("selected", 0) - choices = state.get("choices") or [] + choices = state.get("choices") + if not isinstance(choices, list): + choices = [] if not (0 <= selected < len(choices)): return @@ -8022,8 +8130,18 @@ class HermesCLI: choice_wrapped: list[tuple[int, str]] = [] for i, choice in enumerate(choices): label = choice_labels.get(choice, choice) - prefix = 'โฏ ' if i == selected else ' ' - for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "): + # Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item) + if i < 9: + num_prefix = str(i + 1) + elif i == 9: + num_prefix = '0' + else: + num_prefix = ' ' # No number for items beyond 10th + if i == selected: + prefix = f'โฏ {num_prefix}. ' + else: + prefix = f' {num_prefix}. ' + for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "): choice_wrapped.append((i, wrapped)) # Budget vertical space so HSplit never clips the command or choices. @@ -8314,6 +8432,17 @@ class HermesCLI: def run_agent(): nonlocal result + # Set callbacks inside the agent thread so thread-local storage + # in terminal_tool is populated for this thread. 
The main thread + # registration (run() line ~9046) is invisible here because + # _callback_tls is threading.local(). Matches the pattern used + # by acp_adapter/server.py for ACP sessions. + set_sudo_password_callback(self._sudo_password_callback) + set_approval_callback(self._approval_callback) + try: + set_secret_capture_callback(self._secret_capture_callback) + except Exception: + pass agent_message = _voice_prefix + message if _voice_prefix else message # Prepend pending model switch note so the model knows about the switch _msn = getattr(self, '_pending_model_switch_note', None) @@ -8339,6 +8468,15 @@ class HermesCLI: "failed": True, "error": _summary, } + finally: + # Clear thread-local callbacks so a reused thread doesn't + # hold stale references to a disposed CLI instance. + try: + set_sudo_password_callback(None) + set_approval_callback(None) + set_secret_capture_callback(None) + except Exception: + pass # Start agent in background thread (daemon so it cannot keep the # process alive when the user closes the terminal tab โ€” SIGHUP @@ -8376,8 +8514,7 @@ class HermesCLI: try: _dbg = _hermes_home / "interrupt_debug.log" with open(_dbg, "a") as _f: - import time as _t - _f.write(f"{_t.strftime('%H:%M:%S')} interrupt fired: msg={str(interrupt_msg)[:60]!r}, " + _f.write(f"{time.strftime('%H:%M:%S')} interrupt fired: msg={str(interrupt_msg)[:60]!r}, " f"children={len(self.agent._active_children)}, " f"parent._interrupt={self.agent._interrupt_requested}\n") for _ci, _ch in enumerate(self.agent._active_children): @@ -8453,9 +8590,8 @@ class HermesCLI: # buffer so tool/status lines render ABOVE our response box. # The flush pushes data into the renderer queue; the short # sleep lets the renderer actually paint it before we draw. 
- import time as _time sys.stdout.flush() - _time.sleep(0.15) + time.sleep(0.15) # Update history with full conversation self.conversation_history = result.get("messages", self.conversation_history) if result else self.conversation_history @@ -9121,8 +9257,7 @@ class HermesCLI: try: _dbg = _hermes_home / "interrupt_debug.log" with open(_dbg, "a") as _f: - import time as _t - _f.write(f"{_t.strftime('%H:%M:%S')} ENTER: queued interrupt msg={str(payload)[:60]!r}, " + _f.write(f"{time.strftime('%H:%M:%S')} ENTER: queued interrupt msg={str(payload)[:60]!r}, " f"agent_running={self._agent_running}\n") except Exception: pass @@ -9201,6 +9336,29 @@ class HermesCLI: self._clarify_state["selected"] = min(max_idx, self._clarify_state["selected"] + 1) event.app.invalidate() + # Number keys for quick clarify selection (1-9, 0 for 10th item) + def _make_clarify_number_handler(idx): + def handler(event): + if self._clarify_state and not self._clarify_freetext: + choices = self._clarify_state.get("choices") or [] + # Map index to choice (treating "Other" as the last option) + if idx < len(choices): + # Select a numbered choice + self._clarify_state["response_queue"].put(choices[idx]) + self._clarify_state = None + self._clarify_freetext = False + event.app.invalidate() + elif idx == len(choices): + # Select "Other" option + self._clarify_freetext = True + event.app.invalidate() + return handler + + for _num in range(10): + # 1-9 select items 0-8, 0 selects item 9 (10thitem) + _idx = 9 if _num == 0 else _num - 1 + kb.add(str(_num), filter=Condition(lambda: bool(self._clarify_state) and not self._clarify_freetext))(_make_clarify_number_handler(_idx)) + # --- Dangerous command approval: arrow-key navigation --- @kb.add('up', filter=Condition(lambda: bool(self._approval_state))) @@ -9242,6 +9400,20 @@ class HermesCLI: event.app.current_buffer.reset() event.app.invalidate() + # Number keys for quick approval selection (1-9, 0 for 10th item) + def _make_approval_number_handler(idx): + 
def handler(event): + if self._approval_state and idx < len(self._approval_state["choices"]): + self._approval_state["selected"] = idx + self._handle_approval_selection() + event.app.invalidate() + return handler + + for _num in range(10): + # 1-9 select items 0-8, 0 selects item 9 (10th item) + _idx = 9 if _num == 0 else _num - 1 + kb.add(str(_num), filter=Condition(lambda: bool(self._approval_state)))(_make_approval_number_handler(_idx)) + # --- History navigation: up/down browse history in normal input mode --- # The TextArea is multiline, so by default up/down only move the cursor. # Buffer.auto_up/auto_down handle both: cursor movement when multi-line, @@ -9270,8 +9442,7 @@ class HermesCLI: 2. Interrupt the running agent (first press) 3. Force exit (second press within 2s, or when idle) """ - import time as _time - now = _time.time() + now = time.time() # Cancel active voice recording. # Run cancel() in a background thread to prevent blocking the @@ -9379,12 +9550,11 @@ class HermesCLI: @kb.add('c-z') def handle_ctrl_z(event): """Handle Ctrl+Z - suspend process to background (Unix only).""" - import sys if sys.platform == 'win32': _cprint(f"\n{_DIM}Suspend (Ctrl+Z) is not supported on Windows.{_RST}") event.app.invalidate() return - import os, signal as _sig + import signal as _sig from prompt_toolkit.application import run_in_terminal from hermes_cli.skin_engine import get_active_skin agent_name = get_active_skin().get_branding("agent_name", "Hermes Agent") @@ -9698,31 +9868,29 @@ class HermesCLI: # extra instructions (sudo countdown, approval navigation, clarify). # The agent-running interrupt hint is now an inline placeholder above. 
def get_hint_text(): - import time as _time - if cli_ref._sudo_state: - remaining = max(0, int(cli_ref._sudo_deadline - _time.monotonic())) + remaining = max(0, int(cli_ref._sudo_deadline - time.monotonic())) return [ ('class:hint', ' password hidden ยท Enter to skip'), ('class:clarify-countdown', f' ({remaining}s)'), ] if cli_ref._secret_state: - remaining = max(0, int(cli_ref._secret_deadline - _time.monotonic())) + remaining = max(0, int(cli_ref._secret_deadline - time.monotonic())) return [ ('class:hint', ' secret hidden ยท Enter to skip'), ('class:clarify-countdown', f' ({remaining}s)'), ] if cli_ref._approval_state: - remaining = max(0, int(cli_ref._approval_deadline - _time.monotonic())) + remaining = max(0, int(cli_ref._approval_deadline - time.monotonic())) return [ ('class:hint', ' โ†‘/โ†“ to select, Enter to confirm'), ('class:clarify-countdown', f' ({remaining}s)'), ] if cli_ref._clarify_state: - remaining = max(0, int(cli_ref._clarify_deadline - _time.monotonic())) + remaining = max(0, int(cli_ref._clarify_deadline - time.monotonic())) countdown = f' ({remaining}s)' if cli_ref._clarify_deadline else '' if cli_ref._clarify_freetext: return [ @@ -9814,14 +9982,32 @@ class HermesCLI: selected = state.get("selected", 0) preview_lines = _wrap_panel_text(question, 60) for i, choice in enumerate(choices): - prefix = "โฏ " if i == selected and not cli_ref._clarify_freetext else " " - preview_lines.extend(_wrap_panel_text(f"{prefix}{choice}", 60, subsequent_indent=" ")) + # Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item) + if i < 9: + num_prefix = str(i + 1) + elif i == 9: + num_prefix = '0' + else: + num_prefix = ' ' + if i == selected and not cli_ref._clarify_freetext: + prefix = f"โฏ {num_prefix}. " + else: + prefix = f" {num_prefix}. 
" + preview_lines.extend(_wrap_panel_text(f"{prefix}{choice}", 60, subsequent_indent=" ")) + # "Other" option in preview + other_num = len(choices) + 1 + if other_num < 10: + other_num_prefix = str(other_num) + elif other_num == 10: + other_num_prefix = '0' + else: + other_num_prefix = ' ' other_label = ( - "โฏ Other (type below)" if cli_ref._clarify_freetext - else "โฏ Other (type your answer)" if selected == len(choices) - else " Other (type your answer)" + f"โฏ {other_num_prefix}. Other (type below)" if cli_ref._clarify_freetext + else f"โฏ {other_num_prefix}. Other (type your answer)" if selected == len(choices) + else f" {other_num_prefix}. Other (type your answer)" ) - preview_lines.extend(_wrap_panel_text(other_label, 60, subsequent_indent=" ")) + preview_lines.extend(_wrap_panel_text(other_label, 60, subsequent_indent=" ")) box_width = _panel_box_width("Hermes needs your input", preview_lines) inner_text_width = max(8, box_width - 2) @@ -9829,18 +10015,35 @@ class HermesCLI: choice_wrapped: list[tuple[int, str]] = [] if choices: for i, choice in enumerate(choices): - prefix = 'โฏ ' if i == selected and not cli_ref._clarify_freetext else ' ' - for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" "): + # Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item) + if i < 9: + num_prefix = str(i + 1) + elif i == 9: + num_prefix = '0' + else: + num_prefix = ' ' + if i == selected and not cli_ref._clarify_freetext: + prefix = f'โฏ {num_prefix}. ' + else: + prefix = f' {num_prefix}. 
' + for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" "): choice_wrapped.append((i, wrapped)) # Trailing Other row(s) other_idx = len(choices) - if selected == other_idx and not cli_ref._clarify_freetext: - other_label_mand = 'โฏ Other (type your answer)' - elif cli_ref._clarify_freetext: - other_label_mand = 'โฏ Other (type below)' + other_num = other_idx + 1 + if other_num < 10: + other_num_prefix = str(other_num) + elif other_num == 10: + other_num_prefix = '0' else: - other_label_mand = ' Other (type your answer)' - other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent=" ") + other_num_prefix = ' ' + if selected == other_idx and not cli_ref._clarify_freetext: + other_label_mand = f'โฏ {other_num_prefix}. Other (type your answer)' + elif cli_ref._clarify_freetext: + other_label_mand = f'โฏ {other_num_prefix}. Other (type below)' + else: + other_label_mand = f' {other_num_prefix}. Other (type your answer)' + other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent=" ") elif cli_ref._clarify_freetext: # Freetext-only mode: the guidance line takes the place of choices. 
other_wrapped = _wrap_panel_text( @@ -9905,6 +10108,15 @@ class HermesCLI: # "Other" option (trailing row(s), only shown when choices exist) other_idx = len(choices) + # Calculate number prefix for "Other" option + other_num = other_idx + 1 + if other_num < 10: + other_num_prefix = str(other_num) + elif other_num == 10: + other_num_prefix = '0' + else: + other_num_prefix = ' ' + if selected == other_idx and not cli_ref._clarify_freetext: other_style = 'class:clarify-selected' elif cli_ref._clarify_freetext: @@ -10012,7 +10224,8 @@ class HermesCLI: if stage == "provider": title = "โš™ Model Picker โ€” Select Provider" choices = [] - for p in state.get("providers") or []: + _providers = state.get("providers") + for p in _providers if isinstance(_providers, list) else []: count = p.get("total_models", len(p.get("models", []))) label = f"{p['name']} ({count} model{'s' if count != 1 else ''})" if p.get("is_current"): @@ -10269,22 +10482,20 @@ class HermesCLI: app._on_resize = _resize_clear_ghosts def spinner_loop(): - import time as _time - last_idle_refresh = 0.0 while not self._should_exit: if not self._app: - _time.sleep(0.1) + time.sleep(0.1) continue if self._command_running: self._invalidate(min_interval=0.1) - _time.sleep(0.1) + time.sleep(0.1) else: - now = _time.monotonic() + now = time.monotonic() if now - last_idle_refresh >= 1.0: last_idle_refresh = now self._invalidate(min_interval=1.0) - _time.sleep(0.2) + time.sleep(0.2) spinner_thread = threading.Thread(target=spinner_loop, daemon=True) spinner_thread.start() @@ -10353,8 +10564,7 @@ class HermesCLI: continue # Expand paste references back to full content - import re as _re - _paste_ref_re = _re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]') + _paste_ref_re = re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]') paste_refs = list(_paste_ref_re.finditer(user_input)) if isinstance(user_input, str) else [] if paste_refs: user_input = self._expand_paste_references(user_input) @@ -10446,13 
+10656,12 @@ class HermesCLI: try: if getattr(self, "agent", None) and getattr(self, "_agent_running", False): self.agent.interrupt(f"received signal {signum}") - import time as _t try: _grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5")) except (TypeError, ValueError): _grace = 1.5 if _grace > 0: - _t.sleep(_grace) + time.sleep(_grace) except Exception: pass # never block signal handling raise KeyboardInterrupt() @@ -10485,8 +10694,7 @@ class HermesCLI: # uv-managed Python, fd 0 can be invalid or unregisterable with the # asyncio selector, causing "KeyError: '0 is not registered'" (#6393). try: - import os as _os - _os.fstat(0) + os.fstat(0) except OSError: print( "Error: stdin (fd 0) is not available.\n" @@ -10779,13 +10987,12 @@ def main( _agent = getattr(cli, "agent", None) if _agent is not None: _agent.interrupt(f"received signal {signum}") - import time as _t try: _grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5")) except (TypeError, ValueError): _grace = 1.5 if _grace > 0: - _t.sleep(_grace) + time.sleep(_grace) except Exception: pass # never block signal handling raise KeyboardInterrupt() diff --git a/cron/scheduler.py b/cron/scheduler.py index 4b131859b..61d5537d9 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -252,7 +252,11 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata: coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata) future = asyncio.run_coroutine_threadsafe(coro, loop) - result = future.result(timeout=30) + try: + result = future.result(timeout=30) + except TimeoutError: + future.cancel() + raise if result and not getattr(result, "success", True): logger.warning( "Job '%s': media send failed for %s: %s", @@ -382,7 +386,11 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata), loop, ) - send_result = future.result(timeout=60) + try: + send_result = 
future.result(timeout=60) + except TimeoutError: + future.cancel() + raise if send_result and not getattr(send_result, "success", True): err = getattr(send_result, "error", "unknown") logger.warning( @@ -422,7 +430,6 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option # prevent "coroutine was never awaited" RuntimeWarning, then retry in a # fresh thread that has no running loop. coro.close() - import concurrent.futures with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files)) result = future.result(timeout=30) @@ -810,14 +817,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: prefill_messages = None prefill_file = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") or _cfg.get("prefill_messages_file", "") if prefill_file: - import json as _json pfpath = Path(prefill_file).expanduser() if not pfpath.is_absolute(): pfpath = _hermes_home / pfpath if pfpath.exists(): try: with open(pfpath, "r", encoding="utf-8") as _pf: - prefill_messages = _json.load(_pf) + prefill_messages = json.load(_pf) if not isinstance(prefill_messages, list): prefill_messages = None except Exception as e: @@ -1085,7 +1091,6 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int: logger.warning("Invalid HERMES_CRON_MAX_PARALLEL value; defaulting to unbounded") if _max_workers is None: try: - from hermes_cli.config import load_config _ucfg = load_config() or {} _cfg_par = ( _ucfg.get("cron", {}) if isinstance(_ucfg, dict) else {} diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index c46497dcc..18f8fff4e 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -68,4 +68,19 @@ if [ -d "$INSTALL_DIR/skills" ]; then python3 "$INSTALL_DIR/tools/skills_sync.py" fi +# Final exec: two supported invocation patterns. 
+# +# docker run -> exec `hermes` with no args (legacy default) +# docker run chat -q "..." -> exec `hermes chat -q "..."` (legacy wrap) +# docker run sleep infinity -> exec `sleep infinity` directly +# docker run bash -> exec `bash` directly +# +# If the first positional arg resolves to an executable on PATH, we assume the +# caller wants to run it directly (needed by the launcher which runs long-lived +# `sleep infinity` sandbox containers โ€” see tools/environments/docker.py). +# Otherwise we treat the args as a hermes subcommand and wrap with `hermes`, +# preserving the documented `docker run ` behavior. +if [ $# -gt 0 ] && command -v "$1" >/dev/null 2>&1; then + exec "$@" +fi exec hermes "$@" diff --git a/environments/tool_context.py b/environments/tool_context.py index 10f537d72..550c5e851 100644 --- a/environments/tool_context.py +++ b/environments/tool_context.py @@ -53,7 +53,6 @@ def _run_tool_in_thread(tool_name: str, arguments: Dict[str, Any], task_id: str) try: loop = asyncio.get_running_loop() # We're in an async context -- need to run in thread - import concurrent.futures with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: future = pool.submit( handle_function_call, tool_name, arguments, task_id diff --git a/gateway/config.py b/gateway/config.py index 7e95a87a8..67ebf7346 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -616,6 +616,8 @@ def load_gateway_config() -> GatewayConfig: if isinstance(frc, list): frc = ",".join(str(v) for v in frc) os.environ["SLACK_FREE_RESPONSE_CHANNELS"] = str(frc) + if "reactions" in slack_cfg and not os.getenv("SLACK_REACTIONS"): + os.environ["SLACK_REACTIONS"] = str(slack_cfg["reactions"]).lower() # Discord settings โ†’ env vars (env vars take precedence) discord_cfg = yaml_cfg.get("discord", {}) @@ -670,8 +672,7 @@ def load_gateway_config() -> GatewayConfig: if "require_mention" in telegram_cfg and not os.getenv("TELEGRAM_REQUIRE_MENTION"): os.environ["TELEGRAM_REQUIRE_MENTION"] = 
str(telegram_cfg["require_mention"]).lower() if "mention_patterns" in telegram_cfg and not os.getenv("TELEGRAM_MENTION_PATTERNS"): - import json as _json - os.environ["TELEGRAM_MENTION_PATTERNS"] = _json.dumps(telegram_cfg["mention_patterns"]) + os.environ["TELEGRAM_MENTION_PATTERNS"] = json.dumps(telegram_cfg["mention_patterns"]) frc = telegram_cfg.get("free_response_chats") if frc is not None and not os.getenv("TELEGRAM_FREE_RESPONSE_CHATS"): if isinstance(frc, list): @@ -1259,7 +1260,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None: if legacy_home: qq_home = legacy_home qq_home_name_env = "QQ_HOME_CHANNEL_NAME" - import logging logging.getLogger(__name__).warning( "QQ_HOME_CHANNEL is deprecated; rename to QQBOT_HOME_CHANNEL " "in your .env for consistency with the platform key." diff --git a/gateway/hooks.py b/gateway/hooks.py index c50394b20..374e5b25f 100644 --- a/gateway/hooks.py +++ b/gateway/hooks.py @@ -135,9 +135,22 @@ class HookRegistry: except Exception as e: print(f"[hooks] Error loading hook {hook_dir.name}: {e}", flush=True) + def _resolve_handlers(self, event_type: str) -> List[Callable]: + """Return all handlers that should fire for ``event_type``. + + Exact matches fire first, followed by wildcard matches (e.g. + ``command:*`` matches ``command:reset``). + """ + handlers = list(self._handlers.get(event_type, [])) + if ":" in event_type: + base = event_type.split(":")[0] + wildcard_key = f"{base}:*" + handlers.extend(self._handlers.get(wildcard_key, [])) + return handlers + async def emit(self, event_type: str, context: Optional[Dict[str, Any]] = None) -> None: """ - Fire all handlers registered for an event. + Fire all handlers registered for an event, discarding return values. Supports wildcard matching: handlers registered for "command:*" will fire for any "command:..." event. 
Handlers registered for a base type @@ -151,16 +164,7 @@ class HookRegistry: if context is None: context = {} - # Collect handlers: exact match + wildcard match - handlers = list(self._handlers.get(event_type, [])) - - # Check for wildcard patterns (e.g., "command:*" matches "command:reset") - if ":" in event_type: - base = event_type.split(":")[0] - wildcard_key = f"{base}:*" - handlers.extend(self._handlers.get(wildcard_key, [])) - - for fn in handlers: + for fn in self._resolve_handlers(event_type): try: result = fn(event_type, context) # Support both sync and async handlers @@ -168,3 +172,32 @@ class HookRegistry: await result except Exception as e: print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True) + + async def emit_collect( + self, + event_type: str, + context: Optional[Dict[str, Any]] = None, + ) -> List[Any]: + """Fire handlers and return their non-None return values in order. + + Like :meth:`emit` but captures each handler's return value. Used for + decision-style hooks (e.g. ``command:`` policies that want to + allow/deny/rewrite the command before normal dispatch). + + Exceptions from individual handlers are logged but do not abort the + remaining handlers. + """ + if context is None: + context = {} + + results: List[Any] = [] + for fn in self._resolve_handlers(event_type): + try: + result = fn(event_type, context) + if asyncio.iscoroutine(result): + result = await result + if result is not None: + results.append(result) + except Exception as e: + print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True) + return results diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index 8bbf16e17..a6b52ff32 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -323,7 +323,6 @@ class ResponseStore: ).fetchone() if row is None: return None - import time self._conn.execute( "UPDATE responses SET accessed_at = ? 
WHERE response_id = ?", (time.time(), response_id), @@ -333,7 +332,6 @@ class ResponseStore: def put(self, response_id: str, data: Dict[str, Any]) -> None: """Store a response, evicting the oldest if at capacity.""" - import time self._conn.execute( "INSERT OR REPLACE INTO responses (response_id, data, accessed_at) VALUES (?, ?, ?)", (response_id, json.dumps(data, default=str), time.time()), @@ -474,8 +472,7 @@ class _IdempotencyCache: self._max = max_items def _purge(self): - import time as _t - now = _t.time() + now = time.time() expired = [k for k, v in self._store.items() if now - v["ts"] > self._ttl] for k in expired: self._store.pop(k, None) @@ -537,6 +534,30 @@ def _derive_chat_session_id( return f"api-{digest}" +_CRON_AVAILABLE = False +try: + from cron.jobs import ( + list_jobs as _cron_list, + get_job as _cron_get, + create_job as _cron_create, + update_job as _cron_update, + remove_job as _cron_remove, + pause_job as _cron_pause, + resume_job as _cron_resume, + trigger_job as _cron_trigger, + ) + _CRON_AVAILABLE = True +except ImportError: + _cron_list = None + _cron_get = None + _cron_create = None + _cron_update = None + _cron_remove = None + _cron_pause = None + _cron_resume = None + _cron_trigger = None + + class APIServerAdapter(BasePlatformAdapter): """ OpenAI-compatible HTTP API server adapter. @@ -1866,44 +1887,16 @@ class APIServerAdapter(BasePlatformAdapter): # Cron jobs API # ------------------------------------------------------------------ - # Check cron module availability once (not per-request) - _CRON_AVAILABLE = False - try: - from cron.jobs import ( - list_jobs as _cron_list, - get_job as _cron_get, - create_job as _cron_create, - update_job as _cron_update, - remove_job as _cron_remove, - pause_job as _cron_pause, - resume_job as _cron_resume, - trigger_job as _cron_trigger, - ) - # Wrap as staticmethod to prevent descriptor binding โ€” these are plain - # module functions, not instance methods. 
Without this, self._cron_*() - # injects ``self`` as the first positional argument and every call - # raises TypeError. - _cron_list = staticmethod(_cron_list) - _cron_get = staticmethod(_cron_get) - _cron_create = staticmethod(_cron_create) - _cron_update = staticmethod(_cron_update) - _cron_remove = staticmethod(_cron_remove) - _cron_pause = staticmethod(_cron_pause) - _cron_resume = staticmethod(_cron_resume) - _cron_trigger = staticmethod(_cron_trigger) - _CRON_AVAILABLE = True - except ImportError: - pass - _JOB_ID_RE = __import__("re").compile(r"[a-f0-9]{12}") # Allowed fields for update โ€” prevents clients injecting arbitrary keys _UPDATE_ALLOWED_FIELDS = {"name", "schedule", "prompt", "deliver", "skills", "skill", "repeat", "enabled"} _MAX_NAME_LENGTH = 200 _MAX_PROMPT_LENGTH = 5000 - def _check_jobs_available(self) -> Optional["web.Response"]: + @staticmethod + def _check_jobs_available() -> Optional["web.Response"]: """Return error response if cron module isn't available.""" - if not self._CRON_AVAILABLE: + if not _CRON_AVAILABLE: return web.json_response( {"error": "Cron module not available"}, status=501, ) @@ -1928,7 +1921,7 @@ class APIServerAdapter(BasePlatformAdapter): return cron_err try: include_disabled = request.query.get("include_disabled", "").lower() in ("true", "1") - jobs = self._cron_list(include_disabled=include_disabled) + jobs = _cron_list(include_disabled=include_disabled) return web.json_response({"jobs": jobs}) except Exception as e: return web.json_response({"error": str(e)}, status=500) @@ -1976,7 +1969,7 @@ class APIServerAdapter(BasePlatformAdapter): if repeat is not None: kwargs["repeat"] = repeat - job = self._cron_create(**kwargs) + job = _cron_create(**kwargs) return web.json_response({"job": job}) except Exception as e: return web.json_response({"error": str(e)}, status=500) @@ -1993,7 +1986,7 @@ class APIServerAdapter(BasePlatformAdapter): if id_err: return id_err try: - job = self._cron_get(job_id) + job = 
_cron_get(job_id) if not job: return web.json_response({"error": "Job not found"}, status=404) return web.json_response({"job": job}) @@ -2026,7 +2019,7 @@ class APIServerAdapter(BasePlatformAdapter): return web.json_response( {"error": f"Prompt must be โ‰ค {self._MAX_PROMPT_LENGTH} characters"}, status=400, ) - job = self._cron_update(job_id, sanitized) + job = _cron_update(job_id, sanitized) if not job: return web.json_response({"error": "Job not found"}, status=404) return web.json_response({"job": job}) @@ -2045,7 +2038,7 @@ class APIServerAdapter(BasePlatformAdapter): if id_err: return id_err try: - success = self._cron_remove(job_id) + success = _cron_remove(job_id) if not success: return web.json_response({"error": "Job not found"}, status=404) return web.json_response({"ok": True}) @@ -2064,7 +2057,7 @@ class APIServerAdapter(BasePlatformAdapter): if id_err: return id_err try: - job = self._cron_pause(job_id) + job = _cron_pause(job_id) if not job: return web.json_response({"error": "Job not found"}, status=404) return web.json_response({"job": job}) @@ -2083,7 +2076,7 @@ class APIServerAdapter(BasePlatformAdapter): if id_err: return id_err try: - job = self._cron_resume(job_id) + job = _cron_resume(job_id) if not job: return web.json_response({"error": "Job not found"}, status=404) return web.json_response({"job": job}) @@ -2102,7 +2095,7 @@ class APIServerAdapter(BasePlatformAdapter): if id_err: return id_err try: - job = self._cron_trigger(job_id) + job = _cron_trigger(job_id) if not job: return web.json_response({"error": "Job not found"}, status=404) return web.json_response({"job": job}) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index bda137cf3..56bb3c5cb 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -19,6 +19,8 @@ import uuid from abc import ABC, abstractmethod from urllib.parse import urlsplit +from utils import normalize_proxy_url + logger = logging.getLogger(__name__) @@ -159,13 +161,13 @@ def 
resolve_proxy_url(platform_env_var: str | None = None) -> str | None: if platform_env_var: value = (os.environ.get(platform_env_var) or "").strip() if value: - return value + return normalize_proxy_url(value) for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy"): value = (os.environ.get(key) or "").strip() if value: - return value - return _detect_macos_system_proxy() + return normalize_proxy_url(value) + return normalize_proxy_url(_detect_macos_system_proxy()) def proxy_kwargs_for_bot(proxy_url: str | None) -> dict: @@ -391,12 +393,9 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) -> if not is_safe_url(url): raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}") - import asyncio import httpx - import logging as _logging - _log = _logging.getLogger(__name__) + _log = logging.getLogger(__name__) - last_exc = None async with httpx.AsyncClient( timeout=30.0, follow_redirects=True, @@ -414,7 +413,6 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) -> response.raise_for_status() return cache_image_from_bytes(response.content, ext) except (httpx.TimeoutException, httpx.HTTPStatusError) as exc: - last_exc = exc if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429: raise if attempt < retries: @@ -430,7 +428,6 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) -> await asyncio.sleep(wait) continue raise - raise last_exc def cleanup_image_cache(max_age_hours: int = 24) -> int: @@ -510,12 +507,9 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) -> if not is_safe_url(url): raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}") - import asyncio import httpx - import logging as _logging - _log = _logging.getLogger(__name__) + _log = logging.getLogger(__name__) - last_exc = None async with httpx.AsyncClient( timeout=30.0, 
follow_redirects=True, @@ -533,7 +527,6 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) -> response.raise_for_status() return cache_audio_from_bytes(response.content, ext) except (httpx.TimeoutException, httpx.HTTPStatusError) as exc: - last_exc = exc if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429: raise if attempt < retries: @@ -549,7 +542,6 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) -> await asyncio.sleep(wait) continue raise - raise last_exc # --------------------------------------------------------------------------- @@ -1351,7 +1343,7 @@ class BasePlatformAdapter(ABC): # Extract MEDIA: tags, allowing optional whitespace after the colon # and quoted/backticked paths for LLM-formatted outputs. media_pattern = re.compile( - r'''[`"']?MEDIA:\s*(?P`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?''' + r'''[`"']?MEDIA:\s*(?P`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|pdf)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?''' ) for match in media_pattern.finditer(content): path = match.group("path").strip() @@ -1787,8 +1779,6 @@ class BasePlatformAdapter(ABC): HERMES_HUMAN_DELAY_MIN_MS: minimum delay in ms (default 800, custom mode) HERMES_HUMAN_DELAY_MAX_MS: maximum delay in ms (default 2500, custom mode) """ - import random - mode = os.getenv("HERMES_HUMAN_DELAY_MODE", "off").lower() if mode == "off": return 0.0 diff --git a/gateway/platforms/bluebubbles.py b/gateway/platforms/bluebubbles.py index a8a292969..39d4e537e 100644 --- a/gateway/platforms/bluebubbles.py +++ b/gateway/platforms/bluebubbles.py @@ -75,7 +75,7 @@ def _redact(text: str) -> str: def check_bluebubbles_requirements() -> bool: try: import aiohttp # noqa: F401 - import httpx as _httpx # noqa: F401 + import httpx # noqa: F401 except 
ImportError: return False return True diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index 2b45b2b58..9857b8ffd 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -541,7 +541,6 @@ class DiscordAdapter(BasePlatformAdapter): # ctypes.util.find_library fails on macOS with Homebrew-installed libs, # so fall back to known Homebrew paths if needed. if not opus_path: - import sys _homebrew_paths = ( "/opt/homebrew/lib/libopus.dylib", # Apple Silicon "/usr/local/lib/libopus.dylib", # Intel Mac @@ -1422,8 +1421,7 @@ class DiscordAdapter(BasePlatformAdapter): speaking_user_ids: set = set() receiver = self._voice_receivers.get(guild_id) if receiver: - import time as _time - now = _time.monotonic() + now = time.monotonic() with receiver._lock: for ssrc, last_t in receiver._last_packet_time.items(): # Consider "speaking" if audio received within last 2 seconds @@ -2131,10 +2129,42 @@ class DiscordAdapter(BasePlatformAdapter): # This ensures new commands added to COMMAND_REGISTRY in # hermes_cli/commands.py automatically appear as Discord slash # commands without needing a manual entry here. 
+ def _build_auto_slash_command(_name: str, _description: str, _args_hint: str = ""): + """Build a discord.app_commands.Command that proxies to _run_simple_slash.""" + discord_name = _name.lower()[:32] + desc = (_description or f"Run /{_name}")[:100] + has_args = bool(_args_hint) + + if has_args: + def _make_args_handler(__name: str, __hint: str): + @discord.app_commands.describe(args=f"Arguments: {__hint}"[:100]) + async def _handler(interaction: discord.Interaction, args: str = ""): + await self._run_simple_slash( + interaction, f"/{__name} {args}".strip() + ) + _handler.__name__ = f"auto_slash_{__name.replace('-', '_')}" + return _handler + + handler = _make_args_handler(_name, _args_hint) + else: + def _make_simple_handler(__name: str): + async def _handler(interaction: discord.Interaction): + await self._run_simple_slash(interaction, f"/{__name}") + _handler.__name__ = f"auto_slash_{__name.replace('-', '_')}" + return _handler + + handler = _make_simple_handler(_name) + + return discord.app_commands.Command( + name=discord_name, + description=desc, + callback=handler, + ) + + already_registered: set[str] = set() try: from hermes_cli.commands import COMMAND_REGISTRY, _is_gateway_available, _resolve_config_gates - already_registered = set() try: already_registered = {cmd.name for cmd in tree.get_commands()} except Exception: @@ -2149,38 +2179,10 @@ class DiscordAdapter(BasePlatformAdapter): discord_name = cmd_def.name.lower()[:32] if discord_name in already_registered: continue - # Skip aliases that overlap with already-registered names - # (aliases for explicitly registered commands are handled above). - desc = (cmd_def.description or f"Run /{cmd_def.name}")[:100] - has_args = bool(cmd_def.args_hint) - - if has_args: - # Command takes optional arguments โ€” create handler with - # an optional ``args`` string parameter. 
- def _make_args_handler(_name: str, _hint: str): - @discord.app_commands.describe(args=f"Arguments: {_hint}"[:100]) - async def _handler(interaction: discord.Interaction, args: str = ""): - await self._run_simple_slash( - interaction, f"/{_name} {args}".strip() - ) - _handler.__name__ = f"auto_slash_{_name.replace('-', '_')}" - return _handler - - handler = _make_args_handler(cmd_def.name, cmd_def.args_hint) - else: - # Parameterless command. - def _make_simple_handler(_name: str): - async def _handler(interaction: discord.Interaction): - await self._run_simple_slash(interaction, f"/{_name}") - _handler.__name__ = f"auto_slash_{_name.replace('-', '_')}" - return _handler - - handler = _make_simple_handler(cmd_def.name) - - auto_cmd = discord.app_commands.Command( - name=discord_name, - description=desc, - callback=handler, + auto_cmd = _build_auto_slash_command( + cmd_def.name, + cmd_def.description, + cmd_def.args_hint, ) try: tree.add_command(auto_cmd) @@ -2197,6 +2199,35 @@ class DiscordAdapter(BasePlatformAdapter): except Exception as e: logger.warning("Discord auto-register from COMMAND_REGISTRY failed: %s", e) + # โ”€โ”€ Plugin-registered slash commands โ”€โ”€ + # Plugins register via PluginContext.register_command(); we mirror + # those into Discord's native slash picker so users get the same + # autocomplete UX as for built-in commands. No per-platform plugin + # API needed โ€” plugin commands are platform-agnostic. + try: + from hermes_cli.commands import _iter_plugin_command_entries + + for plugin_name, plugin_desc, plugin_args_hint in _iter_plugin_command_entries(): + discord_name = plugin_name.lower()[:32] + if discord_name in already_registered: + continue + auto_cmd = _build_auto_slash_command( + plugin_name, + plugin_desc, + plugin_args_hint, + ) + try: + tree.add_command(auto_cmd) + already_registered.add(discord_name) + except Exception: + # Silently skip commands that fail registration (e.g. + # name conflict with a subcommand group). 
+ pass + except Exception as e: + logger.warning( + "Discord auto-register from plugin commands failed: %s", e + ) + # Register skills under a single /skill command group with category # subcommand groups. This uses 1 top-level slot instead of N, # supporting up to 25 categories ร— 25 skills = 625 skills. diff --git a/gateway/platforms/email.py b/gateway/platforms/email.py index d4261ccfb..2a38d699e 100644 --- a/gateway/platforms/email.py +++ b/gateway/platforms/email.py @@ -545,6 +545,7 @@ class EmailAdapter(BasePlatformAdapter): caption: Optional[str] = None, file_name: Optional[str] = None, reply_to: Optional[str] = None, + **kwargs, ) -> SendResult: """Send a file as an email attachment.""" try: diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py index 85cebe538..7ab478df0 100644 --- a/gateway/platforms/feishu.py +++ b/gateway/platforms/feishu.py @@ -14,6 +14,35 @@ Supports: - Interactive card button-click events routed as synthetic COMMAND events - Webhook anomaly tracking (matches openclaw createWebhookAnomalyTracker) - Verification token validation as second auth layer (matches openclaw) + +Feishu identity model +--------------------- +Feishu uses three user-ID tiers (official docs: +https://open.feishu.cn/document/home/user-identity-introduction/introduction): + + open_id (ou_xxx) โ€” **App-scoped**. The same person gets a different + open_id under each Feishu app. Always available in + event payloads without extra permissions. + user_id (u_xxx) โ€” **Tenant-scoped**. Stable within a company but + requires the ``contact:user.employee_id:readonly`` + scope. May not be present. + union_id (on_xxx) โ€” **Developer-scoped**. Same across all apps owned by + one developer/ISV. Best cross-app stable ID. + +For bots specifically: + + app_id โ€” The application's canonical credential identifier. + bot open_id โ€” Returned by ``/bot/v3/info``. 
This is the bot's own + open_id *within its app context* and is what Feishu + puts in ``mentions[].id.open_id`` when someone + @-mentions the bot. Used for mention gating only. + +In single-bot mode (what Hermes currently supports), open_id works as a +de-facto unique user identifier since there is only one app context. + +Session-key participant isolation prefers ``union_id`` (via user_id_alt) +over ``open_id`` (via user_id) so that sessions stay stable if the same +user is seen through different apps in the future. """ from __future__ import annotations @@ -35,7 +64,7 @@ from dataclasses import dataclass, field from datetime import datetime from pathlib import Path from types import SimpleNamespace -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Sequence from urllib.error import HTTPError, URLError from urllib.parse import urlencode from urllib.request import Request, urlopen @@ -73,7 +102,9 @@ try: UpdateMessageRequest, UpdateMessageRequestBody, ) + from lark_oapi.core import AccessTokenType, HttpMethod from lark_oapi.core.const import FEISHU_DOMAIN, LARK_DOMAIN + from lark_oapi.core.model import BaseRequest from lark_oapi.event.callback.model.p2_card_action_trigger import ( CallBackCard, P2CardActionTriggerResponse, @@ -234,6 +265,8 @@ FALLBACK_ATTACHMENT_TEXT = "[Attachment]" _PREFERRED_LOCALES = ("zh_cn", "en_us") _MARKDOWN_SPECIAL_CHARS_RE = re.compile(r"([\\`*_{}\[\]()#+\-!|>~])") _MENTION_PLACEHOLDER_RE = re.compile(r"@_user_\d+") +_MENTION_BOUNDARY_CHARS = frozenset(" \t\n\r.,;:!?ใ€๏ผŒใ€‚๏ผ›๏ผš๏ผ๏ผŸ()[]{}<>\"'`") +_TRAILING_TERMINAL_PUNCT = frozenset(" \t\n\r.!?ใ€‚๏ผ๏ผŸ") _WHITESPACE_RE = re.compile(r"\s+") _SUPPORTED_CARD_TEXT_KEYS = ( "title", @@ -277,12 +310,36 @@ class FeishuPostMediaRef: resource_type: str = "file" +@dataclass(frozen=True) +class FeishuMentionRef: + name: str = "" + open_id: str = "" + is_all: bool = False + is_self: bool = False + + +@dataclass(frozen=True) +class _FeishuBotIdentity: + 
open_id: str = "" + user_id: str = "" + name: str = "" + + def matches(self, *, open_id: str, user_id: str, name: str) -> bool: + # Precedence: open_id > user_id > name. IDs are authoritative when both + # sides have them; the next tier is only considered when either side + # lacks the current one. + if open_id and self.open_id: + return open_id == self.open_id + if user_id and self.user_id: + return user_id == self.user_id + return bool(self.name) and name == self.name + + @dataclass(frozen=True) class FeishuPostParseResult: text_content: str image_keys: List[str] = field(default_factory=list) media_refs: List[FeishuPostMediaRef] = field(default_factory=list) - mentioned_ids: List[str] = field(default_factory=list) @dataclass(frozen=True) @@ -292,14 +349,14 @@ class FeishuNormalizedMessage: preferred_message_type: str = "text" image_keys: List[str] = field(default_factory=list) media_refs: List[FeishuPostMediaRef] = field(default_factory=list) - mentioned_ids: List[str] = field(default_factory=list) + mentions: List[FeishuMentionRef] = field(default_factory=list) relation_kind: str = "plain" metadata: Dict[str, Any] = field(default_factory=dict) @dataclass(frozen=True) class FeishuAdapterSettings: - app_id: str + app_id: str # Canonical bot/app identifier (credential, not from event payloads) app_secret: str domain_name: str connection_mode: str @@ -307,7 +364,11 @@ class FeishuAdapterSettings: verification_token: str group_policy: str allowed_group_users: frozenset[str] + # Bot's own open_id (app-scoped) โ€” returned by /bot/v3/info. Used only for + # @mention matching: Feishu puts this value in mentions[].id.open_id when + # a user @-mentions the bot in a group chat. bot_open_id: str + # Bot's user_id (tenant-scoped) โ€” optional, used as fallback mention match. 
bot_user_id: str bot_name: str dedup_cache_size: int @@ -505,14 +566,17 @@ def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]: return rows or [[{"tag": "md", "text": content}]] -def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult: +def parse_feishu_post_payload( + payload: Any, + *, + mentions_map: Optional[Dict[str, FeishuMentionRef]] = None, +) -> FeishuPostParseResult: resolved = _resolve_post_payload(payload) if not resolved: return FeishuPostParseResult(text_content=FALLBACK_POST_TEXT) image_keys: List[str] = [] media_refs: List[FeishuPostMediaRef] = [] - mentioned_ids: List[str] = [] parts: List[str] = [] title = _normalize_feishu_text(str(resolved.get("title", "")).strip()) @@ -523,7 +587,10 @@ def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult: if not isinstance(row, list): continue row_text = _normalize_feishu_text( - "".join(_render_post_element(item, image_keys, media_refs, mentioned_ids) for item in row) + "".join( + _render_post_element(item, image_keys, media_refs, mentions_map) + for item in row + ) ) if row_text: parts.append(row_text) @@ -532,7 +599,6 @@ def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult: text_content="\n".join(parts).strip() or FALLBACK_POST_TEXT, image_keys=image_keys, media_refs=media_refs, - mentioned_ids=mentioned_ids, ) @@ -584,7 +650,7 @@ def _render_post_element( element: Any, image_keys: List[str], media_refs: List[FeishuPostMediaRef], - mentioned_ids: List[str], + mentions_map: Optional[Dict[str, FeishuMentionRef]] = None, ) -> str: if isinstance(element, str): return element @@ -602,19 +668,21 @@ def _render_post_element( escaped_label = _escape_markdown_text(label) return f"[{escaped_label}]({href})" if href else escaped_label if tag == "at": - mentioned_id = ( - str(element.get("open_id", "")).strip() - or str(element.get("user_id", "")).strip() - ) - if mentioned_id and mentioned_id not in mentioned_ids: - mentioned_ids.append(mentioned_id) - 
display_name = ( - str(element.get("user_name", "")).strip() - or str(element.get("name", "")).strip() - or str(element.get("text", "")).strip() - or mentioned_id - ) - return f"@{_escape_markdown_text(display_name)}" if display_name else "@" + # Post .user_id is a placeholder ("@_user_N" or "@_all"); look up + # the real ref in mentions_map for the display name. + placeholder = str(element.get("user_id", "")).strip() + if placeholder == "@_all": + # Feishu SDK sometimes omits @_all from the top-level mentions + # payload; record it here so the caller's mention list stays complete. + if mentions_map is not None and "@_all" not in mentions_map: + mentions_map["@_all"] = FeishuMentionRef(is_all=True) + return "@all" + ref = (mentions_map or {}).get(placeholder) + if ref is not None: + display_name = ref.name or ref.open_id or "user" + else: + display_name = str(element.get("user_name", "")).strip() or "user" + return f"@{_escape_markdown_text(display_name)}" if tag in {"img", "image"}: image_key = str(element.get("image_key", "")).strip() if image_key and image_key not in image_keys: @@ -652,8 +720,7 @@ def _render_post_element( nested_parts: List[str] = [] for key in ("text", "title", "content", "children", "elements"): - value = element.get(key) - extracted = _render_nested_post(value, image_keys, media_refs, mentioned_ids) + extracted = _render_nested_post(element.get(key), image_keys, media_refs, mentions_map) if extracted: nested_parts.append(extracted) return " ".join(part for part in nested_parts if part) @@ -663,7 +730,7 @@ def _render_nested_post( value: Any, image_keys: List[str], media_refs: List[FeishuPostMediaRef], - mentioned_ids: List[str], + mentions_map: Optional[Dict[str, FeishuMentionRef]] = None, ) -> str: if isinstance(value, str): return _escape_markdown_text(value) @@ -671,17 +738,17 @@ def _render_nested_post( return " ".join( part for item in value - for part in [_render_nested_post(item, image_keys, media_refs, mentioned_ids)] + for part in 
[_render_nested_post(item, image_keys, media_refs, mentions_map)]
             if part
         )
     if isinstance(value, dict):
-        direct = _render_post_element(value, image_keys, media_refs, mentioned_ids)
+        direct = _render_post_element(value, image_keys, media_refs, mentions_map)
         if direct:
             return direct
         return " ".join(
             part
             for item in value.values()
-            for part in [_render_nested_post(item, image_keys, media_refs, mentioned_ids)]
+            for part in [_render_nested_post(item, image_keys, media_refs, mentions_map)]
             if part
         )
     return ""
@@ -692,31 +759,48 @@ def _render_nested_post(
 # ---------------------------------------------------------------------------
 
 
-def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNormalizedMessage:
+def normalize_feishu_message(
+    *,
+    message_type: str,
+    raw_content: str,
+    mentions: Optional[Sequence[Any]] = None,
+    bot: _FeishuBotIdentity = _FeishuBotIdentity(),
+) -> FeishuNormalizedMessage:
     normalized_type = str(message_type or "").strip().lower()
     payload = _load_feishu_payload(raw_content)
+    mentions_map = _build_mentions_map(mentions, bot)
 
     if normalized_type == "text":
+        text = str(payload.get("text", "") or "")
+        # Feishu SDK sometimes omits @_all from the mentions payload even when
+        # the text literal contains it (confirmed via im.v1.message.get).
+        if "@_all" in text and "@_all" not in mentions_map:
+            mentions_map["@_all"] = FeishuMentionRef(is_all=True)
         return FeishuNormalizedMessage(
             raw_type=normalized_type,
-            text_content=_normalize_feishu_text(str(payload.get("text", "") or "")),
+            text_content=_normalize_feishu_text(text, mentions_map),
+            mentions=list(mentions_map.values()),
        )
    if normalized_type == "post":
-        parsed_post = parse_feishu_post_payload(payload)
+        # The walker writes back to mentions_map if it encounters an
+        # "@_all" at-element, so reading .values() after parsing is enough. 
+ parsed_post = parse_feishu_post_payload(payload, mentions_map=mentions_map) return FeishuNormalizedMessage( raw_type=normalized_type, text_content=parsed_post.text_content, image_keys=list(parsed_post.image_keys), media_refs=list(parsed_post.media_refs), - mentioned_ids=list(parsed_post.mentioned_ids), + mentions=list(mentions_map.values()), relation_kind="post", ) + mention_refs = list(mentions_map.values()) if normalized_type == "image": image_key = str(payload.get("image_key", "") or "").strip() alt_text = _normalize_feishu_text( str(payload.get("text", "") or "") or str(payload.get("alt", "") or "") - or FALLBACK_IMAGE_TEXT + or FALLBACK_IMAGE_TEXT, + mentions_map, ) return FeishuNormalizedMessage( raw_type=normalized_type, @@ -724,6 +808,7 @@ def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNo preferred_message_type="photo", image_keys=[image_key] if image_key else [], relation_kind="image", + mentions=mention_refs, ) if normalized_type in {"file", "audio", "media"}: media_ref = _build_media_ref_from_payload(payload, resource_type=normalized_type) @@ -735,6 +820,7 @@ def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNo media_refs=[media_ref] if media_ref.file_key else [], relation_kind=normalized_type, metadata={"placeholder_text": placeholder}, + mentions=mention_refs, ) if normalized_type == "merge_forward": return _normalize_merge_forward_message(payload) @@ -1009,8 +1095,20 @@ def _first_non_empty_text(*values: Any) -> str: # --------------------------------------------------------------------------- -def _normalize_feishu_text(text: str) -> str: - cleaned = _MENTION_PLACEHOLDER_RE.sub(" ", text or "") +def _normalize_feishu_text( + text: str, + mentions_map: Optional[Dict[str, FeishuMentionRef]] = None, +) -> str: + def _sub(match: "re.Match[str]") -> str: + key = match.group(0) + ref = (mentions_map or {}).get(key) + if ref is None: + return " " + name = ref.name or ref.open_id or "user" + return 
f"@{name}" + + cleaned = _MENTION_PLACEHOLDER_RE.sub(_sub, text or "") + cleaned = cleaned.replace("@_all", "@all") cleaned = cleaned.replace("\r\n", "\n").replace("\r", "\n") cleaned = "\n".join(_WHITESPACE_RE.sub(" ", line).strip() for line in cleaned.split("\n")) cleaned = "\n".join(line for line in cleaned.split("\n") if line) @@ -1029,6 +1127,117 @@ def _unique_lines(lines: List[str]) -> List[str]: return unique +# --------------------------------------------------------------------------- +# Mention helpers +# --------------------------------------------------------------------------- + + +def _extract_mention_ids(mention: Any) -> tuple[str, str]: + # Returns (open_id, user_id). im.v1.message.get hands back id as a string + # plus id_type discriminator; event payloads hand back a nested UserId + # object carrying both fields. + mention_id = getattr(mention, "id", None) + if isinstance(mention_id, str): + id_type = str(getattr(mention, "id_type", "") or "").lower() + if id_type == "open_id": + return mention_id, "" + if id_type == "user_id": + return "", mention_id + return "", "" + if mention_id is None: + return "", "" + return ( + str(getattr(mention_id, "open_id", "") or ""), + str(getattr(mention_id, "user_id", "") or ""), + ) + + +def _build_mentions_map( + mentions: Optional[Sequence[Any]], + bot: _FeishuBotIdentity, +) -> Dict[str, FeishuMentionRef]: + result: Dict[str, FeishuMentionRef] = {} + for mention in mentions or []: + key = str(getattr(mention, "key", "") or "") + if not key: + continue + if key == "@_all": + result[key] = FeishuMentionRef(is_all=True) + continue + open_id, user_id = _extract_mention_ids(mention) + name = str(getattr(mention, "name", "") or "").strip() + result[key] = FeishuMentionRef( + name=name, + open_id=open_id, + is_self=bot.matches(open_id=open_id, user_id=user_id, name=name), + ) + return result + + +def _build_mention_hint(mentions: Sequence[FeishuMentionRef]) -> str: + parts: List[str] = [] + seen: set = set() + for 
ref in mentions: + if ref.is_self: + continue + signature = (ref.is_all, ref.open_id, ref.name) + if signature in seen: + continue + seen.add(signature) + if ref.is_all: + parts.append("@all") + elif ref.open_id: + parts.append(f"{ref.name or 'unknown'} (open_id={ref.open_id})") + else: + parts.append(ref.name or "unknown") + return f"[Mentioned: {', '.join(parts)}]" if parts else "" + + +def _strip_edge_self_mentions( + text: str, + mentions: Sequence[FeishuMentionRef], +) -> str: + # Leading: strip consecutive self-mentions unconditionally. + # Trailing: strip only when followed by whitespace/terminal punct, so + # mid-sentence references ("don't @Bot again") stay intact. + # Leading word-boundary prevents @Al from eating @Alice. + if not text: + return text + self_names = [ + f"@{ref.name or ref.open_id or 'user'}" + for ref in mentions + if ref.is_self + ] + if not self_names: + return text + + remaining = text.lstrip() + while True: + for nm in self_names: + if not remaining.startswith(nm): + continue + after = remaining[len(nm):] + if after and after[0] not in _MENTION_BOUNDARY_CHARS: + continue + remaining = after.lstrip() + break + else: + break + + while True: + i = len(remaining) + while i > 0 and remaining[i - 1] in _TRAILING_TERMINAL_PUNCT: + i -= 1 + body = remaining[:i] + tail = remaining[i:] + for nm in self_names: + if body.endswith(nm): + remaining = body[: -len(nm)].rstrip() + tail + break + else: + return remaining + + def _run_official_feishu_ws_client(ws_client: Any, adapter: Any) -> None: """Run the official Lark WS client in its own thread-local event loop.""" import lark_oapi.ws.client as ws_client_module @@ -2470,13 +2679,22 @@ class FeishuAdapter(BasePlatformAdapter): chat_type: str, message_id: str, ) -> None: - text, inbound_type, media_urls, media_types = await self._extract_message_content(message) + text, inbound_type, media_urls, media_types, mentions = await self._extract_message_content(message) + + if inbound_type == 
MessageType.TEXT: + text = _strip_edge_self_mentions(text, mentions) + if text.startswith("/"): + inbound_type = MessageType.COMMAND + + # Guard runs post-strip so a pure "@Bot" message (stripped to "") is dropped. if inbound_type == MessageType.TEXT and not text and not media_urls: - logger.debug("[Feishu] Ignoring unsupported or empty message type: %s", getattr(message, "message_type", "")) + logger.debug("[Feishu] Ignoring empty text message id=%s", message_id) return - if inbound_type == MessageType.TEXT and text.startswith("/"): - inbound_type = MessageType.COMMAND + if inbound_type != MessageType.COMMAND: + hint = _build_mention_hint(mentions) + if hint: + text = f"{hint}\n\n{text}" if text else hint reply_to_message_id = ( getattr(message, "parent_id", None) @@ -2935,14 +3153,20 @@ class FeishuAdapter(BasePlatformAdapter): # Message content extraction and resource download # ========================================================================= - async def _extract_message_content(self, message: Any) -> tuple[str, MessageType, List[str], List[str]]: - """Extract text and cached media from a normalized Feishu message.""" + async def _extract_message_content( + self, message: Any + ) -> tuple[str, MessageType, List[str], List[str], List[FeishuMentionRef]]: raw_content = getattr(message, "content", "") or "" raw_type = getattr(message, "message_type", "") or "" message_id = str(getattr(message, "message_id", "") or "") logger.info("[Feishu] Received raw message type=%s message_id=%s", raw_type, message_id) - normalized = normalize_feishu_message(message_type=raw_type, raw_content=raw_content) + normalized = normalize_feishu_message( + message_type=raw_type, + raw_content=raw_content, + mentions=getattr(message, "mentions", None), + bot=self._bot_identity(), + ) media_urls, media_types = await self._download_feishu_message_resources( message_id=message_id, normalized=normalized, @@ -2959,7 +3183,7 @@ class FeishuAdapter(BasePlatformAdapter): if injected: text 
= injected - return text, inbound_type, media_urls, media_types + return text, inbound_type, media_urls, media_types, list(normalized.mentions) async def _download_feishu_message_resources( self, @@ -3223,10 +3447,22 @@ class FeishuAdapter(BasePlatformAdapter): return "group" async def _resolve_sender_profile(self, sender_id: Any) -> Dict[str, Optional[str]]: + """Map Feishu's three-tier user IDs onto Hermes' SessionSource fields. + + Preference order for the primary ``user_id`` field: + 1. user_id (tenant-scoped, most stable โ€” requires permission scope) + 2. open_id (app-scoped, always available โ€” different per bot app) + + ``user_id_alt`` carries the union_id (developer-scoped, stable across + all apps by the same developer). Session-key generation prefers + user_id_alt when present, so participant isolation stays stable even + if the primary ID is the app-scoped open_id. + """ open_id = getattr(sender_id, "open_id", None) or None user_id = getattr(sender_id, "user_id", None) or None union_id = getattr(sender_id, "union_id", None) or None - primary_id = open_id or user_id + # Prefer tenant-scoped user_id; fall back to app-scoped open_id. 
+ primary_id = user_id or open_id display_name = await self._resolve_sender_name_from_api(primary_id or union_id) return { "user_id": primary_id, @@ -3308,15 +3544,31 @@ class FeishuAdapter(BasePlatformAdapter): body = getattr(parent, "body", None) msg_type = getattr(parent, "msg_type", "") or "" raw_content = getattr(body, "content", "") or "" - text = self._extract_text_from_raw_content(msg_type=msg_type, raw_content=raw_content) + parent_mentions = getattr(parent, "mentions", None) if parent else None + text = self._extract_text_from_raw_content( + msg_type=msg_type, + raw_content=raw_content, + mentions=parent_mentions, + ) self._message_text_cache[message_id] = text return text except Exception: logger.warning("[Feishu] Failed to fetch parent message %s", message_id, exc_info=True) return None - def _extract_text_from_raw_content(self, *, msg_type: str, raw_content: str) -> Optional[str]: - normalized = normalize_feishu_message(message_type=msg_type, raw_content=raw_content) + def _extract_text_from_raw_content( + self, + *, + msg_type: str, + raw_content: str, + mentions: Optional[Sequence[Any]] = None, + ) -> Optional[str]: + normalized = normalize_feishu_message( + message_type=msg_type, + raw_content=raw_content, + mentions=mentions, + bot=self._bot_identity(), + ) if normalized.text_content: return normalized.text_content placeholder = normalized.metadata.get("placeholder_text") if isinstance(normalized.metadata, dict) else None @@ -3386,10 +3638,10 @@ class FeishuAdapter(BasePlatformAdapter): normalized = normalize_feishu_message( message_type=getattr(message, "message_type", "") or "", raw_content=raw_content, + mentions=getattr(message, "mentions", None), + bot=self._bot_identity(), ) - if normalized.mentioned_ids: - return self._post_mentions_bot(normalized.mentioned_ids) - return False + return self._post_mentions_bot(normalized.mentions) def _is_self_sent_bot_message(self, event: Any) -> bool: """Return True only for Feishu events emitted by this 
Hermes bot.""" @@ -3409,30 +3661,37 @@ class FeishuAdapter(BasePlatformAdapter): return False def _message_mentions_bot(self, mentions: List[Any]) -> bool: - """Check whether any mention targets the configured or inferred bot identity.""" + # IDs trump names: when both sides have open_id (or both user_id), + # match requires equal IDs. Name fallback only when either side + # lacks an ID. for mention in mentions: mention_id = getattr(mention, "id", None) - mention_open_id = getattr(mention_id, "open_id", None) - mention_user_id = getattr(mention_id, "user_id", None) + mention_open_id = (getattr(mention_id, "open_id", None) or "").strip() + mention_user_id = (getattr(mention_id, "user_id", None) or "").strip() mention_name = (getattr(mention, "name", None) or "").strip() - if self._bot_open_id and mention_open_id == self._bot_open_id: - return True - if self._bot_user_id and mention_user_id == self._bot_user_id: - return True + if mention_open_id and self._bot_open_id: + if mention_open_id == self._bot_open_id: + return True + continue # IDs differ โ€” not the bot; skip name fallback. 
+ if mention_user_id and self._bot_user_id: + if mention_user_id == self._bot_user_id: + return True + continue if self._bot_name and mention_name == self._bot_name: return True return False - def _post_mentions_bot(self, mentioned_ids: List[str]) -> bool: - if not mentioned_ids: - return False - if self._bot_open_id and self._bot_open_id in mentioned_ids: - return True - if self._bot_user_id and self._bot_user_id in mentioned_ids: - return True - return False + def _post_mentions_bot(self, mentions: List[FeishuMentionRef]) -> bool: + return any(m.is_self for m in mentions) + + def _bot_identity(self) -> _FeishuBotIdentity: + return _FeishuBotIdentity( + open_id=self._bot_open_id, + user_id=self._bot_user_id, + name=self._bot_name, + ) async def _hydrate_bot_identity(self) -> None: """Best-effort discovery of bot identity for precise group mention gating @@ -3457,14 +3716,15 @@ class FeishuAdapter(BasePlatformAdapter): # uses via probe_bot(). if not self._bot_open_id or not self._bot_name: try: - resp = await asyncio.to_thread( - self._client.request, - method="GET", - url="/open-apis/bot/v3/info", - body=None, - raw_response=True, + req = ( + BaseRequest.builder() + .http_method(HttpMethod.GET) + .uri("/open-apis/bot/v3/info") + .token_types({AccessTokenType.TENANT}) + .build() ) - content = getattr(resp, "content", None) + resp = await asyncio.to_thread(self._client.request, req) + content = getattr(getattr(resp, "raw", None), "content", None) if content: payload = json.loads(content) parsed = _parse_bot_response(payload) or {} @@ -4212,6 +4472,9 @@ def probe_bot(app_id: str, app_secret: str, domain: str) -> Optional[dict]: Uses lark_oapi SDK when available, falls back to raw HTTP otherwise. Returns {"bot_name": ..., "bot_open_id": ...} on success, None on failure. + + Note: ``bot_open_id`` here is the bot's app-scoped open_id โ€” the same ID + that Feishu puts in @mention payloads. It is NOT the app_id. 
""" if FEISHU_AVAILABLE: return _probe_bot_sdk(app_id, app_secret, domain) @@ -4232,12 +4495,12 @@ def _build_onboard_client(app_id: str, app_secret: str, domain: str) -> Any: def _parse_bot_response(data: dict) -> Optional[dict]: - """Extract bot_name and bot_open_id from a /bot/v3/info response.""" + # /bot/v3/info returns bot.app_name; legacy paths used bot_name โ€” accept both. if data.get("code") != 0: return None bot = data.get("bot") or data.get("data", {}).get("bot") or {} return { - "bot_name": bot.get("bot_name"), + "bot_name": bot.get("app_name") or bot.get("bot_name"), "bot_open_id": bot.get("open_id"), } @@ -4246,13 +4509,18 @@ def _probe_bot_sdk(app_id: str, app_secret: str, domain: str) -> Optional[dict]: """Probe bot info using lark_oapi SDK.""" try: client = _build_onboard_client(app_id, app_secret, domain) - resp = client.request( - method="GET", - url="/open-apis/bot/v3/info", - body=None, - raw_response=True, + req = ( + BaseRequest.builder() + .http_method(HttpMethod.GET) + .uri("/open-apis/bot/v3/info") + .token_types({AccessTokenType.TENANT}) + .build() ) - return _parse_bot_response(json.loads(resp.content)) + resp = client.request(req) + content = getattr(getattr(resp, "raw", None), "content", None) + if content is None: + return None + return _parse_bot_response(json.loads(content)) except Exception as exc: logger.debug("[Feishu onboard] SDK probe failed: %s", exc) return None diff --git a/gateway/platforms/mattermost.py b/gateway/platforms/mattermost.py index 10539bf64..0e6c9631d 100644 --- a/gateway/platforms/mattermost.py +++ b/gateway/platforms/mattermost.py @@ -410,7 +410,6 @@ class MattermostAdapter(BasePlatformAdapter): logger.warning("Mattermost: blocked unsafe URL (SSRF protection)") return await self.send(chat_id, f"{caption or ''}\n{url}".strip(), reply_to) - import asyncio import aiohttp last_exc = None diff --git a/gateway/platforms/qqbot/__init__.py b/gateway/platforms/qqbot/__init__.py index 7119dd979..130269b5f 100644 --- 
a/gateway/platforms/qqbot/__init__.py +++ b/gateway/platforms/qqbot/__init__.py @@ -26,9 +26,8 @@ from .adapter import ( # noqa: F401 # -- Onboard (QR-code scan-to-configure) ----------------------------------- from .onboard import ( # noqa: F401 BindStatus, - create_bind_task, - poll_bind_result, build_connect_url, + qr_register, ) from .crypto import decrypt_secret, generate_bind_key # noqa: F401 @@ -44,9 +43,8 @@ __all__ = [ "_ssrf_redirect_guard", # onboard "BindStatus", - "create_bind_task", - "poll_bind_result", "build_connect_url", + "qr_register", # crypto "decrypt_secret", "generate_bind_key", diff --git a/gateway/platforms/qqbot/adapter.py b/gateway/platforms/qqbot/adapter.py index ced744271..df3987f2e 100644 --- a/gateway/platforms/qqbot/adapter.py +++ b/gateway/platforms/qqbot/adapter.py @@ -1086,11 +1086,8 @@ class QQAdapter(BasePlatformAdapter): return MessageType.VIDEO if "image" in first_type or "photo" in first_type: return MessageType.PHOTO - # Unknown content type with an attachment โ€” don't assume PHOTO - # to prevent non-image files from being sent to vision analysis. logger.debug( - "[%s] Unknown media content_type '%s', defaulting to TEXT", - self._log_tag, + "Unknown media content_type '%s', defaulting to TEXT", first_type, ) return MessageType.TEXT @@ -1826,14 +1823,12 @@ class QQAdapter(BasePlatformAdapter): body["file_name"] = file_name # Retry transient upload failures - last_exc = None for attempt in range(3): try: return await self._api_request( "POST", path, body, timeout=FILE_UPLOAD_TIMEOUT ) except RuntimeError as exc: - last_exc = exc err_msg = str(exc) if any( kw in err_msg @@ -1842,8 +1837,8 @@ class QQAdapter(BasePlatformAdapter): raise if attempt < 2: await asyncio.sleep(1.5 * (attempt + 1)) - - raise last_exc # type: ignore[misc] + else: + raise # Maximum time (seconds) to wait for reconnection before giving up on send. 
_RECONNECT_WAIT_SECONDS = 15.0 diff --git a/gateway/platforms/qqbot/onboard.py b/gateway/platforms/qqbot/onboard.py index 65750b3f1..b48c39a4f 100644 --- a/gateway/platforms/qqbot/onboard.py +++ b/gateway/platforms/qqbot/onboard.py @@ -1,6 +1,10 @@ """ QQBot scan-to-configure (QR code onboard) module. +Mirrors the Feishu onboarding pattern: synchronous HTTP + a single public +entry-point ``qr_register()`` that handles the full flow (create task โ†’ +display QR code โ†’ poll โ†’ decrypt credentials). + Calls the ``q.qq.com`` ``create_bind_task`` / ``poll_bind_result`` APIs to generate a QR-code URL and poll for scan completion. On success the caller receives the bot's *app_id*, *client_secret* (decrypted locally), and the @@ -12,18 +16,20 @@ Reference: https://bot.q.qq.com/wiki/develop/api-v2/ from __future__ import annotations import logging +import time from enum import IntEnum -from typing import Tuple +from typing import Optional, Tuple from urllib.parse import quote from .constants import ( ONBOARD_API_TIMEOUT, ONBOARD_CREATE_PATH, + ONBOARD_POLL_INTERVAL, ONBOARD_POLL_PATH, PORTAL_HOST, QR_URL_TEMPLATE, ) -from .crypto import generate_bind_key +from .crypto import decrypt_secret, generate_bind_key from .utils import get_api_headers logger = logging.getLogger(__name__) @@ -35,7 +41,7 @@ logger = logging.getLogger(__name__) class BindStatus(IntEnum): - """Status codes returned by ``poll_bind_result``.""" + """Status codes returned by ``_poll_bind_result``.""" NONE = 0 PENDING = 1 @@ -44,18 +50,40 @@ class BindStatus(IntEnum): # --------------------------------------------------------------------------- -# Public API +# QR rendering +# --------------------------------------------------------------------------- + +try: + import qrcode as _qrcode_mod +except (ImportError, TypeError): + _qrcode_mod = None # type: ignore[assignment] + + +def _render_qr(url: str) -> bool: + """Try to render a QR code in the terminal. 
Returns True if successful.""" + if _qrcode_mod is None: + return False + try: + qr = _qrcode_mod.QRCode( + error_correction=_qrcode_mod.constants.ERROR_CORRECT_M, + border=2, + ) + qr.add_data(url) + qr.make(fit=True) + qr.print_ascii(invert=True) + return True + except Exception: + return False + + +# --------------------------------------------------------------------------- +# Synchronous HTTP helpers (mirrors Feishu _post_registration pattern) # --------------------------------------------------------------------------- -async def create_bind_task( - timeout: float = ONBOARD_API_TIMEOUT, -) -> Tuple[str, str]: +def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]: """Create a bind task and return *(task_id, aes_key_base64)*. - The AES key is generated locally and sent to the server so it can - encrypt the bot credentials before returning them. - Raises: RuntimeError: If the API returns a non-zero ``retcode``. """ @@ -64,8 +92,8 @@ async def create_bind_task( url = f"https://{PORTAL_HOST}{ONBOARD_CREATE_PATH}" key = generate_bind_key() - async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client: - resp = await client.post(url, json={"key": key}, headers=get_api_headers()) + with httpx.Client(timeout=timeout, follow_redirects=True) as client: + resp = client.post(url, json={"key": key}, headers=get_api_headers()) resp.raise_for_status() data = resp.json() @@ -80,7 +108,7 @@ async def create_bind_task( return task_id, key -async def poll_bind_result( +def _poll_bind_result( task_id: str, timeout: float = ONBOARD_API_TIMEOUT, ) -> Tuple[BindStatus, str, str, str]: @@ -89,12 +117,6 @@ async def poll_bind_result( Returns: A 4-tuple of ``(status, bot_appid, bot_encrypt_secret, user_openid)``. - * ``bot_encrypt_secret`` is AES-256-GCM encrypted โ€” decrypt it with - :func:`~gateway.platforms.qqbot.crypto.decrypt_secret` using the - key from :func:`create_bind_task`. 
- * ``user_openid`` is the OpenID of the person who scanned the code - (available when ``status == COMPLETED``). - Raises: RuntimeError: If the API returns a non-zero ``retcode``. """ @@ -102,8 +124,8 @@ async def poll_bind_result( url = f"https://{PORTAL_HOST}{ONBOARD_POLL_PATH}" - async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client: - resp = await client.post(url, json={"task_id": task_id}, headers=get_api_headers()) + with httpx.Client(timeout=timeout, follow_redirects=True) as client: + resp = client.post(url, json={"task_id": task_id}, headers=get_api_headers()) resp.raise_for_status() data = resp.json() @@ -122,3 +144,77 @@ async def poll_bind_result( def build_connect_url(task_id: str) -> str: """Build the QR-code target URL for a given *task_id*.""" return QR_URL_TEMPLATE.format(task_id=quote(task_id)) + + +# --------------------------------------------------------------------------- +# Public entry-point +# --------------------------------------------------------------------------- + +_MAX_REFRESHES = 3 + + +def qr_register(timeout_seconds: int = 600) -> Optional[dict]: + """Run the QQBot scan-to-configure QR registration flow. + + Mirrors ``feishu.qr_register()``: handles create โ†’ display โ†’ poll โ†’ + decrypt in one call. Unexpected errors propagate to the caller. + + :returns: + ``{"app_id": ..., "client_secret": ..., "user_openid": ...}`` on + success, or ``None`` on failure / expiry / cancellation. 
+ """ + deadline = time.monotonic() + timeout_seconds + + for refresh_count in range(_MAX_REFRESHES + 1): + # โ”€โ”€ Create bind task โ”€โ”€ + try: + task_id, aes_key = _create_bind_task() + except Exception as exc: + logger.warning("[QQBot onboard] Failed to create bind task: %s", exc) + return None + + url = build_connect_url(task_id) + + # โ”€โ”€ Display QR code + URL โ”€โ”€ + print() + if _render_qr(url): + print(f" Scan the QR code above, or open this URL directly:\n {url}") + else: + print(f" Open this URL in QQ on your phone:\n {url}") + print(" Tip: pip install qrcode to display a scannable QR code here") + print() + + # โ”€โ”€ Poll loop โ”€โ”€ + while time.monotonic() < deadline: + try: + status, app_id, encrypted_secret, user_openid = _poll_bind_result(task_id) + except Exception: + time.sleep(ONBOARD_POLL_INTERVAL) + continue + + if status == BindStatus.COMPLETED: + client_secret = decrypt_secret(encrypted_secret, aes_key) + print() + print(f" QR scan complete! (App ID: {app_id})") + if user_openid: + print(f" Scanner's OpenID: {user_openid}") + return { + "app_id": app_id, + "client_secret": client_secret, + "user_openid": user_openid, + } + + if status == BindStatus.EXPIRED: + if refresh_count >= _MAX_REFRESHES: + logger.warning("[QQBot onboard] QR code expired %d times โ€” giving up", _MAX_REFRESHES) + return None + print(f"\n QR code expired, refreshing... 
({refresh_count + 1}/{_MAX_REFRESHES})") + break # next for-loop iteration creates a new task + + time.sleep(ONBOARD_POLL_INTERVAL) + else: + # deadline reached without completing + logger.warning("[QQBot onboard] Poll timed out after %ds", timeout_seconds) + return None + + return None diff --git a/gateway/platforms/slack.py b/gateway/platforms/slack.py index d3d218794..191689a5a 100644 --- a/gateway/platforms/slack.py +++ b/gateway/platforms/slack.py @@ -38,6 +38,7 @@ from gateway.platforms.base import ( BasePlatformAdapter, MessageEvent, MessageType, + ProcessingOutcome, SendResult, SUPPORTED_DOCUMENT_TYPES, safe_url_for_log, @@ -113,6 +114,11 @@ class SlackAdapter(BasePlatformAdapter): # Cache for _fetch_thread_context results: cache_key โ†’ _ThreadContextCache self._thread_context_cache: Dict[str, _ThreadContextCache] = {} self._THREAD_CACHE_TTL = 60.0 + # Track message IDs that should get reaction lifecycle (DMs / @mentions). + self._reacting_message_ids: set = set() + # Track active assistant thread status indicators so stop_typing can + # clear them (chat_id โ†’ thread_ts). + self._active_status_threads: Dict[str, str] = {} async def connect(self) -> bool: """Connect to Slack via Socket Mode.""" @@ -362,6 +368,7 @@ class SlackAdapter(BasePlatformAdapter): if not thread_ts: return # Can only set status in a thread context + self._active_status_threads[chat_id] = thread_ts try: await self._get_client(chat_id).assistant_threads_setStatus( channel_id=chat_id, @@ -373,6 +380,22 @@ class SlackAdapter(BasePlatformAdapter): # in an assistant-enabled context. Falls back to reactions. 
logger.debug("[Slack] assistant.threads.setStatus failed: %s", e) + async def stop_typing(self, chat_id: str) -> None: + """Clear the assistant thread status indicator.""" + if not self._app: + return + thread_ts = self._active_status_threads.pop(chat_id, None) + if not thread_ts: + return + try: + await self._get_client(chat_id).assistant_threads_setStatus( + channel_id=chat_id, + thread_ts=thread_ts, + status="", + ) + except Exception as e: + logger.debug("[Slack] assistant.threads.setStatus clear failed: %s", e) + def _dm_top_level_threads_as_sessions(self) -> bool: """Whether top-level Slack DMs get per-message session threads. @@ -584,6 +607,38 @@ class SlackAdapter(BasePlatformAdapter): logger.debug("[Slack] reactions.remove failed (%s): %s", emoji, e) return False + def _reactions_enabled(self) -> bool: + """Check if message reactions are enabled via config/env.""" + return os.getenv("SLACK_REACTIONS", "true").lower() not in ("false", "0", "no") + + async def on_processing_start(self, event: MessageEvent) -> None: + """Add an in-progress reaction when message processing begins.""" + if not self._reactions_enabled(): + return + ts = getattr(event, "message_id", None) + if not ts or ts not in self._reacting_message_ids: + return + channel_id = getattr(event.source, "chat_id", None) + if channel_id: + await self._add_reaction(channel_id, ts, "eyes") + + async def on_processing_complete(self, event: MessageEvent, outcome: ProcessingOutcome) -> None: + """Swap the in-progress reaction for a final success/failure reaction.""" + if not self._reactions_enabled(): + return + ts = getattr(event, "message_id", None) + if not ts or ts not in self._reacting_message_ids: + return + self._reacting_message_ids.discard(ts) + channel_id = getattr(event.source, "chat_id", None) + if not channel_id: + return + await self._remove_reaction(channel_id, ts, "eyes") + if outcome == ProcessingOutcome.SUCCESS: + await self._add_reaction(channel_id, ts, "white_check_mark") + elif 
outcome == ProcessingOutcome.FAILURE: + await self._add_reaction(channel_id, ts, "x") + # ----- User identity resolution ----- async def _resolve_user_name(self, user_id: str, chat_id: str = "") -> str: @@ -1213,17 +1268,12 @@ class SlackAdapter(BasePlatformAdapter): # Only react when bot is directly addressed (DM or @mention). # In listen-all channels (require_mention=false), reacting to every # casual message would be noisy. - _should_react = is_dm or is_mentioned - + _should_react = (is_dm or is_mentioned) and self._reactions_enabled() if _should_react: - await self._add_reaction(channel_id, ts, "eyes") + self._reacting_message_ids.add(ts) await self.handle_message(msg_event) - if _should_react: - await self._remove_reaction(channel_id, ts, "eyes") - await self._add_reaction(channel_id, ts, "white_check_mark") - # ----- Approval button support (Block Kit) ----- async def send_exec_approval( @@ -1600,11 +1650,9 @@ class SlackAdapter(BasePlatformAdapter): async def _download_slack_file(self, url: str, ext: str, audio: bool = False, team_id: str = "") -> str: """Download a Slack file using the bot token for auth, with retry.""" - import asyncio import httpx bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token - last_exc = None async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client: for attempt in range(3): @@ -1634,7 +1682,6 @@ class SlackAdapter(BasePlatformAdapter): from gateway.platforms.base import cache_image_from_bytes return cache_image_from_bytes(response.content, ext) except (httpx.TimeoutException, httpx.HTTPStatusError) as exc: - last_exc = exc if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429: raise if attempt < 2: @@ -1643,15 +1690,12 @@ class SlackAdapter(BasePlatformAdapter): await asyncio.sleep(1.5 * (attempt + 1)) continue raise - raise last_exc async def _download_slack_file_bytes(self, url: str, team_id: str = "") -> bytes: """Download a 
Slack file and return raw bytes, with retry.""" - import asyncio import httpx bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token - last_exc = None async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client: for attempt in range(3): @@ -1663,7 +1707,6 @@ class SlackAdapter(BasePlatformAdapter): response.raise_for_status() return response.content except (httpx.TimeoutException, httpx.HTTPStatusError) as exc: - last_exc = exc if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429: raise if attempt < 2: @@ -1672,7 +1715,6 @@ class SlackAdapter(BasePlatformAdapter): await asyncio.sleep(1.5 * (attempt + 1)) continue raise - raise last_exc # โ”€โ”€ Channel mention gating โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 67be808be..bec0d690a 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -794,8 +794,28 @@ class TelegramAdapter(BasePlatformAdapter): # Telegram pushes updates to our HTTP endpoint. This # enables cloud platforms (Fly.io, Railway) to auto-wake # suspended machines on inbound HTTP traffic. + # + # SECURITY: TELEGRAM_WEBHOOK_SECRET is REQUIRED. Without it, + # python-telegram-bot passes secret_token=None and the + # webhook endpoint accepts any HTTP POST โ€” attackers can + # inject forged updates as if from Telegram. Refuse to + # start rather than silently run in fail-open mode. + # See GHSA-3vpc-7q5r-276h. webhook_port = int(os.getenv("TELEGRAM_WEBHOOK_PORT", "8443")) - webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip() or None + webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip() + if not webhook_secret: + raise RuntimeError( + "TELEGRAM_WEBHOOK_SECRET is required when " + "TELEGRAM_WEBHOOK_URL is set. 
Without it, the " + "webhook endpoint accepts forged updates from " + "anyone who can reach it โ€” see " + "https://github.com/NousResearch/hermes-agent/" + "security/advisories/GHSA-3vpc-7q5r-276h.\n\n" + "Generate a secret and set it in your .env:\n" + " export TELEGRAM_WEBHOOK_SECRET=\"$(openssl rand -hex 32)\"\n\n" + "Then register it with Telegram when setting the " + "webhook via setWebhook's secret_token parameter." + ) from urllib.parse import urlparse webhook_path = urlparse(webhook_url).path or "/telegram" @@ -1713,7 +1733,6 @@ class TelegramAdapter(BasePlatformAdapter): return SendResult(success=False, error="Not connected") try: - import os if not os.path.exists(audio_path): return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path)) @@ -1762,7 +1781,6 @@ class TelegramAdapter(BasePlatformAdapter): return SendResult(success=False, error="Not connected") try: - import os if not os.path.exists(image_path): return SendResult(success=False, error=self._missing_media_path_error("Image", image_path)) @@ -2335,10 +2353,16 @@ class TelegramAdapter(BasePlatformAdapter): DMs remain unrestricted. Group/supergroup messages are accepted when: - the chat is explicitly allowlisted in ``free_response_chats`` - ``require_mention`` is disabled - - the message is a command - the message replies to the bot - the bot is @mentioned - the text/caption matches a configured regex wake-word pattern + + When ``require_mention`` is enabled, slash commands are not given + special treatment โ€” they must pass the same mention/reply checks + as any other group message. Users can still trigger commands via + the Telegram bot menu (``/command@botname``) or by explicitly + mentioning the bot (``@botname /command``), both of which are + recognised as mentions by :meth:`_message_mentions_bot`. 
""" if not self._is_group_chat(message): return True @@ -2353,8 +2377,6 @@ class TelegramAdapter(BasePlatformAdapter): return True if not self._telegram_require_mention(): return True - if is_command: - return True if self._is_reply_to_bot(message): return True if self._message_mentions_bot(message): @@ -2823,13 +2845,11 @@ class TelegramAdapter(BasePlatformAdapter): logger.info("[Telegram] Analyzing sticker at %s", cached_path) from tools.vision_tools import vision_analyze_tool - import json as _json - result_json = await vision_analyze_tool( image_url=cached_path, user_prompt=STICKER_VISION_PROMPT, ) - result = _json.loads(result_json) + result = json.loads(result_json) if result.get("success"): description = result.get("analysis", "a sticker") diff --git a/gateway/platforms/wecom.py b/gateway/platforms/wecom.py index 9e5dd04e0..a6506d18a 100644 --- a/gateway/platforms/wecom.py +++ b/gateway/platforms/wecom.py @@ -624,13 +624,16 @@ class WeComAdapter(BasePlatformAdapter): msgtype = str(body.get("msgtype") or "").lower() if msgtype == "mixed": - mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {} - items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else [] + _raw_mixed = body.get("mixed") + mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {} + _raw_items = mixed.get("msg_item") + items = _raw_items if isinstance(_raw_items, list) else [] for item in items: if not isinstance(item, dict): continue if str(item.get("msgtype") or "").lower() == "text": - text_block = item.get("text") if isinstance(item.get("text"), dict) else {} + _raw_text = item.get("text") + text_block = _raw_text if isinstance(_raw_text, dict) else {} content = str(text_block.get("content") or "").strip() if content: text_parts.append(content) @@ -672,8 +675,10 @@ class WeComAdapter(BasePlatformAdapter): msgtype = str(body.get("msgtype") or "").lower() if msgtype == "mixed": - mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {} 
- items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else [] + _raw_mixed = body.get("mixed") + mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {} + _raw_items = mixed.get("msg_item") + items = _raw_items if isinstance(_raw_items, list) else [] for item in items: if not isinstance(item, dict): continue @@ -1459,3 +1464,134 @@ class WeComAdapter(BasePlatformAdapter): "name": chat_id, "type": "group" if chat_id and chat_id.lower().startswith("group") else "dm", } + + +# ------------------------------------------------------------------ +# QR code scan flow for obtaining bot credentials +# ------------------------------------------------------------------ + +_QR_GENERATE_URL = "https://work.weixin.qq.com/ai/qc/generate" +_QR_QUERY_URL = "https://work.weixin.qq.com/ai/qc/query_result" +_QR_CODE_PAGE = "https://work.weixin.qq.com/ai/qc/gen?source=hermes&scode=" +_QR_POLL_INTERVAL = 3 # seconds +_QR_POLL_TIMEOUT = 300 # 5 minutes + + +def qr_scan_for_bot_info( + *, + timeout_seconds: int = _QR_POLL_TIMEOUT, +) -> Optional[Dict[str, str]]: + """Run the WeCom QR scan flow to obtain bot_id and secret. + + Fetches a QR code from WeCom, renders it in the terminal, and polls + until the user scans it or the timeout expires. + + Returns ``{"bot_id": ..., "secret": ...}`` on success, ``None`` on + failure or timeout. + + Note: the ``work.weixin.qq.com/ai/qc/{generate,query_result}`` endpoints + used here are not part of WeCom's public developer API โ€” they back the + admin-console web UI's bot-creation flow and may change without notice. + The same pattern is used by the feishu/dingtalk QR setup wizards. 
+ """ + try: + import urllib.request + import urllib.parse + except ImportError: # pragma: no cover + logger.error("urllib is required for WeCom QR scan") + return None + + generate_url = f"{_QR_GENERATE_URL}?source=hermes" + + # โ”€โ”€ Step 1: Fetch QR code โ”€โ”€ + print(" Connecting to WeCom...", end="", flush=True) + try: + req = urllib.request.Request(generate_url, headers={"User-Agent": "HermesAgent/1.0"}) + with urllib.request.urlopen(req, timeout=15) as resp: + raw = json.loads(resp.read().decode("utf-8")) + except Exception as exc: + logger.error("WeCom QR: failed to fetch QR code: %s", exc) + print(f" failed: {exc}") + return None + + data = raw.get("data") or {} + scode = str(data.get("scode") or "").strip() + auth_url = str(data.get("auth_url") or "").strip() + + if not scode or not auth_url: + logger.error("WeCom QR: unexpected response format: %s", raw) + print(" failed: unexpected response format") + return None + + print(" done.") + + # โ”€โ”€ Step 2: Render QR code in terminal โ”€โ”€ + print() + qr_rendered = False + try: + import qrcode as _qrcode + qr = _qrcode.QRCode() + qr.add_data(auth_url) + qr.make(fit=True) + qr.print_ascii(invert=True) + qr_rendered = True + except ImportError: + pass + except Exception: + pass + + page_url = f"{_QR_CODE_PAGE}{urllib.parse.quote(scode)}" + if qr_rendered: + print(f"\n Scan the QR code above, or open this URL directly:\n {page_url}") + else: + print(f" Open this URL in WeCom on your phone:\n\n {page_url}\n") + print(" Tip: pip install qrcode to display a scannable QR code here next time") + print() + print(" Fetching configuration results...", end="", flush=True) + + # โ”€โ”€ Step 3: Poll for result โ”€โ”€ + import time + deadline = time.time() + timeout_seconds + query_url = f"{_QR_QUERY_URL}?scode={urllib.parse.quote(scode)}" + poll_count = 0 + + while time.time() < deadline: + try: + req = urllib.request.Request(query_url, headers={"User-Agent": "HermesAgent/1.0"}) + with urllib.request.urlopen(req, 
timeout=10) as resp: + result = json.loads(resp.read().decode("utf-8")) + except Exception as exc: + logger.debug("WeCom QR poll error: %s", exc) + time.sleep(_QR_POLL_INTERVAL) + continue + + poll_count += 1 + # Print a dot on every poll so progress is visible within 3s. + print(".", end="", flush=True) + + result_data = result.get("data") or {} + status = str(result_data.get("status") or "").lower() + + if status == "success": + print() # newline after "Fetching configuration results..." dots + bot_info = result_data.get("bot_info") or {} + bot_id = str(bot_info.get("botid") or bot_info.get("bot_id") or "").strip() + secret = str(bot_info.get("secret") or "").strip() + if bot_id and secret: + return {"bot_id": bot_id, "secret": secret} + logger.warning( + "WeCom QR: scan reported success but bot_info missing or incomplete: %s", + result_data, + ) + print( + " QR scan reported success but no bot credentials were returned.\n" + " This usually means the bot was not actually created on the WeCom side.\n" + " Falling back to manual credential entry." + ) + return None + + time.sleep(_QR_POLL_INTERVAL) + + print() # newline after dots + print(f" QR scan timed out ({timeout_seconds // 60} minutes). 
Please try again.") + return None diff --git a/gateway/platforms/whatsapp.py b/gateway/platforms/whatsapp.py index 767908023..a82417a60 100644 --- a/gateway/platforms/whatsapp.py +++ b/gateway/platforms/whatsapp.py @@ -399,7 +399,6 @@ class WhatsAppAdapter(BasePlatformAdapter): # Check if bridge is already running and connected import aiohttp - import asyncio try: async with aiohttp.ClientSession() as session: async with session.get( diff --git a/gateway/run.py b/gateway/run.py index 6ce409ff1..a024649cb 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -30,6 +30,8 @@ from pathlib import Path from datetime import datetime from typing import Dict, Optional, Any, List +from agent.account_usage import fetch_account_usage, render_account_usage_lines + # --- Agent cache tuning --------------------------------------------------- # Bounds the per-session AIAgent cache to prevent unbounded growth in # long-lived gateways (each AIAgent holds LLM clients, tool schemas, @@ -279,6 +281,7 @@ from gateway.session import ( build_session_context, build_session_context_prompt, build_session_key, + is_shared_multi_user_session, ) from gateway.delivery import DeliveryRouter from gateway.platforms.base import ( @@ -707,7 +710,26 @@ class GatewayRunner: self._session_db = SessionDB() except Exception as e: logger.debug("SQLite session store not available: %s", e) - + + # Opportunistic state.db maintenance: prune ended sessions older + # than sessions.retention_days + optional VACUUM. Tracks last-run + # in state_meta so it only actually executes once per + # sessions.min_interval_hours. Gateway is long-lived so blocking + # a few seconds once per day is acceptable; failures are logged + # but never raised. 
+ if self._session_db is not None: + try: + from hermes_cli.config import load_config as _load_full_config + _sess_cfg = (_load_full_config().get("sessions") or {}) + if _sess_cfg.get("auto_prune", False): + self._session_db.maybe_auto_prune_and_vacuum( + retention_days=int(_sess_cfg.get("retention_days", 90)), + min_interval_hours=int(_sess_cfg.get("min_interval_hours", 24)), + vacuum=bool(_sess_cfg.get("vacuum_after_prune", True)), + ) + except Exception as exc: + logger.debug("state.db auto-maintenance skipped: %s", exc) + # DM pairing store for code-based user authorization from gateway.pairing import PairingStore self.pairing_store = PairingStore() @@ -1266,7 +1288,6 @@ class GatewayRunner: the prefill_messages_file key in ~/.hermes/config.yaml. Relative paths are resolved from ~/.hermes/. """ - import json as _json file_path = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") if not file_path: try: @@ -1288,7 +1309,7 @@ class GatewayRunner: return [] try: with open(path, "r", encoding="utf-8") as f: - data = _json.load(f) + data = json.load(f) if not isinstance(data, list): logger.warning("Prefill messages file must contain a JSON array: %s", path) return [] @@ -2666,8 +2687,9 @@ class GatewayRunner: except Exception as _e: logger.debug("SessionDB close error: %s", _e) - from gateway.status import remove_pid_file + from gateway.status import remove_pid_file, release_gateway_runtime_lock remove_pid_file() + release_gateway_runtime_lock() # Write a clean-shutdown marker so the next startup knows this # wasn't a crash. 
suspend_recently_active() only needs to run @@ -3275,10 +3297,9 @@ class GatewayRunner: return "Usage: /queue " adapter = self.adapters.get(source.platform) if adapter: - from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT - queued_event = _ME( + queued_event = MessageEvent( text=queued_text, - message_type=_MT.TEXT, + message_type=MessageType.TEXT, source=event.source, message_id=event.message_id, channel_prompt=event.channel_prompt, @@ -3300,10 +3321,9 @@ class GatewayRunner: # Agent hasn't started yet โ€” queue as turn-boundary fallback. adapter = self.adapters.get(source.platform) if adapter: - from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT - queued_event = _ME( + queued_event = MessageEvent( text=steer_text, - message_type=_MT.TEXT, + message_type=MessageType.TEXT, source=event.source, message_id=event.message_id, channel_prompt=event.channel_prompt, @@ -3323,10 +3343,9 @@ class GatewayRunner: # Running agent is missing or lacks steer() โ€” fall back to queue. adapter = self.adapters.get(source.platform) if adapter: - from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT - queued_event = _ME( + queued_event = MessageEvent( text=steer_text, - message_type=_MT.TEXT, + message_type=MessageType.TEXT, source=event.source, message_id=event.message_id, channel_prompt=event.channel_prompt, @@ -3467,23 +3486,73 @@ class GatewayRunner: # Check for commands command = event.get_command() - - # Emit command:* hook for any recognized slash command. - # GATEWAY_KNOWN_COMMANDS is derived from the central COMMAND_REGISTRY - # in hermes_cli/commands.py โ€” no hardcoded set to maintain here. 
- from hermes_cli.commands import GATEWAY_KNOWN_COMMANDS, resolve_command as _resolve_cmd - if command and command in GATEWAY_KNOWN_COMMANDS: - await self.hooks.emit(f"command:{command}", { - "platform": source.platform.value if source.platform else "", - "user_id": source.user_id, - "command": command, - "args": event.get_command_args().strip(), - }) - # Resolve aliases to canonical name so dispatch only checks canonicals. + from hermes_cli.commands import ( + GATEWAY_KNOWN_COMMANDS, + is_gateway_known_command, + resolve_command as _resolve_cmd, + ) + + # Resolve aliases to canonical name so dispatch and hook names + # don't depend on the exact alias the user typed. _cmd_def = _resolve_cmd(command) if command else None canonical = _cmd_def.name if _cmd_def else command + # Fire the ``command:`` hook for any recognized slash + # command โ€” built-in OR plugin-registered. Handlers can return a + # dict with ``{"decision": "deny" | "handled" | "rewrite", ...}`` + # to intercept dispatch before core handling runs. This replaces + # the previous fire-and-forget emit(): return values are now + # honored, but handlers that return nothing behave exactly as + # before (telemetry-style hooks keep working). 
+ if command and is_gateway_known_command(canonical): + raw_args = event.get_command_args().strip() + hook_ctx = { + "platform": source.platform.value if source.platform else "", + "user_id": source.user_id, + "command": canonical, + "raw_command": command, + "args": raw_args, + "raw_args": raw_args, + } + try: + hook_results = await self.hooks.emit_collect( + f"command:{canonical}", hook_ctx + ) + except Exception as _hook_err: + logger.debug( + "command:%s hook dispatch failed (non-fatal): %s", + canonical, _hook_err, + ) + hook_results = [] + + for hook_result in hook_results: + if not isinstance(hook_result, dict): + continue + decision = str(hook_result.get("decision", "")).strip().lower() + if not decision or decision == "allow": + continue + if decision == "deny": + message = hook_result.get("message") + if isinstance(message, str) and message: + return message + return f"Command `/{command}` was blocked by a hook." + if decision == "handled": + message = hook_result.get("message") + return message if isinstance(message, str) and message else None + if decision == "rewrite": + new_command = str( + hook_result.get("command_name", "") + ).strip().lstrip("/") + if not new_command: + continue + new_args = str(hook_result.get("raw_args", "")).strip() + event.text = f"/{new_command} {new_args}".strip() + command = event.get_command() + _cmd_def = _resolve_cmd(command) if command else None + canonical = _cmd_def.name if _cmd_def else command + break + if canonical == "new": return await self._handle_reset_command(event) @@ -3675,9 +3744,8 @@ class GatewayRunner: plugin_handler = get_plugin_command_handler(command.replace("_", "-")) if plugin_handler: user_args = event.get_command_args().strip() - import asyncio as _aio result = plugin_handler(user_args) - if _aio.iscoroutine(result): + if asyncio.iscoroutine(result): result = await result return str(result) if result else None except Exception as e: @@ -3794,12 +3862,12 @@ class GatewayRunner: history = history or 
[] message_text = event.text or "" - _is_shared_thread = ( - source.chat_type != "dm" - and source.thread_id - and not getattr(self.config, "thread_sessions_per_user", False) + _is_shared_multi_user = is_shared_multi_user_session( + source, + group_sessions_per_user=getattr(self.config, "group_sessions_per_user", True), + thread_sessions_per_user=getattr(self.config, "thread_sessions_per_user", False), ) - if _is_shared_thread and source.user_name: + if _is_shared_multi_user and source.user_name: message_text = f"[{source.user_name}] {message_text}" if event.media_urls: @@ -3859,9 +3927,7 @@ class GatewayRunner: for i, path in enumerate(event.media_urls): mtype = event.media_types[i] if i < len(event.media_types) else "" if mtype in ("", "application/octet-stream"): - import os as _os2 - - _ext = _os2.path.splitext(path)[1].lower() + _ext = os.path.splitext(path)[1].lower() if _ext in _TEXT_EXTENSIONS: mtype = "text/plain" else: @@ -3871,13 +3937,10 @@ class GatewayRunner: if not mtype.startswith(("application/", "text/")): continue - import os as _os - import re as _re - - basename = _os.path.basename(path) + basename = os.path.basename(path) parts = basename.split("_", 2) display_name = parts[2] if len(parts) >= 3 else basename - display_name = _re.sub(r'[^\w.\- ]', '_', display_name) + display_name = re.sub(r'[^\w.\- ]', '_', display_name) if mtype.startswith("text/"): context_note = ( @@ -3894,14 +3957,14 @@ class GatewayRunner: message_text = f"{context_note}\n\n{message_text}" if getattr(event, "reply_to_text", None) and event.reply_to_message_id: + # Always inject the reply-to pointer โ€” even when the quoted text + # already appears in history. The prefix isn't deduplication, it's + # disambiguation: it tells the agent *which* prior message the user + # is referencing. History can contain the same or similar text + # multiple times, and without an explicit pointer the agent has to + # guess (or answer for both subjects). Token overhead is minimal. 
reply_snippet = event.reply_to_text[:500] - found_in_history = any( - reply_snippet[:200] in (msg.get("content") or "") - for msg in history - if msg.get("role") in ("assistant", "user", "tool") - ) - if not found_in_history: - message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}' + message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}' if "@" in message_text: try: @@ -4908,6 +4971,11 @@ class GatewayRunner: # the configured default instead of the previously switched model. self._session_model_overrides.pop(session_key, None) + # Clear session-scoped dangerous-command approvals and /yolo state. + # /new is a conversation-boundary operation โ€” approval state from the + # previous conversation must not survive the reset. + self._clear_session_boundary_security_state(session_key) + # Fire plugin on_session_finalize hook (session boundary) try: from hermes_cli.plugins import invoke_hook as _invoke_hook @@ -5175,7 +5243,6 @@ class GatewayRunner: # Save the requester's routing info so the new gateway process can # notify them once it comes back online. try: - import json as _json notify_data = { "platform": event.source.platform.value if event.source.platform else None, "chat_id": event.source.chat_id, @@ -5183,7 +5250,7 @@ class GatewayRunner: if event.source.thread_id: notify_data["thread_id"] = event.source.thread_id (_hermes_home / ".restart_notify.json").write_text( - _json.dumps(notify_data) + json.dumps(notify_data) ) except Exception as e: logger.debug("Failed to write restart notify file: %s", e) @@ -5194,16 +5261,14 @@ class GatewayRunner: # marker persists so the new gateway can still detect a delayed # /restart redelivery from Telegram. Overwritten on every /restart. 
try: - import json as _json - import time as _time dedup_data = { "platform": event.source.platform.value if event.source.platform else None, - "requested_at": _time.time(), + "requested_at": time.time(), } if event.platform_update_id is not None: dedup_data["update_id"] = event.platform_update_id (_hermes_home / ".restart_last_processed.json").write_text( - _json.dumps(dedup_data) + json.dumps(dedup_data) ) except Exception as e: logger.debug("Failed to write restart dedup marker: %s", e) @@ -5251,12 +5316,10 @@ class GatewayRunner: return False try: - import json as _json - import time as _time marker_path = _hermes_home / ".restart_last_processed.json" if not marker_path.exists(): return False - data = _json.loads(marker_path.read_text()) + data = json.loads(marker_path.read_text()) except Exception: return False @@ -5270,7 +5333,7 @@ class GatewayRunner: # swallow a fresh /restart from the user. requested_at = data.get("requested_at") if isinstance(requested_at, (int, float)): - if _time.time() - requested_at > 300: + if time.time() - requested_at > 300: return False return event.platform_update_id <= recorded_uid @@ -6468,6 +6531,11 @@ class GatewayRunner: session_id=task_id, platform=platform_key, user_id=source.user_id, + user_name=source.user_name, + chat_id=source.chat_id, + chat_name=source.chat_name, + chat_type=source.chat_type, + thread_id=source.thread_id, session_db=self._session_db, fallback_model=self._fallback_model, ) @@ -7154,6 +7222,7 @@ class GatewayRunner: new_entry = self.session_store.switch_session(session_key, target_id) if not new_entry: return "Failed to switch session." 
+ self._clear_session_boundary_security_state(session_key) # Get the title for confirmation title = self._session_db.get_session_title(target_id) or name @@ -7228,6 +7297,7 @@ class GatewayRunner: tool_calls=msg.get("tool_calls"), tool_call_id=msg.get("tool_call_id"), reasoning=msg.get("reasoning"), + reasoning_content=msg.get("reasoning_content"), ) except Exception: pass # Best-effort copy @@ -7242,6 +7312,7 @@ class GatewayRunner: new_entry = self.session_store.switch_session(session_key, new_session_id) if not new_entry: return "Branch created but failed to switch to it." + self._clear_session_boundary_security_state(session_key) # Evict any cached agent for this session self._evict_cached_agent(session_key) @@ -7276,6 +7347,38 @@ class GatewayRunner: if cached: agent = cached[0] + # Resolve provider/base_url/api_key for the account-usage fetch. + # Prefer the live agent; fall back to persisted billing data on the + # SessionDB row so `/usage` still returns account info between turns + # when no agent is resident. + provider = getattr(agent, "provider", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None + base_url = getattr(agent, "base_url", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None + api_key = getattr(agent, "api_key", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None + if not provider and getattr(self, "_session_db", None) is not None: + try: + _entry_for_billing = self.session_store.get_or_create_session(source) + persisted = self._session_db.get_session(_entry_for_billing.session_id) or {} + except Exception: + persisted = {} + provider = provider or persisted.get("billing_provider") + base_url = base_url or persisted.get("billing_base_url") + + # Fetch account usage off the event loop so slow provider APIs don't + # block the gateway. Failures are non-fatal -- account_lines stays []. 
+ account_lines: list[str] = [] + if provider: + try: + account_snapshot = await asyncio.to_thread( + fetch_account_usage, + provider, + base_url=base_url, + api_key=api_key, + ) + except Exception: + account_snapshot = None + if account_snapshot: + account_lines = render_account_usage_lines(account_snapshot, markdown=True) + if agent and hasattr(agent, "session_total_tokens") and agent.session_api_calls > 0: lines = [] @@ -7333,6 +7436,10 @@ class GatewayRunner: if ctx.compression_count: lines.append(f"Compressions: {ctx.compression_count}") + if account_lines: + lines.append("") + lines.extend(account_lines) + return "\n".join(lines) # No agent at all -- check session history for a rough count @@ -7342,23 +7449,26 @@ class GatewayRunner: from agent.model_metadata import estimate_messages_tokens_rough msgs = [m for m in history if m.get("role") in ("user", "assistant") and m.get("content")] approx = estimate_messages_tokens_rough(msgs) - return ( - f"๐Ÿ“Š **Session Info**\n" - f"Messages: {len(msgs)}\n" - f"Estimated context: ~{approx:,} tokens\n" - f"_(Detailed usage available after the first agent response)_" - ) + lines = [ + "๐Ÿ“Š **Session Info**", + f"Messages: {len(msgs)}", + f"Estimated context: ~{approx:,} tokens", + "_(Detailed usage available after the first agent response)_", + ] + if account_lines: + lines.append("") + lines.extend(account_lines) + return "\n".join(lines) + if account_lines: + return "\n".join(account_lines) return "No usage data available for this session." 
async def _handle_insights_command(self, event: MessageEvent) -> str: """Handle /insights command -- show usage insights and analytics.""" - import asyncio as _asyncio - args = event.get_command_args().strip() # Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash) - import re as _re - args = _re.sub(r'[\u2012\u2013\u2014\u2015](days|source)', r'--\1', args) + args = re.sub(r'[\u2012\u2013\u2014\u2015](days|source)', r'--\1', args) days = 30 source = None @@ -7387,7 +7497,7 @@ class GatewayRunner: from hermes_state import SessionDB from agent.insights import InsightsEngine - loop = _asyncio.get_running_loop() + loop = asyncio.get_running_loop() def _run_insights(): db = SessionDB() @@ -7593,13 +7703,14 @@ class GatewayRunner: from hermes_cli.debug import ( _capture_dump, collect_debug_report, upload_to_pastebin, _schedule_auto_delete, - _GATEWAY_PRIVACY_NOTICE, + _GATEWAY_PRIVACY_NOTICE, _best_effort_sweep_expired_pastes, ) loop = asyncio.get_running_loop() # Run blocking I/O (dump capture, log reads, uploads) in a thread. def _collect_and_upload(): + _best_effort_sweep_expired_pastes() dump_text = _capture_dump() report = collect_debug_report(log_lines=200, dump_text=dump_text) @@ -7745,9 +7856,6 @@ class GatewayRunner: the messenger. The user's next message is intercepted by ``_handle_message`` and written to ``.update_response``. """ - import json - import re as _re - pending_path = _hermes_home / ".update_pending.json" claimed_path = _hermes_home / ".update_pending.claimed.json" output_path = _hermes_home / ".update_output.txt" @@ -7792,7 +7900,7 @@ class GatewayRunner: return def _strip_ansi(text: str) -> str: - return _re.sub(r'\x1b\[[0-9;]*[A-Za-z]', '', text) + return re.sub(r'\x1b\[[0-9;]*[A-Za-z]', '', text) bytes_sent = 0 last_stream_time = loop.time() @@ -7940,9 +8048,6 @@ class GatewayRunner: cannot resolve the adapter (e.g. after a gateway restart where the platform hasn't reconnected yet). 
""" - import json - import re as _re - pending_path = _hermes_home / ".update_pending.json" claimed_path = _hermes_home / ".update_pending.claimed.json" output_path = _hermes_home / ".update_output.txt" @@ -7988,7 +8093,7 @@ class GatewayRunner: if adapter and chat_id: # Strip ANSI escape codes for clean display - output = _re.sub(r'\x1b\[[0-9;]*m', '', output).strip() + output = re.sub(r'\x1b\[[0-9;]*m', '', output).strip() if output: if len(output) > 3500: output = "โ€ฆ" + output[-3500:] @@ -8021,14 +8126,12 @@ class GatewayRunner: async def _send_restart_notification(self) -> None: """Notify the chat that initiated /restart that the gateway is back.""" - import json as _json - notify_path = _hermes_home / ".restart_notify.json" if not notify_path.exists(): return try: - data = _json.loads(notify_path.read_text()) + data = json.loads(notify_path.read_text()) platform_str = data.get("platform") chat_id = data.get("chat_id") thread_id = data.get("thread_id") @@ -8114,7 +8217,6 @@ class GatewayRunner: The enriched message string with vision descriptions prepended. """ from tools.vision_tools import vision_analyze_tool - import json as _json analysis_prompt = ( "Describe everything visible in this image in thorough detail. 
" @@ -8130,7 +8232,7 @@ class GatewayRunner: image_url=path, user_prompt=analysis_prompt, ) - result = _json.loads(result_json) + result = json.loads(result_json) if result.get("success"): description = result.get("analysis", "") enriched_parts.append( @@ -8189,7 +8291,6 @@ class GatewayRunner: return disabled_note from tools.transcription_tools import transcribe_audio - import asyncio enriched_parts = [] for path in audio_paths: @@ -8325,7 +8426,6 @@ class GatewayRunner: if not adapter: return try: - from gateway.platforms.base import MessageEvent, MessageType synth_event = MessageEvent( text=synth_text, message_type=MessageType.TEXT, @@ -8430,7 +8530,6 @@ class GatewayRunner: break if adapter and source.chat_id: try: - from gateway.platforms.base import MessageEvent, MessageType synth_event = MessageEvent( text=synth_text, message_type=MessageType.TEXT, @@ -8588,6 +8687,29 @@ class GatewayRunner: if hasattr(self, "_busy_ack_ts"): self._busy_ack_ts.pop(session_key, None) + def _clear_session_boundary_security_state(self, session_key: str) -> None: + """Clear approval state that must not survive a real conversation switch.""" + if not session_key: + return + + pending_approvals = getattr(self, "_pending_approvals", None) + if isinstance(pending_approvals, dict): + pending_approvals.pop(session_key, None) + + try: + from tools.approval import clear_session as _clear_approval_session + except Exception: + return + + try: + _clear_approval_session(session_key) + except Exception as e: + logger.debug( + "Failed to clear approval state for session boundary %s: %s", + session_key, + e, + ) + def _begin_session_run_generation(self, session_key: str) -> int: """Claim a fresh run generation token for ``session_key``. 
@@ -8952,7 +9074,6 @@ class GatewayRunner: if _streaming_enabled: try: from gateway.stream_consumer import GatewayStreamConsumer, StreamConsumerConfig - from gateway.config import Platform _adapter = self.adapters.get(source.platform) if _adapter: _adapter_supports_edit = getattr(_adapter, "SUPPORTS_MESSAGE_EDITING", True) @@ -9236,8 +9357,7 @@ class GatewayRunner: if args: from agent.display import get_tool_preview_max_len _pl = get_tool_preview_max_len() - import json as _json - args_str = _json.dumps(args, ensure_ascii=False, default=str) + args_str = json.dumps(args, ensure_ascii=False, default=str) # When tool_preview_length is 0 (default), don't truncate # in verbose mode โ€” the user explicitly asked for full # detail. Platform message-length limits handle the rest. @@ -9303,8 +9423,7 @@ class GatewayRunner: # Skip tool progress for platforms that don't support message # editing (e.g. iMessage/BlueBubbles) โ€” each progress update # would become a separate message bubble, which is noisy. - from gateway.platforms.base import BasePlatformAdapter as _BaseAdapter - if type(adapter).edit_message is _BaseAdapter.edit_message: + if type(adapter).edit_message is BasePlatformAdapter.edit_message: while not progress_queue.empty(): try: progress_queue.get_nowait() @@ -9686,6 +9805,11 @@ class GatewayRunner: session_id=session_id, platform=platform_key, user_id=source.user_id, + user_name=source.user_name, + chat_id=source.chat_id, + chat_name=source.chat_name, + chat_type=source.chat_type, + thread_id=source.thread_id, gateway_session_key=session_key, session_db=self._session_db, fallback_model=self._fallback_model, @@ -10752,8 +10876,13 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = # The PID file is scoped to HERMES_HOME, so future multi-profile # setups (each profile using a distinct HERMES_HOME) will naturally # allow concurrent instances without tripping this guard. 
- import time as _time - from gateway.status import get_running_pid, remove_pid_file, terminate_pid + from gateway.status import ( + acquire_gateway_runtime_lock, + get_running_pid, + release_gateway_runtime_lock, + remove_pid_file, + terminate_pid, + ) existing_pid = get_running_pid() if existing_pid is not None and existing_pid != os.getpid(): if replace: @@ -10792,7 +10921,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = for _ in range(20): try: os.kill(existing_pid, 0) - _time.sleep(0.5) + time.sleep(0.5) except (ProcessLookupError, PermissionError): break # Process is gone else: @@ -10803,10 +10932,16 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = ) try: terminate_pid(existing_pid, force=True) - _time.sleep(0.5) + time.sleep(0.5) except (ProcessLookupError, PermissionError, OSError): pass remove_pid_file() + # remove_pid_file() is a no-op when the PID doesn't match. + # Force-unlink to cover the old-process-crashed case. + try: + (get_hermes_home() / "gateway.pid").unlink(missing_ok=True) + except Exception: + pass # Clean up any takeover marker the old process didn't consume # (e.g. SIGKILL'd before its shutdown handler could read it). try: @@ -10945,6 +11080,37 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = else: logger.info("Skipping signal handlers (not running in main thread).") + # Claim the PID file BEFORE bringing up any platform adapters. + # This closes the --replace race window: two concurrent `gateway run + # --replace` invocations both pass the termination-wait above, but + # only the winner of the O_CREAT|O_EXCL race below will ever open + # Telegram polling, Discord gateway sockets, etc. The loser exits + # cleanly before touching any external service. 
+ import atexit + from gateway.status import write_pid_file, remove_pid_file, get_running_pid + _current_pid = get_running_pid() + if _current_pid is not None and _current_pid != os.getpid(): + logger.error( + "Another gateway instance (PID %d) started during our startup. " + "Exiting to avoid double-running.", _current_pid + ) + return False + if not acquire_gateway_runtime_lock(): + logger.error( + "Gateway runtime lock is already held by another instance. Exiting." + ) + return False + try: + write_pid_file() + except FileExistsError: + release_gateway_runtime_lock() + logger.error( + "PID file race lost to another gateway instance. Exiting." + ) + return False + atexit.register(remove_pid_file) + atexit.register(release_gateway_runtime_lock) + # Start the gateway success = await runner.start() if not success: @@ -10954,12 +11120,6 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = logger.error("Gateway exiting cleanly: %s", runner.exit_reason) return True - # Write PID file so CLI can detect gateway is running - import atexit - from gateway.status import write_pid_file, remove_pid_file - write_pid_file() - atexit.register(remove_pid_file) - # Start background cron ticker so scheduled jobs fire automatically. # Pass the event loop so cron delivery can use live adapters (E2EE support). cron_stop = threading.Event() diff --git a/gateway/session.py b/gateway/session.py index 81278e852..db90d3121 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -80,7 +80,7 @@ class SessionSource: user_name: Optional[str] = None thread_id: Optional[str] = None # For forum topics, Discord threads, etc. 
chat_topic: Optional[str] = None # Channel topic/description (Discord, Slack) - user_id_alt: Optional[str] = None # Signal UUID (alternative to phone number) + user_id_alt: Optional[str] = None # Platform-specific stable alt ID (Signal UUID, Feishu union_id) chat_id_alt: Optional[str] = None # Signal group internal ID is_bot: bool = False # True when the message author is a bot/webhook (Discord) @@ -152,6 +152,7 @@ class SessionContext: source: SessionSource connected_platforms: List[Platform] home_channels: Dict[Platform, HomeChannel] + shared_multi_user_session: bool = False # Session metadata session_key: str = "" @@ -166,6 +167,7 @@ class SessionContext: "home_channels": { p.value: hc.to_dict() for p, hc in self.home_channels.items() }, + "shared_multi_user_session": self.shared_multi_user_session, "session_key": self.session_key, "session_id": self.session_id, "created_at": self.created_at.isoformat() if self.created_at else None, @@ -240,18 +242,16 @@ def build_session_context_prompt( lines.append(f"**Channel Topic:** {context.source.chat_topic}") # User identity. - # In shared thread sessions (non-DM with thread_id), multiple users - # contribute to the same conversation. Don't pin a single user name - # in the system prompt โ€” it changes per-turn and would bust the prompt - # cache. Instead, note that this is a multi-user thread; individual - # sender names are prefixed on each user message by the gateway. - _is_shared_thread = ( - context.source.chat_type != "dm" - and context.source.thread_id - ) - if _is_shared_thread: + # In shared multi-user sessions (shared threads OR shared non-thread groups + # when group_sessions_per_user=False), multiple users contribute to the same + # conversation. Don't pin a single user name in the system prompt โ€” it + # changes per-turn and would bust the prompt cache. Instead, note that + # this is a multi-user session; individual sender names are prefixed on + # each user message by the gateway. 
+ if context.shared_multi_user_session: + session_label = "Multi-user thread" if context.source.thread_id else "Multi-user session" lines.append( - "**Session type:** Multi-user thread โ€” messages are prefixed " + f"**Session type:** {session_label} โ€” messages are prefixed " "with [sender name]. Multiple users may participate." ) elif context.source.user_name: @@ -467,6 +467,27 @@ class SessionEntry: ) +def is_shared_multi_user_session( + source: SessionSource, + *, + group_sessions_per_user: bool = True, + thread_sessions_per_user: bool = False, +) -> bool: + """Return True when a non-DM session is shared across participants. + + Mirrors the isolation rules in :func:`build_session_key`: + - DMs are never shared. + - Threads are shared unless ``thread_sessions_per_user`` is True. + - Non-thread group/channel sessions are shared unless + ``group_sessions_per_user`` is True (default: True = isolated). + """ + if source.chat_type == "dm": + return False + if source.thread_id: + return not thread_sessions_per_user + return not group_sessions_per_user + + def build_session_key( source: SessionSource, group_sessions_per_user: bool = True, @@ -1126,6 +1147,10 @@ class SessionStore: tool_name=message.get("tool_name"), tool_calls=message.get("tool_calls"), tool_call_id=message.get("tool_call_id"), + reasoning=message.get("reasoning") if message.get("role") == "assistant" else None, + reasoning_content=message.get("reasoning_content") if message.get("role") == "assistant" else None, + reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None, + codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None, ) except Exception as e: logger.debug("Session DB operation failed: %s", e) @@ -1155,6 +1180,7 @@ class SessionStore: tool_calls=msg.get("tool_calls"), tool_call_id=msg.get("tool_call_id"), reasoning=msg.get("reasoning") if role == "assistant" else None, + 
reasoning_content=msg.get("reasoning_content") if role == "assistant" else None, reasoning_details=msg.get("reasoning_details") if role == "assistant" else None, codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None, ) @@ -1238,6 +1264,11 @@ def build_session_context( source=source, connected_platforms=connected, home_channels=home_channels, + shared_multi_user_session=is_shared_multi_user_session( + source, + group_sessions_per_user=getattr(config, "group_sessions_per_user", True), + thread_sessions_per_user=getattr(config, "thread_sessions_per_user", False), + ), ) if session_entry: diff --git a/gateway/status.py b/gateway/status.py index e1598e179..4cdf8f810 100644 --- a/gateway/status.py +++ b/gateway/status.py @@ -22,11 +22,18 @@ from pathlib import Path from hermes_constants import get_hermes_home from typing import Any, Optional +if sys.platform == "win32": + import msvcrt +else: + import fcntl + _GATEWAY_KIND = "hermes-gateway" _RUNTIME_STATUS_FILE = "gateway_state.json" _LOCKS_DIRNAME = "gateway-locks" _IS_WINDOWS = sys.platform == "win32" _UNSET = object() +_GATEWAY_LOCK_FILENAME = "gateway.lock" +_gateway_lock_handle = None def _get_pid_path() -> Path: @@ -35,6 +42,14 @@ def _get_pid_path() -> Path: return home / "gateway.pid" +def _get_gateway_lock_path(pid_path: Optional[Path] = None) -> Path: + """Return the path to the runtime gateway lock file.""" + if pid_path is not None: + return pid_path.with_name(_GATEWAY_LOCK_FILENAME) + home = get_hermes_home() + return home / _GATEWAY_LOCK_FILENAME + + def _get_runtime_status_path() -> Path: """Return the persisted runtime health/status file path.""" return _get_pid_path().with_name(_RUNTIME_STATUS_FILE) @@ -121,6 +136,7 @@ def _looks_like_gateway_process(pid: int) -> bool: "hermes_cli.main gateway", "hermes_cli/main.py gateway", "hermes gateway", + "hermes-gateway", "gateway/run.py", ) return any(pattern in cmdline for pattern in patterns) @@ -212,21 +228,160 @@ def 
_read_pid_record(pid_path: Optional[Path] = None) -> Optional[dict]: return None +def _read_gateway_lock_record(lock_path: Optional[Path] = None) -> Optional[dict[str, Any]]: + return _read_pid_record(lock_path or _get_gateway_lock_path()) + + +def _pid_from_record(record: Optional[dict[str, Any]]) -> Optional[int]: + if not record: + return None + try: + return int(record["pid"]) + except (KeyError, TypeError, ValueError): + return None + + def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None: + """Delete a stale gateway PID file (and its sibling lock metadata). + + Called from ``get_running_pid()`` after the runtime lock has already been + confirmed inactive, so the on-disk metadata is known to belong to a dead + process. Unlike ``remove_pid_file()`` (which defensively refuses to delete + a PID file whose ``pid`` field differs from ``os.getpid()`` to protect + ``--replace`` handoffs), this path force-unlinks both files so the next + startup sees a clean slate. 
+ """ if not cleanup_stale: return try: - if pid_path == _get_pid_path(): - remove_pid_file() - else: - pid_path.unlink(missing_ok=True) + pid_path.unlink(missing_ok=True) + except Exception: + pass + try: + _get_gateway_lock_path(pid_path).unlink(missing_ok=True) except Exception: pass +def _write_gateway_lock_record(handle) -> None: + handle.seek(0) + handle.truncate() + json.dump(_build_pid_record(), handle) + handle.flush() + try: + os.fsync(handle.fileno()) + except OSError: + pass + + +def _try_acquire_file_lock(handle) -> bool: + try: + if _IS_WINDOWS: + handle.seek(0, os.SEEK_END) + if handle.tell() == 0: + handle.write("\n") + handle.flush() + handle.seek(0) + msvcrt.locking(handle.fileno(), msvcrt.LK_NBLCK, 1) + else: + fcntl.flock(handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB) + return True + except (BlockingIOError, OSError): + return False + + +def _release_file_lock(handle) -> None: + try: + if _IS_WINDOWS: + handle.seek(0) + msvcrt.locking(handle.fileno(), msvcrt.LK_UNLCK, 1) + else: + fcntl.flock(handle.fileno(), fcntl.LOCK_UN) + except OSError: + pass + + +def acquire_gateway_runtime_lock() -> bool: + """Claim the cross-process runtime lock for the gateway. + + Unlike the PID file, the lock is owned by the live process itself. If the + process dies abruptly, the OS releases the lock automatically. 
+ """ + global _gateway_lock_handle + if _gateway_lock_handle is not None: + return True + + path = _get_gateway_lock_path() + path.parent.mkdir(parents=True, exist_ok=True) + handle = open(path, "a+", encoding="utf-8") + if not _try_acquire_file_lock(handle): + handle.close() + return False + _write_gateway_lock_record(handle) + _gateway_lock_handle = handle + return True + + +def release_gateway_runtime_lock() -> None: + """Release the gateway runtime lock when owned by this process.""" + global _gateway_lock_handle + handle = _gateway_lock_handle + if handle is None: + return + _gateway_lock_handle = None + _release_file_lock(handle) + try: + handle.close() + except OSError: + pass + + +def is_gateway_runtime_lock_active(lock_path: Optional[Path] = None) -> bool: + """Return True when some process currently owns the gateway runtime lock.""" + global _gateway_lock_handle + resolved_lock_path = lock_path or _get_gateway_lock_path() + if _gateway_lock_handle is not None and resolved_lock_path == _get_gateway_lock_path(): + return True + + if not resolved_lock_path.exists(): + return False + + handle = open(resolved_lock_path, "a+", encoding="utf-8") + try: + if _try_acquire_file_lock(handle): + _release_file_lock(handle) + return False + return True + finally: + try: + handle.close() + except OSError: + pass + + def write_pid_file() -> None: - """Write the current process PID and metadata to the gateway PID file.""" - _write_json_file(_get_pid_path(), _build_pid_record()) + """Write the current process PID and metadata to the gateway PID file. + + Uses atomic O_CREAT | O_EXCL creation so that concurrent --replace + invocations race: exactly one process wins and the rest get + FileExistsError. 
+ """ + path = _get_pid_path() + path.parent.mkdir(parents=True, exist_ok=True) + record = json.dumps(_build_pid_record()) + try: + fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_WRONLY) + except FileExistsError: + raise # Let caller decide: another gateway is racing us + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + f.write(record) + except Exception: + try: + path.unlink(missing_ok=True) + except OSError: + pass + raise def write_runtime_status( @@ -563,35 +718,42 @@ def get_running_pid( Cleans up stale PID files automatically. """ resolved_pid_path = pid_path or _get_pid_path() - record = _read_pid_record(resolved_pid_path) - if not record: + resolved_lock_path = _get_gateway_lock_path(resolved_pid_path) + lock_active = is_gateway_runtime_lock_active(resolved_lock_path) + if not lock_active: _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale) return None - try: - pid = int(record["pid"]) - except (KeyError, TypeError, ValueError): - _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale) - return None + primary_record = _read_pid_record(resolved_pid_path) + fallback_record = _read_gateway_lock_record(resolved_lock_path) - try: - os.kill(pid, 0) # signal 0 = existence check, no actual signal sent - except (ProcessLookupError, PermissionError): - _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale) - return None + for record in (primary_record, fallback_record): + pid = _pid_from_record(record) + if pid is None: + continue - recorded_start = record.get("start_time") - current_start = _get_process_start_time(pid) - if recorded_start is not None and current_start is not None and current_start != recorded_start: - _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale) - return None + try: + os.kill(pid, 0) # signal 0 = existence check, no actual signal sent + except ProcessLookupError: + continue + except PermissionError: + # The process exists but belongs to another 
user/service scope. + # With the runtime lock still held, prefer keeping it visible + # rather than deleting the PID file as "stale". + if _record_looks_like_gateway(record): + return pid + continue - if not _looks_like_gateway_process(pid): - if not _record_looks_like_gateway(record): - _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale) - return None + recorded_start = record.get("start_time") + current_start = _get_process_start_time(pid) + if recorded_start is not None and current_start is not None and current_start != recorded_start: + continue - return pid + if _looks_like_gateway_process(pid) or _record_looks_like_gateway(record): + return pid + + _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale) + return None def is_gateway_running( diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index c82bad3f0..98ac4edb3 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -72,6 +72,8 @@ DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1" DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com" DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot" DEFAULT_OLLAMA_CLOUD_BASE_URL = "https://ollama.com/v1" +STEPFUN_STEP_PLAN_INTL_BASE_URL = "https://api.stepfun.ai/step_plan/v1" +STEPFUN_STEP_PLAN_CN_BASE_URL = "https://api.stepfun.com/step_plan/v1" CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann" CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token" CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 @@ -168,8 +170,11 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { id="kimi-coding", name="Kimi / Moonshot", auth_type="api_key", + # Legacy platform.moonshot.ai keys use this endpoint (OpenAI-compat). + # sk-kimi- (Kimi Code) keys are auto-redirected to api.kimi.com/coding + # by _resolve_kimi_base_url() below. 
inference_base_url="https://api.moonshot.ai/v1", - api_key_env_vars=("KIMI_API_KEY",), + api_key_env_vars=("KIMI_API_KEY", "KIMI_CODING_API_KEY"), base_url_env_var="KIMI_BASE_URL", ), "kimi-coding-cn": ProviderConfig( @@ -179,6 +184,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { inference_base_url="https://api.moonshot.cn/v1", api_key_env_vars=("KIMI_CN_API_KEY",), ), + "stepfun": ProviderConfig( + id="stepfun", + name="StepFun Step Plan", + auth_type="api_key", + inference_base_url=STEPFUN_STEP_PLAN_INTL_BASE_URL, + api_key_env_vars=("STEPFUN_API_KEY",), + base_url_env_var="STEPFUN_BASE_URL", + ), "arcee": ProviderConfig( id="arcee", name="Arcee AI", @@ -201,6 +214,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { auth_type="api_key", inference_base_url="https://api.anthropic.com", api_key_env_vars=("ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN"), + base_url_env_var="ANTHROPIC_BASE_URL", ), "alibaba": ProviderConfig( id="alibaba", @@ -340,10 +354,16 @@ def get_anthropic_key() -> str: # ============================================================================= # Kimi Code (kimi.com/code) issues keys prefixed "sk-kimi-" that only work -# on api.kimi.com/coding/v1. Legacy keys from platform.moonshot.ai work on -# api.moonshot.ai/v1 (the default). Auto-detect when user hasn't set +# on api.kimi.com/coding. Legacy keys from platform.moonshot.ai work on +# api.moonshot.ai/v1 (the old default). Auto-detect when user hasn't set # KIMI_BASE_URL explicitly. -KIMI_CODE_BASE_URL = "https://api.kimi.com/coding/v1" +# +# Note: the base URL intentionally has NO /v1 suffix. The /coding endpoint +# speaks the Anthropic Messages protocol, and the anthropic SDK appends +# "/v1/messages" internally โ€” so "/coding" + SDK suffix โ†’ "/coding/v1/messages" +# (the correct target). Using "/coding/v1" here would produce +# "/coding/v1/v1/messages" (a 404). 
+KIMI_CODE_BASE_URL = "https://api.kimi.com/coding" def _resolve_kimi_base_url(api_key: str, default_url: str, env_override: str) -> str: @@ -983,6 +1003,7 @@ def resolve_provider( "x-ai": "xai", "x.ai": "xai", "grok": "xai", "kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding", "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn", + "step": "stepfun", "stepfun-coding-plan": "stepfun", "arcee-ai": "arcee", "arceeai": "arcee", "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn", "claude": "anthropic", "claude-code": "anthropic", @@ -3375,7 +3396,7 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: ) from hermes_cli.models import ( - _PROVIDER_MODELS, get_pricing_for_provider, filter_nous_free_models, + _PROVIDER_MODELS, get_pricing_for_provider, check_nous_free_tier, partition_nous_models_by_tier, ) model_ids = _PROVIDER_MODELS.get("nous", []) @@ -3384,7 +3405,6 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: unavailable_models: list = [] if model_ids: pricing = get_pricing_for_provider("nous") - model_ids = filter_nous_free_models(model_ids, pricing) free_tier = check_nous_free_tier() if free_tier: model_ids, unavailable_models = partition_nous_models_by_tier( diff --git a/hermes_cli/auth_commands.py b/hermes_cli/auth_commands.py index 30e518294..9c3320010 100644 --- a/hermes_cli/auth_commands.py +++ b/hermes_cli/auth_commands.py @@ -152,6 +152,23 @@ def auth_add_command(args) -> None: pool = load_pool(provider) + # Clear ALL suppressions for this provider โ€” re-adding a credential is + # a strong signal the user wants auth re-enabled. This covers env:* + # (shell-exported vars), gh_cli (copilot), claude_code, qwen-cli, + # device_code (codex), etc. One consistent re-engagement pattern. + # Matches the Codex device_code re-link pattern that predates this. 
+ if not provider.startswith(CUSTOM_POOL_PREFIX): + try: + from hermes_cli.auth import ( + _load_auth_store, + unsuppress_credential_source, + ) + suppressed = _load_auth_store().get("suppressed_sources", {}) + for src in list(suppressed.get(provider, []) or []): + unsuppress_credential_source(provider, src) + except Exception: + pass + if requested_type == AUTH_TYPE_API_KEY: token = (getattr(args, "api_key", None) or "").strip() if not token: @@ -338,71 +355,28 @@ def auth_remove_command(args) -> None: raise SystemExit(f'No credential matching "{target}" for provider {provider}.') print(f"Removed {provider} credential #{index} ({removed.label})") - # If this was an env-seeded credential, also clear the env var from .env - # so it doesn't get re-seeded on the next load_pool() call. - if removed.source.startswith("env:"): - env_var = removed.source[len("env:"):] - if env_var: - from hermes_cli.config import remove_env_value - cleared = remove_env_value(env_var) - if cleared: - print(f"Cleared {env_var} from .env") + # Unified removal dispatch. Every credential source Hermes reads from + # (env vars, external OAuth files, auth.json blocks, custom config) + # has a RemovalStep registered in agent.credential_sources. The step + # handles its source-specific cleanup and we centralise suppression + + # user-facing output here so every source behaves identically from + # the user's perspective. + from agent.credential_sources import find_removal_step + from hermes_cli.auth import suppress_credential_source - # If this was a singleton-seeded credential (OAuth device_code, hermes_pkce), - # clear the underlying auth store / credential file so it doesn't get - # re-seeded on the next load_pool() call. - elif provider == "openai-codex" and ( - removed.source == "device_code" or removed.source.endswith(":device_code") - ): - # Codex tokens live in TWO places: the Hermes auth store and - # ~/.codex/auth.json (the Codex CLI shared file). 
On every refresh, - # refresh_codex_oauth_pure() writes to both. So clearing only the - # Hermes auth store is not enough โ€” _seed_from_singletons() will - # auto-import from ~/.codex/auth.json on the next load_pool() and - # the removal is instantly undone. Mark the source as suppressed - # so auto-import is skipped; leave ~/.codex/auth.json untouched so - # the Codex CLI itself keeps working. - from hermes_cli.auth import ( - _load_auth_store, _save_auth_store, _auth_store_lock, - suppress_credential_source, - ) - with _auth_store_lock(): - auth_store = _load_auth_store() - providers_dict = auth_store.get("providers") - if isinstance(providers_dict, dict) and provider in providers_dict: - del providers_dict[provider] - _save_auth_store(auth_store) - print(f"Cleared {provider} OAuth tokens from auth store") - suppress_credential_source(provider, "device_code") - print("Suppressed openai-codex device_code source โ€” it will not be re-seeded.") - print("Note: Codex CLI credentials still live in ~/.codex/auth.json") - print("Run `hermes auth add openai-codex` to re-enable if needed.") + step = find_removal_step(provider, removed.source) + if step is None: + # Unregistered source โ€” e.g. "manual", which has nothing external + # to clean up. The pool entry is already gone; we're done. 
+ return - elif removed.source == "device_code" and provider == "nous": - from hermes_cli.auth import ( - _load_auth_store, _save_auth_store, _auth_store_lock, - ) - with _auth_store_lock(): - auth_store = _load_auth_store() - providers_dict = auth_store.get("providers") - if isinstance(providers_dict, dict) and provider in providers_dict: - del providers_dict[provider] - _save_auth_store(auth_store) - print(f"Cleared {provider} OAuth tokens from auth store") - - elif removed.source == "hermes_pkce" and provider == "anthropic": - from hermes_constants import get_hermes_home - oauth_file = get_hermes_home() / ".anthropic_oauth.json" - if oauth_file.exists(): - oauth_file.unlink() - print("Cleared Hermes Anthropic OAuth credentials") - - elif removed.source == "claude_code" and provider == "anthropic": - from hermes_cli.auth import suppress_credential_source - suppress_credential_source(provider, "claude_code") - print("Suppressed claude_code credential โ€” it will not be re-seeded.") - print("Note: Claude Code credentials still live in ~/.claude/.credentials.json") - print("Run `hermes auth add anthropic` to re-enable if needed.") + result = step.remove_fn(provider, removed) + for line in result.cleaned: + print(line) + if result.suppress: + suppress_credential_source(provider, removed.source) + for line in result.hints: + print(line) def auth_reset_command(args) -> None: diff --git a/hermes_cli/claw.py b/hermes_cli/claw.py index e62efe47e..aa0c28828 100644 --- a/hermes_cli/claw.py +++ b/hermes_cli/claw.py @@ -249,7 +249,7 @@ def _scan_workspace_state(source_dir: Path) -> list[tuple[Path, str]]: state_path = child / state_name if state_path.exists(): kind = "directory" if state_path.is_dir() else "file" - rel = state_path.relative_to(source_dir) + rel = state_path.relative_to(source_dir).as_posix() findings.append((state_path, f"Workspace {kind}: {rel}")) return findings diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 797acab5e..87d73af58 100644 
--- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -260,6 +260,26 @@ GATEWAY_KNOWN_COMMANDS: frozenset[str] = frozenset( ) +def is_gateway_known_command(name: str | None) -> bool: + """Return True if ``name`` resolves to a gateway-dispatchable slash command. + + This covers both built-in commands (``GATEWAY_KNOWN_COMMANDS`` derived + from ``COMMAND_REGISTRY``) and plugin-registered commands, which are + looked up lazily so importing this module never forces plugin + discovery. Gateway code uses this to decide whether to emit + ``command:`` hooks โ€” plugin commands get the same lifecycle + events as built-ins. + """ + if not name: + return False + if name in GATEWAY_KNOWN_COMMANDS: + return True + for plugin_name, _description, _args_hint in _iter_plugin_command_entries(): + if plugin_name == name: + return True + return False + + # Commands with explicit Level-2 running-agent handlers in gateway/run.py. # Listed here for introspection / tests; semantically a subset of # "all resolvable commands" โ€” which is the real bypass set (see @@ -371,12 +391,47 @@ def gateway_help_lines() -> list[str]: return lines +def _iter_plugin_command_entries() -> list[tuple[str, str, str]]: + """Yield (name, description, args_hint) tuples for all plugin slash commands. + + Plugin commands are registered via + :func:`hermes_cli.plugins.PluginContext.register_command`. They behave + like ``CommandDef`` entries for gateway surfacing: they appear in the + Telegram command menu, in Slack's ``/hermes`` subcommand mapping, and + (via :func:`gateway.platforms.discord._register_slash_commands`) in + Discord's native slash command picker. + + Lookup is lazy so importing this module never forces plugin discovery + (which can trigger filesystem scans and environment-dependent + behavior). 
+ """ + try: + from hermes_cli.plugins import get_plugin_commands + except Exception: + return [] + try: + commands = get_plugin_commands() or {} + except Exception: + return [] + entries: list[tuple[str, str, str]] = [] + for name, meta in commands.items(): + if not isinstance(name, str) or not isinstance(meta, dict): + continue + description = str(meta.get("description") or f"Run /{name}") + args_hint = str(meta.get("args_hint") or "").strip() + entries.append((name, description, args_hint)) + return entries + + def telegram_bot_commands() -> list[tuple[str, str]]: """Return (command_name, description) pairs for Telegram setMyCommands. Telegram command names cannot contain hyphens, so they are replaced with underscores. Aliases are skipped -- Telegram shows one menu entry per canonical command. + + Plugin-registered slash commands are included so plugins get native + autocomplete in Telegram without touching core code. """ overrides = _resolve_config_gates() result: list[tuple[str, str]] = [] @@ -386,6 +441,10 @@ def telegram_bot_commands() -> list[tuple[str, str]]: tg_name = _sanitize_telegram_name(cmd.name) if tg_name: result.append((tg_name, cmd.description)) + for name, description, _args_hint in _iter_plugin_command_entries(): + tg_name = _sanitize_telegram_name(name) + if tg_name: + result.append((tg_name, description)) return result @@ -750,6 +809,9 @@ def slack_subcommand_map() -> dict[str, str]: Maps both canonical names and aliases so /hermes bg do stuff works the same as /hermes background do stuff. + + Plugin-registered slash commands are included so ``/hermes `` + routes through the plugin handler. 
""" overrides = _resolve_config_gates() mapping: dict[str, str] = {} @@ -759,6 +821,9 @@ def slack_subcommand_map() -> dict[str, str]: mapping[cmd.name] = f"/{cmd.name}" for alias in cmd.aliases: mapping[alias] = f"/{alias}" + for name, _description, _args_hint in _iter_plugin_command_entries(): + if name not in mapping: + mapping[name] = f"/{name}" return mapping @@ -924,12 +989,22 @@ class SlashCommandCompleter(Completer): display_meta=meta, ) - # If the user typed @file: or @folder:, delegate to path completions + # If the user typed @file: / @folder: (or just @file / @folder with + # no colon yet), delegate to path completions. Accepting the bare + # form lets the picker surface directories as soon as the user has + # typed `@folder`, without requiring them to first accept the static + # `@folder:` hint and re-trigger completion. for prefix in ("@file:", "@folder:"): - if word.startswith(prefix): - path_part = word[len(prefix):] or "." + bare = prefix[:-1] + + if word == bare or word.startswith(prefix): + want_dir = prefix == "@folder:" + path_part = '' if word == bare else word[len(prefix):] expanded = os.path.expanduser(path_part) - if expanded.endswith("/"): + + if not expanded or expanded == ".": + search_dir, match_prefix = ".", "" + elif expanded.endswith("/"): search_dir, match_prefix = expanded, "" else: search_dir = os.path.dirname(expanded) or "." @@ -945,15 +1020,21 @@ class SlashCommandCompleter(Completer): for entry in sorted(entries): if match_prefix and not entry.lower().startswith(prefix_lower): continue - if count >= limit: - break full_path = os.path.join(search_dir, entry) is_dir = os.path.isdir(full_path) + # `@folder:` must only surface directories; `@file:` only + # regular files. Without this filter `@folder:` listed + # every .env / .gitignore in the cwd, defeating the + # explicit prefix and confusing users expecting a + # directory picker. 
+ if want_dir != is_dir: + continue + if count >= limit: + break display_path = os.path.relpath(full_path) suffix = "/" if is_dir else "" - kind = "folder" if is_dir else "file" meta = "dir" if is_dir else _file_size_label(full_path) - completion = f"@{kind}:{display_path}{suffix}" + completion = f"{prefix}{display_path}{suffix}" yield Completion( completion, start_position=-len(word), diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 5f10f0de2..30427bd25 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -387,6 +387,26 @@ DEFAULT_CONFIG = { # (terminal and execute_code). Skill-declared required_environment_variables # are passed through automatically; this list is for non-skill use cases. "env_passthrough": [], + # Extra files to source in the login shell when building the + # per-session environment snapshot. Use this when tools like nvm, + # pyenv, asdf, or custom PATH entries are registered by files that + # a bash login shell would skip โ€” most commonly ``~/.bashrc`` + # (bash doesn't source bashrc in non-interactive login mode) or + # zsh-specific files like ``~/.zshrc`` / ``~/.zprofile``. + # Paths support ``~`` / ``${VAR}``. Missing files are silently + # skipped. When empty, Hermes auto-appends ``~/.bashrc`` if the + # snapshot shell is bash (this is the ``auto_source_bashrc`` + # behaviour โ€” disable with that key if you want strict login-only + # semantics). + "shell_init_files": [], + # When true (default), Hermes sources ``~/.bashrc`` in the login + # shell used to build the environment snapshot. This captures + # PATH additions, shell functions, and aliases defined in the + # user's bashrc โ€” which a plain ``bash -l -c`` would otherwise + # miss because bash skips bashrc in non-interactive login mode. + # Turn this off if you have a bashrc that misbehaves when sourced + # non-interactively (e.g. one that hard-exits on TTY checks). 
+ "auto_source_bashrc": True, "docker_image": "nikolaik/python-nodejs:python3.11-nodejs20", "docker_forward_env": [], # Explicit environment variables to set inside Docker containers. @@ -593,6 +613,10 @@ DEFAULT_CONFIG = { }, # Text-to-speech configuration + # Each provider supports an optional `max_text_length:` override for the + # per-request input-character cap. Omit it to use the provider's documented + # limit (OpenAI 4096, xAI 15000, MiniMax 10000, ElevenLabs 5k-40k model-aware, + # Gemini 5000, Edge 5000, Mistral 4000, NeuTTS/KittenTTS 2000). "tts": { "provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local) "edge": { @@ -645,6 +669,7 @@ DEFAULT_CONFIG = { "record_key": "ctrl+b", "max_recording_seconds": 120, "auto_tts": False, + "beep_enabled": True, # Play record start/stop beeps in CLI voice mode "silence_threshold": 200, # RMS below this = silence (0-32767) "silence_duration": 3.0, # Seconds of silence before auto-stop }, @@ -687,10 +712,22 @@ DEFAULT_CONFIG = { "provider": "", # e.g. "openrouter" (empty = inherit parent provider + credentials) "base_url": "", # direct OpenAI-compatible endpoint for subagents "api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY) + # When delegate_task narrows child toolsets explicitly, preserve any + # MCP toolsets the parent already has enabled. On by default so + # narrowing (e.g. toolsets=["web","browser"]) expresses "I want these + # extras" without silently stripping MCP tools the parent already has. + # Set to false for strict intersection. 
+ "inherit_mcp_toolsets": True, "max_iterations": 50, # per-subagent iteration cap (each subagent gets its own budget, # independent of the parent's max_iterations) "reasoning_effort": "", # reasoning effort for subagents: "xhigh", "high", "medium", # "low", "minimal", "none" (empty = inherit parent's level) + "max_concurrent_children": 3, # max parallel children per batch; floor of 1 enforced, no ceiling + # Orchestrator role controls (see tools/delegate_tool.py:_get_max_spawn_depth + # and _get_orchestrator_enabled). Values are clamped to [1, 3] with a + # warning log if out of range. + "max_spawn_depth": 1, # depth cap (1 = flat [default], 2 = orchestratorโ†’leaf, 3 = three-level) + "orchestrator_enabled": True, # kill switch for role="orchestrator" }, # Ephemeral prefill messages file โ€” JSON list of {role, content} dicts @@ -703,6 +740,20 @@ DEFAULT_CONFIG = { # always goes to ~/.hermes/skills/. "skills": { "external_dirs": [], # e.g. ["~/.agents/skills", "/shared/team-skills"] + # Substitute ${HERMES_SKILL_DIR} and ${HERMES_SESSION_ID} in SKILL.md + # content with the absolute skill directory and the active session id + # before the agent sees it. Lets skill authors reference bundled + # scripts without the agent having to join paths. + "template_vars": True, + # Pre-execute inline shell snippets written as !`cmd` in SKILL.md + # body. Their stdout is inlined into the skill message before the + # agent reads it, so skills can inject dynamic context (dates, git + # state, detected tool versions, โ€ฆ). Off by default because any + # content from the skill author runs on the host without approval; + # only enable for skill sources you trust. + "inline_shell": False, + # Timeout (seconds) for each !`cmd` snippet when inline_shell is on. + "inline_shell_timeout": 10, }, # Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth. 
@@ -795,6 +846,7 @@ DEFAULT_CONFIG = { # Pre-exec security scanning via tirith "security": { + "allow_private_urls": False, # Allow requests to private/internal IPs (for OpenWrt, proxies, VPNs) "redact_secrets": True, "tirith_enabled": True, "tirith_path": "tirith", @@ -848,8 +900,36 @@ DEFAULT_CONFIG = { "force_ipv4": False, }, + # Session storage โ€” controls automatic cleanup of ~/.hermes/state.db. + # state.db accumulates every session, message, tool call, and FTS5 index + # entry forever. Without auto-pruning, a heavy user (gateway + cron) + # reports 384MB+ databases with 68K+ messages, which slows down FTS5 + # inserts, /resume listing, and insights queries. + "sessions": { + # When true, prune ended sessions older than retention_days once + # per (roughly) min_interval_hours at CLI/gateway/cron startup. + # Only touches ended sessions โ€” active sessions are always preserved. + # Default false: session history is valuable for search recall, and + # silently deleting it could surprise users. Opt in explicitly. + "auto_prune": False, + # How many days of ended-session history to keep. Matches the + # default of ``hermes sessions prune``. + "retention_days": 90, + # VACUUM after a prune that actually deleted rows. SQLite does not + # reclaim disk space on DELETE โ€” freed pages are just reused on + # subsequent INSERTs โ€” so without VACUUM the file stays bloated + # even after pruning. VACUUM blocks writes for a few seconds per + # 100MB, so it only runs at startup, and only when prune deleted + # โ‰ฅ1 session. + "vacuum_after_prune": True, + # Minimum hours between auto-maintenance runs (avoids repeating + # the sweep on every CLI invocation). Tracked via state_meta in + # state.db itself, so it's shared across all processes. 
+ "min_interval_hours": 24, + }, + # Config schema version - bump this when adding new required fields - "_config_version": 21, + "_config_version": 22, } # ============================================================================= @@ -1005,6 +1085,22 @@ OPTIONAL_ENV_VARS = { "category": "provider", "advanced": True, }, + "STEPFUN_API_KEY": { + "description": "StepFun Step Plan API key", + "prompt": "StepFun Step Plan API key", + "url": "https://platform.stepfun.com/", + "password": True, + "category": "provider", + "advanced": True, + }, + "STEPFUN_BASE_URL": { + "description": "StepFun Step Plan base URL override", + "prompt": "StepFun Step Plan base URL (leave empty for default)", + "url": None, + "password": False, + "category": "provider", + "advanced": True, + }, "ARCEEAI_API_KEY": { "description": "Arcee AI API key", "prompt": "Arcee AI API key", @@ -2057,6 +2153,7 @@ _KNOWN_ROOT_KEYS = { "fallback_providers", "credential_pool_strategies", "toolsets", "agent", "terminal", "display", "compression", "delegation", "auxiliary", "custom_providers", "context", "memory", "gateway", + "sessions", } # Valid fields inside a custom_providers list entry @@ -2214,7 +2311,6 @@ def print_config_warnings(config: Optional[Dict[str, Any]] = None) -> None: if not issues: return - import sys lines = ["\033[33mโš  Config issues detected in config.yaml:\033[0m"] for ci in issues: marker = "\033[31mโœ—\033[0m" if ci.severity == "error" else "\033[33mโš \033[0m" @@ -2229,7 +2325,6 @@ def warn_deprecated_cwd_env_vars(config: Optional[Dict[str, Any]] = None) -> Non These env vars are deprecated โ€” the canonical setting is terminal.cwd in config.yaml. Prints a migration hint to stderr. """ - import os, sys messaging_cwd = os.environ.get("MESSAGING_CWD") terminal_cwd_env = os.environ.get("TERMINAL_CWD") @@ -2572,8 +2667,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A # Scan ``$HERMES_HOME/plugins/`` for currently installed user plugins. 
grandfathered: List[str] = [] try: - from hermes_constants import get_hermes_home as _ghome - user_plugins_dir = _ghome() / "plugins" + user_plugins_dir = get_hermes_home() / "plugins" if user_plugins_dir.is_dir(): for child in sorted(user_plugins_dir.iterdir()): if not child.is_dir(): @@ -3075,7 +3169,7 @@ def save_config(config: Dict[str, Any]): if not sec or sec.get("redact_secrets") is None: parts.append(_SECURITY_COMMENT) fb = normalized.get("fallback_model", {}) - if not fb or not (fb.get("provider") and fb.get("model")): + if not fb or not isinstance(fb, dict) or not (fb.get("provider") and fb.get("model")): parts.append(_FALLBACK_COMMENT) atomic_yaml_write( @@ -3238,7 +3332,6 @@ def _check_non_ascii_credential(key: str, value: str) -> str: bad_chars.append(f" position {i}: {ch!r} (U+{ord(ch):04X})") sanitized = value.encode("ascii", errors="ignore").decode("ascii") - import sys print( f"\n Warning: {key} contains non-ASCII characters that will break API requests.\n" f" This usually happens when copy-pasting from a PDF, rich-text editor,\n" diff --git a/hermes_cli/debug.py b/hermes_cli/debug.py index 9dde9d7c1..8915d8a6a 100644 --- a/hermes_cli/debug.py +++ b/hermes_cli/debug.py @@ -13,6 +13,7 @@ import time import urllib.error import urllib.parse import urllib.request +from dataclasses import dataclass from pathlib import Path from typing import Optional @@ -147,6 +148,14 @@ def _sweep_expired_pastes(now: Optional[float] = None) -> tuple[int, int]: return (deleted, len(remaining)) +def _best_effort_sweep_expired_pastes() -> None: + """Attempt pending-paste cleanup without letting /debug fail offline.""" + try: + _sweep_expired_pastes() + except Exception: + pass + + # --------------------------------------------------------------------------- # Privacy / delete helpers # --------------------------------------------------------------------------- @@ -314,72 +323,128 @@ def upload_to_pastebin(content: str, expiry_days: int = 7) -> str: # Log file reading # 
--------------------------------------------------------------------------- -def _resolve_log_path(log_name: str) -> Optional[Path]: - """Find the log file for *log_name*, falling back to the .1 rotation. - Returns the path if found, or None. - """ +@dataclass +class LogSnapshot: + """Single-read snapshot of a log file used by debug-share.""" + + path: Optional[Path] + tail_text: str + full_text: Optional[str] + + +def _primary_log_path(log_name: str) -> Optional[Path]: + """Where *log_name* would live if present. Doesn't check existence.""" + from hermes_cli.logs import LOG_FILES filename = LOG_FILES.get(log_name) - if not filename: + return (get_hermes_home() / "logs" / filename) if filename else None + + +def _resolve_log_path(log_name: str) -> Optional[Path]: + """Find the log file for *log_name*, falling back to the .1 rotation. + + Returns the first non-empty candidate (primary, then .1), or None. + Callers distinguish 'empty primary' from 'truly missing' via + :func:`_primary_log_path`. + """ + primary = _primary_log_path(log_name) + if primary is None: return None - log_dir = get_hermes_home() / "logs" - primary = log_dir / filename if primary.exists() and primary.stat().st_size > 0: return primary - # Fall back to the most recent rotated file (.1). - rotated = log_dir / f"{filename}.1" + rotated = primary.parent / f"{primary.name}.1" if rotated.exists() and rotated.stat().st_size > 0: return rotated return None -def _read_log_tail(log_name: str, num_lines: int) -> str: - """Read the last *num_lines* from a log file, or return a placeholder.""" - from hermes_cli.logs import _read_last_n_lines
- log_path = _resolve_log_path(log_name) - if log_path is None: - return "(file not found)" - - try: - lines = _read_last_n_lines(log_path, num_lines) - return "".join(lines).rstrip("\n") - except Exception as exc: - return f"(error reading: {exc})" - - -def _read_full_log(log_name: str, max_bytes: int = _MAX_LOG_BYTES) -> Optional[str]: - """Read a log file for standalone upload. - - Returns the file content (last *max_bytes* if truncated), or None if the - file doesn't exist or is empty. + The report tail and standalone log upload must come from the same file + snapshot. Otherwise a rotation/truncate between reads can make the report + look newer than the uploaded ``agent.log`` paste. """ log_path = _resolve_log_path(log_name) if log_path is None: - return None + primary = _primary_log_path(log_name) + tail = "(file empty)" if primary and primary.exists() else "(file not found)" + return LogSnapshot(path=None, tail_text=tail, full_text=None) try: size = log_path.stat().st_size if size == 0: - return None + # race: file was truncated between _resolve_log_path and stat + return LogSnapshot(path=log_path, tail_text="(file empty)", full_text=None) - if size <= max_bytes: - return log_path.read_text(encoding="utf-8", errors="replace") - - # File is larger than max_bytes โ€” read the tail. with open(log_path, "rb") as f: - f.seek(size - max_bytes) - # Skip partial line at the seek point. - f.readline() - content = f.read().decode("utf-8", errors="replace") - return f"[... truncated โ€” showing last ~{max_bytes // 1024}KB ...]\n{content}" - except Exception: - return None + if size <= max_bytes: + raw = f.read() + truncated = False + else: + # Read from the end until we have enough bytes for the + # standalone upload and enough newline context to render the + # summary tail from the same snapshot. 
+ chunk_size = 8192 + pos = size + chunks: list[bytes] = [] + total = 0 + newline_count = 0 + + while pos > 0 and (total < max_bytes or newline_count <= tail_lines + 1) and total < max_bytes * 2: + read_size = min(chunk_size, pos) + pos -= read_size + f.seek(pos) + chunk = f.read(read_size) + chunks.insert(0, chunk) + total += len(chunk) + newline_count += chunk.count(b"\n") + chunk_size = min(chunk_size * 2, 65536) + + raw = b"".join(chunks) + truncated = pos > 0 + + full_raw = raw + if truncated and len(full_raw) > max_bytes: + cut = len(full_raw) - max_bytes + # Check whether the cut lands exactly on a line boundary. If the + # byte just before the cut position is a newline the first retained + # byte starts a complete line and we should keep it. Only drop a + # partial first line when we're genuinely mid-line. + on_boundary = cut > 0 and full_raw[cut - 1 : cut] == b"\n" + full_raw = full_raw[cut:] + if not on_boundary and b"\n" in full_raw: + full_raw = full_raw.split(b"\n", 1)[1] + + all_text = raw.decode("utf-8", errors="replace") + tail_text = "".join(all_text.splitlines(keepends=True)[-tail_lines:]).rstrip("\n") + + full_text = full_raw.decode("utf-8", errors="replace") + if truncated: + full_text = f"[... 
truncated โ€” showing last ~{max_bytes // 1024}KB ...]\n{full_text}" + + return LogSnapshot(path=log_path, tail_text=tail_text, full_text=full_text) + except Exception as exc: + return LogSnapshot(path=log_path, tail_text=f"(error reading: {exc})", full_text=None) + + +def _capture_default_log_snapshots(log_lines: int) -> dict[str, LogSnapshot]: + """Capture all logs used by debug-share exactly once.""" + errors_lines = min(log_lines, 100) + return { + "agent": _capture_log_snapshot("agent", tail_lines=log_lines), + "errors": _capture_log_snapshot("errors", tail_lines=errors_lines), + "gateway": _capture_log_snapshot("gateway", tail_lines=errors_lines), + } # --------------------------------------------------------------------------- @@ -405,7 +470,12 @@ def _capture_dump() -> str: return capture.getvalue() -def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str: +def collect_debug_report( + *, + log_lines: int = 200, + dump_text: str = "", + log_snapshots: Optional[dict[str, LogSnapshot]] = None, +) -> str: """Build the summary debug report: system dump + log tails. 
Parameters @@ -424,19 +494,22 @@ def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str: dump_text = _capture_dump() buf.write(dump_text) + if log_snapshots is None: + log_snapshots = _capture_default_log_snapshots(log_lines) + # โ”€โ”€ Recent log tails (summary only) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ buf.write("\n\n") buf.write(f"--- agent.log (last {log_lines} lines) ---\n") - buf.write(_read_log_tail("agent", log_lines)) + buf.write(log_snapshots["agent"].tail_text) buf.write("\n\n") errors_lines = min(log_lines, 100) buf.write(f"--- errors.log (last {errors_lines} lines) ---\n") - buf.write(_read_log_tail("errors", errors_lines)) + buf.write(log_snapshots["errors"].tail_text) buf.write("\n\n") buf.write(f"--- gateway.log (last {errors_lines} lines) ---\n") - buf.write(_read_log_tail("gateway", errors_lines)) + buf.write(log_snapshots["gateway"].tail_text) buf.write("\n") return buf.getvalue() @@ -448,6 +521,8 @@ def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str: def run_debug_share(args): """Collect debug report + full logs, upload each, print URLs.""" + _best_effort_sweep_expired_pastes() + log_lines = getattr(args, "lines", 200) expiry = getattr(args, "expire", 7) local_only = getattr(args, "local", False) @@ -459,10 +534,15 @@ def run_debug_share(args): # Capture dump once โ€” prepended to every paste for context. dump_text = _capture_dump() + log_snapshots = _capture_default_log_snapshots(log_lines) - report = collect_debug_report(log_lines=log_lines, dump_text=dump_text) - agent_log = _read_full_log("agent") - gateway_log = _read_full_log("gateway") + report = collect_debug_report( + log_lines=log_lines, + dump_text=dump_text, + log_snapshots=log_snapshots, + ) + agent_log = log_snapshots["agent"].full_text + gateway_log = log_snapshots["gateway"].full_text # Prepend dump header to each full log so every paste is self-contained. 
if agent_log: diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index e16f0bf5e..064b1d68d 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -912,6 +912,7 @@ def run_doctor(args): _apikey_providers = [ ("Z.AI / GLM", ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True), ("Kimi / Moonshot", ("KIMI_API_KEY",), "https://api.moonshot.ai/v1/models", "KIMI_BASE_URL", True), + ("StepFun Step Plan", ("STEPFUN_API_KEY",), "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True), ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",), "https://api.moonshot.cn/v1/models", None, True), ("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True), ("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True), @@ -943,18 +944,22 @@ def run_doctor(args): try: import httpx _base = os.getenv(_base_env, "") if _base_env else "" - # Auto-detect Kimi Code keys (sk-kimi-) โ†’ api.kimi.com + # Auto-detect Kimi Code keys (sk-kimi-) โ†’ api.kimi.com/coding/v1 + # (OpenAI-compat surface, which exposes /models for health check). if not _base and _key.startswith("sk-kimi-"): _base = "https://api.kimi.com/coding/v1" - # Anthropic-compat endpoints (/anthropic) don't support /models. - # Rewrite to the OpenAI-compat /v1 surface for health checks. + # Anthropic-compat endpoints (/anthropic, api.kimi.com/coding + # with no /v1) don't support /models. Rewrite to the OpenAI-compat + # /v1 surface for health checks. 
if _base and _base.rstrip("/").endswith("/anthropic"): from agent.auxiliary_client import _to_openai_base_url _base = _to_openai_base_url(_base) + if base_url_host_matches(_base, "api.kimi.com") and _base.rstrip("/").endswith("/coding"): + _base = _base.rstrip("/") + "/v1" _url = (_base.rstrip("/") + "/models") if _base else _default_url _headers = {"Authorization": f"Bearer {_key}"} if base_url_host_matches(_base, "api.kimi.com"): - _headers["User-Agent"] = "KimiCLI/1.30.0" + _headers["User-Agent"] = "claude-code/0.1.0" _resp = httpx.get( _url, headers=_headers, diff --git a/hermes_cli/env_loader.py b/hermes_cli/env_loader.py index aa0a05924..009f3de27 100644 --- a/hermes_cli/env_loader.py +++ b/hermes_cli/env_loader.py @@ -160,6 +160,8 @@ def load_hermes_dotenv( # Fix corrupted .env files before python-dotenv parses them (#8908). if user_env.exists(): _sanitize_env_file_if_needed(user_env) + if project_env_path and project_env_path.exists(): + _sanitize_env_file_if_needed(project_env_path) if user_env.exists(): _load_dotenv_with_fallback(user_env, override=True) diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index bc809cadf..8b360087c 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -333,6 +333,147 @@ def _probe_systemd_service_running(system: bool = False) -> tuple[bool, bool]: return selected_system, result.stdout.strip() == "active" +def _read_systemd_unit_properties( + system: bool = False, + properties: tuple[str, ...] 
= ( + "ActiveState", + "SubState", + "Result", + "ExecMainStatus", + ), +) -> dict[str, str]: + """Return selected ``systemctl show`` properties for the gateway unit.""" + selected_system = _select_systemd_scope(system) + try: + result = _run_systemctl( + [ + "show", + get_service_name(), + "--no-pager", + "--property", + ",".join(properties), + ], + system=selected_system, + capture_output=True, + text=True, + timeout=10, + ) + except (RuntimeError, subprocess.TimeoutExpired, OSError): + return {} + + if result.returncode != 0: + return {} + + parsed: dict[str, str] = {} + for line in result.stdout.splitlines(): + if "=" not in line: + continue + key, value = line.split("=", 1) + parsed[key] = value.strip() + return parsed + + +def _wait_for_systemd_service_restart( + *, + system: bool = False, + previous_pid: int | None = None, + timeout: float = 60.0, +) -> bool: + """Wait for the gateway service to become active after a restart handoff.""" + import time + + svc = get_service_name() + scope_label = _service_scope_label(system).capitalize() + deadline = time.time() + timeout + + while time.time() < deadline: + props = _read_systemd_unit_properties(system=system) + active_state = props.get("ActiveState", "") + sub_state = props.get("SubState", "") + new_pid = None + try: + from gateway.status import get_running_pid + + new_pid = get_running_pid() + except Exception: + new_pid = None + + if active_state == "active": + if new_pid and (previous_pid is None or new_pid != previous_pid): + print(f"โœ“ {scope_label} service restarted (PID {new_pid})") + return True + if previous_pid is None: + print(f"โœ“ {scope_label} service restarted") + return True + + if active_state == "activating" and sub_state == "auto-restart": + time.sleep(1) + continue + + time.sleep(2) + + print( + f"โš  {scope_label} service did not become active within {int(timeout)}s.\n" + f" Check status: {'sudo ' if system else ''}hermes gateway status\n" + f" Check logs: journalctl {'--user ' if not 
system else ''}-u {svc} -l --since '2 min ago'" + ) + return False + + +def _recover_pending_systemd_restart(system: bool = False, previous_pid: int | None = None) -> bool: + """Recover a planned service restart that is stuck in systemd state.""" + props = _read_systemd_unit_properties(system=system) + if not props: + return False + + try: + from gateway.status import read_runtime_status + except Exception: + return False + + runtime_state = read_runtime_status() or {} + if not runtime_state.get("restart_requested"): + return False + + active_state = props.get("ActiveState", "") + sub_state = props.get("SubState", "") + exec_main_status = props.get("ExecMainStatus", "") + result = props.get("Result", "") + + if active_state == "activating" and sub_state == "auto-restart": + print("โณ Service restart already pending โ€” waiting for systemd relaunch...") + return _wait_for_systemd_service_restart( + system=system, + previous_pid=previous_pid, + ) + + if active_state == "failed" and ( + exec_main_status == str(GATEWAY_SERVICE_RESTART_EXIT_CODE) + or result == "exit-code" + ): + svc = get_service_name() + scope_label = _service_scope_label(system).capitalize() + print(f"โ†ป Clearing failed state for pending {scope_label.lower()} service restart...") + _run_systemctl( + ["reset-failed", svc], + system=system, + check=False, + timeout=30, + ) + _run_systemctl( + ["start", svc], + system=system, + check=False, + timeout=90, + ) + return _wait_for_systemd_service_restart( + system=system, + previous_pid=previous_pid, + ) + + return False + + def _probe_launchd_service_running() -> bool: if not get_launchd_plist_path().exists(): return False @@ -470,7 +611,8 @@ def stop_profile_gateway() -> bool: except (ProcessLookupError, PermissionError): break - remove_pid_file() + if get_running_pid() is None: + remove_pid_file() return True @@ -994,8 +1136,6 @@ def get_systemd_linger_status() -> tuple[bool | None, str]: if not is_linux(): return None, "not supported on this platform" 
- import shutil - if not shutil.which("loginctl"): return None, "loginctl not found" @@ -1347,7 +1487,6 @@ def _ensure_linger_enabled() -> None: return import getpass - import shutil username = getpass.getuser() linger_file = Path(f"/var/lib/systemd/linger/{username}") @@ -1508,14 +1647,9 @@ def systemd_restart(system: bool = False): pid = get_running_pid() if pid is not None and _request_gateway_self_restart(pid): - # SIGUSR1 sent โ€” the gateway will drain active agents, exit with - # code 75, and systemd will restart it after RestartSec (30s). - # Wait for the old process to die and the new one to become active - # so the CLI doesn't return while the service is still restarting. import time scope_label = _service_scope_label(system).capitalize() svc = get_service_name() - scope_cmd = _systemctl_cmd(system) # Phase 1: wait for old process to exit (drain + shutdown) print(f"โณ {scope_label} service draining active work...") @@ -1529,48 +1663,41 @@ def systemd_restart(system: bool = False): else: print(f"โš  Old process (PID {pid}) still alive after 90s") - # Phase 2: wait for systemd to start the new process - print(f"โณ Waiting for {svc} to restart...") - deadline = time.time() + 60 - while time.time() < deadline: - try: - result = subprocess.run( - scope_cmd + ["is-active", svc], - capture_output=True, text=True, timeout=5, - ) - if result.stdout.strip() == "active": - # Verify it's a NEW process, not the old one somehow - new_pid = get_running_pid() - if new_pid and new_pid != pid: - print(f"โœ“ {scope_label} service restarted (PID {new_pid})") - return - except (subprocess.TimeoutExpired, FileNotFoundError): - pass - time.sleep(2) - - # Timed out โ€” check final state - try: - result = subprocess.run( - scope_cmd + ["is-active", svc], - capture_output=True, text=True, timeout=5, - ) - if result.stdout.strip() == "active": - print(f"โœ“ {scope_label} service restarted") - return - except Exception: - pass - print( - f"โš  {scope_label} service did not become 
active within 60s.\n" - f" Check status: {'sudo ' if system else ''}hermes gateway status\n" - f" Check logs: journalctl {'--user ' if not system else ''}-u {svc} --since '2 min ago'" + # The gateway exits with code 75 for a planned service restart. + # systemd can sit in the RestartSec window or even wedge itself into a + # failed/rate-limited state if the operator asks for another restart in + # the middle of that handoff. Clear any stale failed state and kick the + # unit immediately so `hermes gateway restart` behaves idempotently. + _run_systemctl( + ["reset-failed", svc], + system=system, + check=False, + timeout=30, ) + _run_systemctl( + ["start", svc], + system=system, + check=False, + timeout=90, + ) + _wait_for_systemd_service_restart(system=system, previous_pid=pid) return + + if _recover_pending_systemd_restart(system=system, previous_pid=pid): + return + + _run_systemctl( + ["reset-failed", get_service_name()], + system=system, + check=False, + timeout=30, + ) _run_systemctl(["reload-or-restart", get_service_name()], system=system, check=True, timeout=90) print(f"โœ“ {_service_scope_label(system).capitalize()} service restarted") -def systemd_status(deep: bool = False, system: bool = False): +def systemd_status(deep: bool = False, system: bool = False, full: bool = False): system = _select_systemd_scope(system) unit_path = get_systemd_unit_path(system=system) scope_flag = " --system" if system else "" @@ -1593,8 +1720,12 @@ def systemd_status(deep: bool = False, system: bool = False): print(f" Run: {'sudo ' if system else ''}hermes gateway restart{scope_flag} # auto-refreshes the unit") print() + status_cmd = ["status", get_service_name(), "--no-pager"] + if full: + status_cmd.append("-l") + _run_systemctl( - ["status", get_service_name(), "--no-pager"], + status_cmd, system=system, capture_output=False, timeout=10, @@ -1627,6 +1758,19 @@ def systemd_status(deep: bool = False, system: bool = False): for line in runtime_lines: print(f" {line}") + 
unit_props = _read_systemd_unit_properties(system=system) + active_state = unit_props.get("ActiveState", "") + sub_state = unit_props.get("SubState", "") + exec_main_status = unit_props.get("ExecMainStatus", "") + result_code = unit_props.get("Result", "") + if active_state == "activating" and sub_state == "auto-restart": + print(" โณ Restart pending: systemd is waiting to relaunch the gateway") + elif active_state == "failed" and exec_main_status == str(GATEWAY_SERVICE_RESTART_EXIT_CODE): + print(" โš  Planned restart is stuck in systemd failed state (exit 75)") + print(f" Run: systemctl {'--user ' if not system else ''}reset-failed {get_service_name()} && {'sudo ' if system else ''}hermes gateway start{scope_flag}") + elif active_state == "failed" and result_code: + print(f" โš  Systemd unit result: {result_code}") + if system: print("โœ“ System service starts at boot without requiring systemd linger") elif deep: @@ -1642,7 +1786,10 @@ def systemd_status(deep: bool = False, system: bool = False): if deep: print() print("Recent logs:") - subprocess.run(_journalctl_cmd(system) + ["-u", get_service_name(), "-n", "20", "--no-pager"], timeout=10) + log_cmd = _journalctl_cmd(system) + ["-u", get_service_name(), "-n", "20", "--no-pager"] + if full: + log_cmd.append("-l") + subprocess.run(log_cmd, timeout=10) # ============================================================================= @@ -1656,7 +1803,6 @@ def get_launchd_label() -> str: def _launchd_domain() -> str: - import os return f"gui/{os.getuid()}" @@ -2643,9 +2789,120 @@ def _setup_dingtalk(): def _setup_wecom(): - """Configure WeCom (Enterprise WeChat) via the standard platform setup.""" - wecom_platform = next(p for p in _PLATFORMS if p["key"] == "wecom") - _setup_standard_platform(wecom_platform) + """Interactive setup for WeCom โ€” scan QR code or manual credential input.""" + print() + print(color(" โ”€โ”€โ”€ ๐Ÿ’ฌ WeCom (Enterprise WeChat) Setup โ”€โ”€โ”€", Colors.CYAN)) + + existing_bot_id = 
get_env_value("WECOM_BOT_ID") + existing_secret = get_env_value("WECOM_SECRET") + if existing_bot_id and existing_secret: + print() + print_success("WeCom is already configured.") + if not prompt_yes_no(" Reconfigure WeCom?", False): + return + + # โ”€โ”€ Choose setup method โ”€โ”€ + print() + method_choices = [ + "Scan QR code to obtain Bot ID and Secret automatically (recommended)", + "Enter existing Bot ID and Secret manually", + ] + method_idx = prompt_choice(" How would you like to set up WeCom?", method_choices, 0) + + bot_id = None + secret = None + + if method_idx == 0: + # โ”€โ”€ QR scan flow โ”€โ”€ + try: + from gateway.platforms.wecom import qr_scan_for_bot_info + except Exception as exc: + print_error(f" WeCom QR scan import failed: {exc}") + qr_scan_for_bot_info = None + + if qr_scan_for_bot_info is not None: + try: + credentials = qr_scan_for_bot_info() + except KeyboardInterrupt: + print() + print_warning(" WeCom setup cancelled.") + return + except Exception as exc: + print_warning(f" QR scan failed: {exc}") + credentials = None + if credentials: + bot_id = credentials.get("bot_id", "") + secret = credentials.get("secret", "") + print_success(" โœ” QR scan successful! Bot ID and Secret obtained.") + + if not bot_id or not secret: + print_info(" QR scan did not complete. Continuing with manual input.") + bot_id = None + secret = None + + # โ”€โ”€ Manual credential input โ”€โ”€ + if not bot_id or not secret: + print() + print_info(" 1. Go to WeCom Application โ†’ Workspace โ†’ Smart Robot -> Create smart robots") + print_info(" 2. Select API Mode") + print_info(" 3. Copy the Bot ID and Secret from the bot's credentials info") + print_info(" 4. 
The bot connects via WebSocket โ€” no public endpoint needed") + print() + bot_id = prompt(" Bot ID", password=False) + if not bot_id: + print_warning(" Skipped โ€” WeCom won't work without a Bot ID.") + return + secret = prompt(" Secret", password=True) + if not secret: + print_warning(" Skipped โ€” WeCom won't work without a Secret.") + return + + # โ”€โ”€ Save core credentials โ”€โ”€ + save_env_value("WECOM_BOT_ID", bot_id) + save_env_value("WECOM_SECRET", secret) + + # โ”€โ”€ Allowed users (deny-by-default security) โ”€โ”€ + print() + print_info(" The gateway DENIES all users by default for security.") + print_info(" Enter user IDs to create an allowlist, or leave empty.") + allowed = prompt(" Allowed user IDs (comma-separated, or empty)", password=False) + if allowed: + cleaned = allowed.replace(" ", "") + save_env_value("WECOM_ALLOWED_USERS", cleaned) + print_success(" Saved โ€” only these users can interact with the bot.") + else: + print() + access_choices = [ + "Enable open access (anyone can message the bot)", + "Use DM pairing (unknown users request access, you approve with 'hermes pairing approve')", + "Disable direct messages", + "Skip for now (bot will deny all users until configured)", + ] + access_idx = prompt_choice(" How should unauthorized users be handled?", access_choices, 1) + if access_idx == 0: + save_env_value("WECOM_DM_POLICY", "open") + save_env_value("GATEWAY_ALLOW_ALL_USERS", "true") + print_warning(" Open access enabled โ€” anyone can use your bot!") + elif access_idx == 1: + save_env_value("WECOM_DM_POLICY", "pairing") + print_success(" DM pairing mode โ€” users will receive a code to request access.") + print_info(" Approve with: hermes pairing approve ") + elif access_idx == 2: + save_env_value("WECOM_DM_POLICY", "disabled") + print_warning(" Direct messages disabled.") + else: + print_info(" Skipped โ€” configure later with 'hermes gateway setup'") + + # โ”€โ”€ Home channel (optional) โ”€โ”€ + print() + print_info(" Chat ID for 
scheduled results and notifications.") + home = prompt(" Home chat ID (optional, for cron/notifications)", password=False) + if home: + save_env_value("WECOM_HOME_CHANNEL", home) + print_success(f" Home channel set to {home}") + + print() + print_success("๐Ÿ’ฌ WeCom configured!") def _is_service_installed() -> bool: @@ -3025,7 +3282,8 @@ def _setup_qqbot(): if method_idx == 0: # โ”€โ”€ QR scan-to-configure โ”€โ”€ try: - credentials = _qqbot_qr_flow() + from gateway.platforms.qqbot import qr_register + credentials = qr_register() except KeyboardInterrupt: print() print_warning(" QQ Bot setup cancelled.") @@ -3107,106 +3365,6 @@ def _setup_qqbot(): print_info(f" App ID: {credentials['app_id']}") -def _qqbot_render_qr(url: str) -> bool: - """Try to render a QR code in the terminal. Returns True if successful.""" - try: - import qrcode as _qr - qr = _qr.QRCode(border=1,error_correction=_qr.constants.ERROR_CORRECT_L) - qr.add_data(url) - qr.make(fit=True) - qr.print_ascii(invert=True) - return True - except Exception: - return False - - -def _qqbot_qr_flow(): - """Run the QR-code scan-to-configure flow. - - Returns a dict with app_id, client_secret, user_openid on success, - or None on failure/cancel. 
- """ - try: - from gateway.platforms.qqbot import ( - create_bind_task, poll_bind_result, build_connect_url, - decrypt_secret, BindStatus, - ) - from gateway.platforms.qqbot.constants import ONBOARD_POLL_INTERVAL - except Exception as exc: - print_error(f" QQBot onboard import failed: {exc}") - return None - - import asyncio - import time - - MAX_REFRESHES = 3 - refresh_count = 0 - - while refresh_count <= MAX_REFRESHES: - loop = asyncio.new_event_loop() - - # โ”€โ”€ Create bind task โ”€โ”€ - try: - task_id, aes_key = loop.run_until_complete(create_bind_task()) - except Exception as e: - print_warning(f" Failed to create bind task: {e}") - loop.close() - return None - - url = build_connect_url(task_id) - - # โ”€โ”€ Display QR code + URL โ”€โ”€ - print() - if _qqbot_render_qr(url): - print(f" Scan the QR code above, or open this URL directly:\n {url}") - else: - print(f" Open this URL in QQ on your phone:\n {url}") - print_info(" Tip: pip install qrcode to show a scannable QR code here") - - # โ”€โ”€ Poll loop (silent โ€” keep QR visible at bottom) โ”€โ”€ - try: - while True: - try: - status, app_id, encrypted_secret, user_openid = loop.run_until_complete( - poll_bind_result(task_id) - ) - except Exception: - time.sleep(ONBOARD_POLL_INTERVAL) - continue - - if status == BindStatus.COMPLETED: - client_secret = decrypt_secret(encrypted_secret, aes_key) - print() - print_success(f" QR scan complete! (App ID: {app_id})") - if user_openid: - print_info(f" Scanner's OpenID: {user_openid}") - return { - "app_id": app_id, - "client_secret": client_secret, - "user_openid": user_openid, - } - - if status == BindStatus.EXPIRED: - refresh_count += 1 - if refresh_count > MAX_REFRESHES: - print() - print_warning(f" QR code expired {MAX_REFRESHES} times โ€” giving up.") - return None - print() - print_warning(f" QR code expired, refreshing... 
({refresh_count}/{MAX_REFRESHES})") - loop.close() - break # outer while creates a new task - - time.sleep(ONBOARD_POLL_INTERVAL) - except KeyboardInterrupt: - loop.close() - raise - finally: - loop.close() - - return None - - def _setup_signal(): """Interactive setup for Signal messenger.""" import shutil @@ -3394,6 +3552,8 @@ def gateway_setup(): _setup_feishu() elif platform["key"] == "qqbot": _setup_qqbot() + elif platform["key"] == "wecom": + _setup_wecom() else: _setup_standard_platform(platform) @@ -3752,12 +3912,13 @@ def gateway_command(args): elif subcmd == "status": deep = getattr(args, 'deep', False) + full = getattr(args, 'full', False) system = getattr(args, 'system', False) snapshot = get_gateway_runtime_snapshot(system=system) # Check for service first if supports_systemd_services() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()): - systemd_status(deep, system=system) + systemd_status(deep, system=system, full=full) _print_gateway_process_mismatch(snapshot) elif is_macos() and get_launchd_plist_path().exists(): launchd_status(deep) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index f88c42dda..5657e4b5f 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -618,7 +618,6 @@ def _exec_in_container(container_info: dict, cli_args: list): container_info: dict with backend, container_name, exec_user, hermes_bin cli_args: the original CLI arguments (everything after 'hermes') """ - import shutil backend = container_info["backend"] container_name = container_info["container_name"] @@ -1181,8 +1180,6 @@ def cmd_gateway(args): def cmd_whatsapp(args): """Set up WhatsApp: choose mode, configure, install bridge, pair via QR.""" _require_tty("whatsapp") - import subprocess - from pathlib import Path from hermes_cli.config import get_env_value, save_env_value print() @@ -1330,8 +1327,6 @@ def cmd_whatsapp(args): except (EOFError, KeyboardInterrupt): response = "n" if response.lower() in ("y", "yes"): - 
import shutil - shutil.rmtree(session_dir, ignore_errors=True) session_dir.mkdir(parents=True, exist_ok=True) print(" โœ“ Session cleared") @@ -1427,8 +1422,6 @@ def select_provider_and_model(args=None): # Read effective provider the same way the CLI does at startup: # config.yaml model.provider > env var > auto-detect - import os - config_provider = None model_cfg = config.get("model") if isinstance(model_cfg, dict): @@ -1573,6 +1566,8 @@ def select_provider_and_model(args=None): _model_flow_anthropic(config, current_model) elif selected_provider == "kimi-coding": _model_flow_kimi(config, current_model) + elif selected_provider == "stepfun": + _model_flow_stepfun(config, current_model) elif selected_provider == "bedrock": _model_flow_bedrock(config, current_model) elif selected_provider in ( @@ -2134,7 +2129,6 @@ def _model_flow_nous(config, current_model="", args=None): save_env_value, ) from hermes_cli.nous_subscription import prompt_enable_tool_gateway - import argparse state = get_provider_auth_state("nous") if not state or not state.get("access_token"): @@ -2173,7 +2167,6 @@ def _model_flow_nous(config, current_model="", args=None): from hermes_cli.models import ( _PROVIDER_MODELS, get_pricing_for_provider, - filter_nous_free_models, check_nous_free_tier, partition_nous_models_by_tier, ) @@ -2216,10 +2209,8 @@ def _model_flow_nous(config, current_model="", args=None): # Check if user is on free tier free_tier = check_nous_free_tier() - # For both tiers: apply the allowlist filter first (removes non-allowlisted - # free models and allowlist models that aren't actually free). - # Then for free users: partition remaining models into selectable/unavailable. - model_ids = filter_nous_free_models(model_ids, pricing) + # For free users: partition models into selectable/unavailable based on + # whether they are free per the Portal-reported pricing. 
unavailable_models: list[str] = [] if free_tier: model_ids, unavailable_models = partition_nous_models_by_tier( @@ -2302,7 +2293,6 @@ def _model_flow_openai_codex(config, current_model=""): DEFAULT_CODEX_BASE_URL, ) from hermes_cli.codex_models import get_codex_model_ids - import argparse status = get_codex_auth_status() if not status.get("logged_in"): @@ -3474,6 +3464,140 @@ def _model_flow_kimi(config, current_model=""): print("No change.") +def _infer_stepfun_region(base_url: str) -> str: + """Infer the current StepFun region from the configured endpoint.""" + normalized = (base_url or "").strip().lower() + if "api.stepfun.com" in normalized: + return "china" + return "international" + + +def _stepfun_base_url_for_region(region: str) -> str: + from hermes_cli.auth import ( + STEPFUN_STEP_PLAN_CN_BASE_URL, + STEPFUN_STEP_PLAN_INTL_BASE_URL, + ) + + return ( + STEPFUN_STEP_PLAN_CN_BASE_URL + if region == "china" + else STEPFUN_STEP_PLAN_INTL_BASE_URL + ) + + +def _model_flow_stepfun(config, current_model=""): + """StepFun Step Plan flow with region-specific endpoints.""" + from hermes_cli.auth import ( + PROVIDER_REGISTRY, + _prompt_model_selection, + _save_model_choice, + deactivate_provider, + ) + from hermes_cli.config import get_env_value, save_env_value, load_config, save_config + from hermes_cli.models import fetch_api_models + + provider_id = "stepfun" + pconfig = PROVIDER_REGISTRY[provider_id] + key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else "" + base_url_env = pconfig.base_url_env_var or "" + + existing_key = "" + for ev in pconfig.api_key_env_vars: + existing_key = get_env_value(ev) or os.getenv(ev, "") + if existing_key: + break + + if not existing_key: + print(f"No {pconfig.name} API key configured.") + if key_env: + try: + import getpass + new_key = getpass.getpass(f"{key_env} (or Enter to cancel): ").strip() + except (KeyboardInterrupt, EOFError): + print() + return + if not new_key: + print("Cancelled.") + return + 
save_env_value(key_env, new_key) + existing_key = new_key + print("API key saved.") + print() + else: + print(f" {pconfig.name} API key: {existing_key[:8]}... โœ“") + print() + + current_base = "" + if base_url_env: + current_base = get_env_value(base_url_env) or os.getenv(base_url_env, "") + if not current_base: + model_cfg = config.get("model") + if isinstance(model_cfg, dict): + current_base = str(model_cfg.get("base_url") or "").strip() + current_region = _infer_stepfun_region(current_base or pconfig.inference_base_url) + + region_choices = [ + ("international", f"International ({_stepfun_base_url_for_region('international')})"), + ("china", f"China ({_stepfun_base_url_for_region('china')})"), + ] + ordered_regions = [] + for region_key, label in region_choices: + if region_key == current_region: + ordered_regions.insert(0, (region_key, f"{label} โ† currently active")) + else: + ordered_regions.append((region_key, label)) + ordered_regions.append(("cancel", "Cancel")) + + region_idx = _prompt_provider_choice([label for _, label in ordered_regions]) + if region_idx is None or ordered_regions[region_idx][0] == "cancel": + print("No change.") + return + + selected_region = ordered_regions[region_idx][0] + effective_base = _stepfun_base_url_for_region(selected_region) + if base_url_env: + save_env_value(base_url_env, effective_base) + + live_models = fetch_api_models(existing_key, effective_base) + if live_models: + model_list = live_models + print(f" Found {len(model_list)} model(s) from {pconfig.name} API") + else: + model_list = _PROVIDER_MODELS.get(provider_id, []) + if model_list: + print( + f" Could not auto-detect models from {pconfig.name} API โ€” " + "showing Step Plan fallback catalog." 
+ ) + + if model_list: + selected = _prompt_model_selection(model_list, current_model=current_model) + else: + try: + selected = input("Model name: ").strip() + except (KeyboardInterrupt, EOFError): + selected = None + + if selected: + _save_model_choice(selected) + + cfg = load_config() + model = cfg.get("model") + if not isinstance(model, dict): + model = {"default": model} if model else {} + cfg["model"] = model + model["provider"] = provider_id + model["base_url"] = effective_base + model.pop("api_mode", None) + save_config(cfg) + deactivate_provider() + + config["model"] = dict(model) + print(f"Default model set to: {selected} (via {pconfig.name})") + else: + print("No change.") + + def _model_flow_bedrock_api_key(config, region, current_model=""): """Bedrock API Key mode โ€” uses the OpenAI-compatible bedrock-mantle endpoint. @@ -4289,9 +4413,7 @@ def _clear_bytecode_cache(root: Path) -> int: ] if os.path.basename(dirpath) == "__pycache__": try: - import shutil as _shutil - - _shutil.rmtree(dirpath) + shutil.rmtree(dirpath) removed += 1 except OSError: pass @@ -4330,8 +4452,6 @@ def _gateway_prompt(prompt_text: str, default: str = "", timeout: float = 300.0) tmp.replace(prompt_path) # Poll for response - import time as _time - deadline = _time.monotonic() + timeout while _time.monotonic() < deadline: if response_path.exists(): @@ -4363,7 +4483,6 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool: """ if not (web_dir / "package.json").exists(): return True - import shutil npm = shutil.which("npm") if not npm: @@ -4400,7 +4519,6 @@ def _update_via_zip(args): Used on Windows when git file I/O is broken (antivirus, NTFS filter drivers causing 'Invalid argument' errors on file creation). 
""" - import shutil import tempfile import zipfile from urllib.request import urlretrieve @@ -4477,7 +4595,6 @@ def _update_via_zip(args): # breaks on this machine, keep base deps and reinstall the remaining extras # individually so update does not silently strip working capabilities. print("โ†’ Updating Python dependencies...") - import subprocess uv_bin = shutil.which("uv") if uv_bin: @@ -5228,9 +5345,11 @@ def _install_hangup_protection(gateway_mode: bool = False): # (2) Mirror output to update.log and wrap stdio for broken-pipe # tolerance. Any failure here is non-fatal; we just skip the wrap. try: - from hermes_cli.config import get_hermes_home + # Late-bound import so tests can monkeypatch + # hermes_cli.config.get_hermes_home to simulate setup failure. + from hermes_cli.config import get_hermes_home as _get_hermes_home - logs_dir = get_hermes_home() / "logs" + logs_dir = _get_hermes_home() / "logs" logs_dir.mkdir(parents=True, exist_ok=True) log_path = logs_dir / "update.log" log_file = open(log_path, "a", buffering=1, encoding="utf-8") @@ -5805,8 +5924,6 @@ def _cmd_update_impl(args, gateway_mode: bool): # Verify the service actually survived the # restart. systemctl restart returns 0 even # if the new process crashes immediately. 
- import time as _time - _time.sleep(3) verify = subprocess.run( scope_cmd + ["is-active", svc_name], @@ -6549,6 +6666,7 @@ For more help on a command: "zai", "kimi-coding", "kimi-coding-cn", + "stepfun", "minimax", "minimax-cn", "kilocode", @@ -6770,6 +6888,12 @@ For more help on a command: # gateway status gateway_status = gateway_subparsers.add_parser("status", help="Show gateway status") gateway_status.add_argument("--deep", action="store_true", help="Deep status check") + gateway_status.add_argument( + "-l", + "--full", + action="store_true", + help="Show full, untruncated service/log output where supported", + ) gateway_status.add_argument( "--system", action="store_true", @@ -7693,9 +7817,7 @@ Examples: ) cmd_info["setup_fn"](plugin_parser) except Exception as _exc: - import logging as _log - - _log.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc) + logging.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc) # ========================================================================= # memory command @@ -8080,7 +8202,6 @@ Examples: return line = _json.dumps(data, ensure_ascii=False) + "\n" if args.output == "-": - import sys sys.stdout.write(line) else: @@ -8090,7 +8211,6 @@ Examples: else: sessions = db.export_all(source=args.source) if args.output == "-": - import sys for s in sessions: sys.stdout.write(_json.dumps(s, ensure_ascii=False) + "\n") @@ -8161,8 +8281,6 @@ Examples: # Launch hermes --resume by replacing the current process print(f"Resuming session: {selected_id}") - import shutil - hermes_bin = shutil.which("hermes") if hermes_bin: os.execvp(hermes_bin, ["hermes", "--resume", selected_id]) diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py index 22721f9a4..63712060e 100644 --- a/hermes_cli/model_switch.py +++ b/hermes_cli/model_switch.py @@ -143,7 +143,7 @@ MODEL_ALIASES: dict[str, ModelIdentity] = { # Z.AI / GLM "glm": ModelIdentity("z-ai", "glm"), - # StepFun + # Step Plan (StepFun) "step": 
ModelIdentity("stepfun", "step"), # Xiaomi @@ -678,6 +678,7 @@ def switch_model( _da = DIRECT_ALIASES.get(resolved_alias) if _da is not None and _da.base_url: base_url = _da.base_url + api_mode = "" # clear so determine_api_mode re-detects from URL if not api_key: api_key = "no-key-required" @@ -809,7 +810,10 @@ def list_authenticated_providers( get_provider_info as _mdev_pinfo, ) from hermes_cli.auth import PROVIDER_REGISTRY - from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS + from hermes_cli.models import ( + OPENROUTER_MODELS, _PROVIDER_MODELS, + _MODELS_DEV_PREFERRED, _merge_with_models_dev, + ) results: List[dict] = [] seen_slugs: set = set() # lowercase-normalized to catch case variants (#9545) @@ -855,8 +859,13 @@ def list_authenticated_providers( if not has_creds: continue - # Use curated list, falling back to models.dev if no curated list + # Use curated list, falling back to models.dev if no curated list. + # For preferred providers, merge models.dev entries into the curated + # catalog so newly released models (e.g. mimo-v2.5-pro on opencode-go) + # show up in the picker without requiring a Hermes release. model_ids = curated.get(hermes_id, []) + if hermes_id in _MODELS_DEV_PREFERRED: + model_ids = _merge_with_models_dev(hermes_id, model_ids) total = len(model_ids) top = model_ids[:max_models] @@ -960,6 +969,9 @@ def list_authenticated_providers( # Use curated list โ€” look up by Hermes slug, fall back to overlay key model_ids = curated.get(hermes_slug, []) or curated.get(pid, []) + # Merge with models.dev for preferred providers (same rationale as above). 
+ if hermes_slug in _MODELS_DEV_PREFERRED: + model_ids = _merge_with_models_dev(hermes_slug, model_ids) total = len(model_ids) top = model_ids[:max_models] diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 046df3519..bc7f40258 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -42,7 +42,8 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [ ("openrouter/elephant-alpha", "free"), ("openai/gpt-5.4", ""), ("openai/gpt-5.4-mini", ""), - ("xiaomi/mimo-v2-pro", ""), + ("xiaomi/mimo-v2.5-pro", ""), + ("xiaomi/mimo-v2.5", ""), ("openai/gpt-5.3-codex", ""), ("google/gemini-3-pro-image-preview", ""), ("google/gemini-3-flash-preview", ""), @@ -53,6 +54,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [ ("stepfun/step-3.5-flash", ""), ("minimax/minimax-m2.7", ""), ("minimax/minimax-m2.5", ""), + ("minimax/minimax-m2.5:free", "free"), ("z-ai/glm-5.1", ""), ("z-ai/glm-5v-turbo", ""), ("z-ai/glm-5-turbo", ""), @@ -107,7 +109,8 @@ def _codex_curated_models() -> list[str]: _PROVIDER_MODELS: dict[str, list[str]] = { "nous": [ "moonshotai/kimi-k2.6", - "xiaomi/mimo-v2-pro", + "xiaomi/mimo-v2.5-pro", + "xiaomi/mimo-v2.5", "anthropic/claude-opus-4.7", "anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", @@ -125,17 +128,15 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "stepfun/step-3.5-flash", "minimax/minimax-m2.7", "minimax/minimax-m2.5", + "minimax/minimax-m2.5:free", "z-ai/glm-5.1", "z-ai/glm-5v-turbo", "z-ai/glm-5-turbo", "x-ai/grok-4.20-beta", "nvidia/nemotron-3-super-120b-a12b", - "nvidia/nemotron-3-super-120b-a12b:free", - "arcee-ai/trinity-large-preview:free", "arcee-ai/trinity-large-thinking", "openai/gpt-5.4-pro", "openai/gpt-5.4-nano", - "openrouter/elephant-alpha", ], "openai-codex": _codex_curated_models(), "copilot-acp": [ @@ -211,6 +212,10 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "kimi-k2-turbo-preview", "kimi-k2-0905-preview", ], + "stepfun": [ + "step-3.5-flash", + "step-3.5-flash-2603", + ], "moonshot": [ "kimi-k2.6", "kimi-k2.5", @@ 
-292,6 +297,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "big-pickle", ], "opencode-go": [ + "kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", @@ -299,6 +305,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5", + "qwen3.6-plus", + "qwen3.5-plus", ], "kilocode": [ "anthropic/claude-opus-4.6", @@ -359,17 +367,11 @@ _PROVIDER_MODELS: dict[str, list[str]] = { _PROVIDER_MODELS["ai-gateway"] = [mid for mid, _ in VERCEL_AI_GATEWAY_MODELS] # --------------------------------------------------------------------------- -# Nous Portal free-model filtering +# Nous Portal free-model helper # --------------------------------------------------------------------------- -# Models that are ALLOWED to appear when priced as free on Nous Portal. -# Any other free model is hidden โ€” prevents promotional/temporary free models -# from cluttering the selection when users are paying subscribers. -# Models in this list are ALSO filtered out if they are NOT free (i.e. they -# should only appear in the menu when they are genuinely free). -_NOUS_ALLOWED_FREE_MODELS: frozenset[str] = frozenset({ - "xiaomi/mimo-v2-pro", - "xiaomi/mimo-v2-omni", -}) +# The Nous Portal models endpoint is the source of truth for which models +# are currently offered (free or paid). We trust whatever it returns and +# surface it to users as-is โ€” no local allowlist filtering. def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool: @@ -383,35 +385,6 @@ def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool: return False -def filter_nous_free_models( - model_ids: list[str], - pricing: dict[str, dict[str, str]], -) -> list[str]: - """Filter the Nous Portal model list according to free-model policy. - - Rules: - โ€ข Paid models that are NOT in the allowlist โ†’ keep (normal case). - โ€ข Free models that are NOT in the allowlist โ†’ drop. - โ€ข Allowlist models that ARE free โ†’ keep. - โ€ข Allowlist models that are NOT free โ†’ drop. 
- """ - if not pricing: - return model_ids # no pricing data โ€” can't filter, show everything - - result: list[str] = [] - for mid in model_ids: - free = _is_model_free(mid, pricing) - if mid in _NOUS_ALLOWED_FREE_MODELS: - # Allowlist model: only show when it's actually free - if free: - result.append(mid) - else: - # Regular model: keep only when it's NOT free - if not free: - result.append(mid) - return result - - # --------------------------------------------------------------------------- # Nous Portal account tier detection # --------------------------------------------------------------------------- @@ -475,8 +448,7 @@ def partition_nous_models_by_tier( ) -> tuple[list[str], list[str]]: """Split Nous models into (selectable, unavailable) based on user tier. - For paid-tier users: all models are selectable, none unavailable - (free-model filtering is handled separately by ``filter_nous_free_models``). + For paid-tier users: all models are selectable, none unavailable. For free-tier users: only free models are selectable; paid models are returned as unavailable (shown grayed out in the menu). @@ -515,8 +487,6 @@ def check_nous_free_tier() -> bool: Returns False (assume paid) on any error โ€” never blocks paying users. """ global _free_tier_cache - import time - now = time.monotonic() if _free_tier_cache is not None: cached_result, cached_at = _free_tier_cache @@ -548,6 +518,157 @@ def check_nous_free_tier() -> bool: return False # default to paid on error โ€” don't block users +# --------------------------------------------------------------------------- +# Nous Portal recommended models +# +# The Portal publishes a curated list of suggested models (separated into +# paid and free tiers) plus dedicated recommendations for compaction (text +# summarisation / auxiliary) and vision tasks. We fetch it once per process +# with a TTL cache so callers can ask "what's the best aux model right now?" +# without hitting the network on every lookup. 
+# +# Shape of the response (fields we care about): +# { +# "paidRecommendedModels": [ {modelName, ...}, ... ], +# "freeRecommendedModels": [ {modelName, ...}, ... ], +# "paidRecommendedCompactionModel": {modelName, ...} | null, +# "paidRecommendedVisionModel": {modelName, ...} | null, +# "freeRecommendedCompactionModel": {modelName, ...} | null, +# "freeRecommendedVisionModel": {modelName, ...} | null, +# } +# --------------------------------------------------------------------------- + +NOUS_RECOMMENDED_MODELS_PATH = "/api/nous/recommended-models" +_NOUS_RECOMMENDED_CACHE_TTL: int = 600 # seconds (10 minutes) +# (result_dict, timestamp) keyed by portal_base_url so staging vs prod don't collide. +_nous_recommended_cache: dict[str, tuple[dict[str, Any], float]] = {} + + +def fetch_nous_recommended_models( + portal_base_url: str = "", + timeout: float = 5.0, + *, + force_refresh: bool = False, +) -> dict[str, Any]: + """Fetch the Nous Portal's curated recommended-models payload. + + Hits ``/api/nous/recommended-models``. The endpoint is public โ€” + no auth is required. Results are cached per portal URL for + ``_NOUS_RECOMMENDED_CACHE_TTL`` seconds; pass ``force_refresh=True`` to + bypass the cache. + + Returns the parsed JSON dict on success, or ``{}`` on any failure + (network, parse, non-2xx). Callers must treat missing/null fields as + "no recommendation" and fall back to their own default. 
+ """ + base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/") + now = time.monotonic() + cached = _nous_recommended_cache.get(base) + if not force_refresh and cached is not None: + payload, cached_at = cached + if now - cached_at < _NOUS_RECOMMENDED_CACHE_TTL: + return payload + + url = f"{base}{NOUS_RECOMMENDED_MODELS_PATH}" + try: + req = urllib.request.Request( + url, + headers={"Accept": "application/json"}, + ) + with urllib.request.urlopen(req, timeout=timeout) as resp: + data = json.loads(resp.read().decode()) + if not isinstance(data, dict): + data = {} + except Exception: + data = {} + + _nous_recommended_cache[base] = (data, now) + return data + + +def _resolve_nous_portal_url() -> str: + """Best-effort lookup of the Portal base URL the user is authed against.""" + try: + from hermes_cli.auth import ( + DEFAULT_NOUS_PORTAL_URL, + get_provider_auth_state, + ) + state = get_provider_auth_state("nous") or {} + portal = str(state.get("portal_base_url") or "").strip() + if portal: + return portal.rstrip("/") + return str(DEFAULT_NOUS_PORTAL_URL).rstrip("/") + except Exception: + return "https://portal.nousresearch.com" + + +def _extract_model_name(entry: Any) -> Optional[str]: + """Pull the ``modelName`` field from a recommended-model entry, else None.""" + if not isinstance(entry, dict): + return None + model_name = entry.get("modelName") + if isinstance(model_name, str) and model_name.strip(): + return model_name.strip() + return None + + +def get_nous_recommended_aux_model( + *, + vision: bool = False, + free_tier: Optional[bool] = None, + portal_base_url: str = "", + force_refresh: bool = False, +) -> Optional[str]: + """Return the Portal's recommended model name for an auxiliary task. 
+ + Picks the best field from the Portal's recommended-models payload: + + * ``vision=True`` โ†’ ``paidRecommendedVisionModel`` (paid tier) or + ``freeRecommendedVisionModel`` (free tier) + * ``vision=False`` โ†’ ``paidRecommendedCompactionModel`` or + ``freeRecommendedCompactionModel`` + + When ``free_tier`` is ``None`` (default) the user's tier is auto-detected + via :func:`check_nous_free_tier`. Pass an explicit bool to bypass the + detection โ€” useful for tests or when the caller already knows the tier. + + For paid-tier users we prefer the paid recommendation but gracefully fall + back to the free recommendation if the Portal returned ``null`` for the + paid field (common during the staged rollout of new paid models). + + Returns ``None`` when every candidate is missing, null, or the fetch + fails โ€” callers should fall back to their own default (currently + ``google/gemini-3-flash-preview``). + """ + base = portal_base_url or _resolve_nous_portal_url() + payload = fetch_nous_recommended_models(base, force_refresh=force_refresh) + if not payload: + return None + + if free_tier is None: + try: + free_tier = check_nous_free_tier() + except Exception: + # On any detection error, assume paid โ€” paid users see both fields + # anyway so this is a safe default that maximises model quality. + free_tier = False + + if vision: + paid_key, free_key = "paidRecommendedVisionModel", "freeRecommendedVisionModel" + else: + paid_key, free_key = "paidRecommendedCompactionModel", "freeRecommendedCompactionModel" + + # Preference order: + # free tier โ†’ free only + # paid tier โ†’ paid, then free (if paid field is null) + candidates = [free_key] if free_tier else [paid_key, free_key] + for key in candidates: + name = _extract_model_name(payload.get(key)) + if name: + return name + return None + + # --------------------------------------------------------------------------- # Canonical provider list โ€” single source of truth for provider identity. 
# Every code path that lists, displays, or iterates providers derives from @@ -584,6 +705,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("zai", "Z.AI / GLM", "Z.AI / GLM (Zhipu AI direct API)"), ProviderEntry("kimi-coding", "Kimi / Kimi Coding Plan", "Kimi Coding Plan (api.kimi.com) & Moonshot API"), ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)", "Kimi / Moonshot China (Moonshot CN direct API)"), + ProviderEntry("stepfun", "StepFun Step Plan", "StepFun Step Plan (agent/coding models via Step Plan API)"), ProviderEntry("minimax", "MiniMax", "MiniMax (global direct API)"), ProviderEntry("minimax-cn", "MiniMax (China)", "MiniMax China (domestic direct API)"), ProviderEntry("alibaba", "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"), @@ -618,6 +740,8 @@ _PROVIDER_ALIASES = { "moonshot": "kimi-coding", "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn", + "step": "stepfun", + "stepfun-coding-plan": "stepfun", "arcee-ai": "arcee", "arceeai": "arcee", "minimax-china": "minimax-cn", @@ -687,6 +811,31 @@ def _openrouter_model_is_free(pricing: Any) -> bool: return False +def _openrouter_model_supports_tools(item: Any) -> bool: + """Return True when the model's ``supported_parameters`` advertise tool calling. + + hermes-agent is tool-calling-first โ€” every provider path assumes the model + can invoke tools. Models that don't advertise ``tools`` in their + ``supported_parameters`` (e.g. image-only or completion-only models) cannot + be driven by the agent loop and would fail at the first tool call. + + **Permissive when the field is missing.** Some OpenRouter-compatible gateways + (Nous Portal, private mirrors, older catalog snapshots) don't populate + ``supported_parameters`` at all. Treat that as "unknown capability โ†’ allow" + so the picker doesn't silently empty for those users. Only hide models + whose ``supported_parameters`` is an explicit list that omits ``tools``. 
+ + Ported from Kilo-Org/kilocode#9068. + """ + if not isinstance(item, dict): + return True + params = item.get("supported_parameters") + if not isinstance(params, list): + # Field absent / malformed / None โ€” be permissive. + return True + return "tools" in params + + def fetch_openrouter_models( timeout: float = 8.0, *, @@ -729,6 +878,11 @@ def fetch_openrouter_models( live_item = live_by_id.get(preferred_id) if live_item is None: continue + # Hide models that don't advertise tool-calling support โ€” hermes-agent + # requires it and surfacing them leads to immediate runtime failures + # when the user selects them. Ported from Kilo-Org/kilocode#9068. + if not _openrouter_model_supports_tools(live_item): + continue desc = "free" if _openrouter_model_is_free(live_item.get("pricing")) else "" curated.append((preferred_id, desc)) @@ -1259,7 +1413,6 @@ def detect_provider_for_model( from hermes_cli.auth import PROVIDER_REGISTRY pconfig = PROVIDER_REGISTRY.get(direct_match) if pconfig: - import os for env_var in pconfig.api_key_env_vars: if os.getenv(env_var, "").strip(): has_creds = True @@ -1436,11 +1589,84 @@ def _resolve_copilot_catalog_api_key() -> str: return "" +# Providers where models.dev is treated as authoritative: curated static +# lists are kept only as an offline fallback and to capture custom additions +# the registry doesn't publish yet. Adding a provider here causes its +# curated list to be merged with fresh models.dev entries (fresh first, any +# curated-only names appended) for both the CLI and the gateway /model picker. +# +# DELIBERATELY EXCLUDED: +# - "openrouter": curated list is already a hand-picked agentic subset of +# OpenRouter's 400+ catalog. Blindly merging would dump everything. +# - "nous": curated list and Portal /models endpoint are the source of +# truth for the subscription tier. 
+# Also excluded: providers that already have dedicated live-endpoint +# branches below (copilot, anthropic, ai-gateway, ollama-cloud, custom, +# stepfun, openai-codex) โ€” those paths handle freshness themselves. +_MODELS_DEV_PREFERRED: frozenset[str] = frozenset({ + "opencode-go", + "opencode-zen", + "deepseek", + "kilocode", + "fireworks", + "mistral", + "togetherai", + "cohere", + "perplexity", + "groq", + "nvidia", + "huggingface", + "zai", + "gemini", + "google", +}) + + +def _merge_with_models_dev(provider: str, curated: list[str]) -> list[str]: + """Merge curated list with fresh models.dev entries for a preferred provider. + + Returns models.dev entries first (in models.dev order), then any + curated-only entries appended. Preserves case for curated fallbacks + (e.g. ``MiniMax-M2.7``) while trusting models.dev for newer variants. + + If models.dev is unreachable or returns nothing, the curated list is + returned unchanged โ€” this is the offline/CI fallback path. + """ + try: + from agent.models_dev import list_agentic_models + mdev = list_agentic_models(provider) + except Exception: + mdev = [] + + if not mdev: + return list(curated) + + # Case-insensitive dedup while preserving order and curated casing. + seen_lower: set[str] = set() + merged: list[str] = [] + for mid in mdev: + key = str(mid).lower() + if key in seen_lower: + continue + seen_lower.add(key) + merged.append(mid) + for mid in curated: + key = str(mid).lower() + if key in seen_lower: + continue + seen_lower.add(key) + merged.append(mid) + return merged + + def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False) -> list[str]: """Return the best known model catalog for a provider. Tries live API endpoints for providers that support them (Codex, Nous), - falling back to static lists. + falling back to static lists. 
For providers in ``_MODELS_DEV_PREFERRED`` + (opencode-go/zen, xiaomi, deepseek, smaller inference providers, etc.), + models.dev entries are merged on top of curated so new models released + on the platform appear in ``/model`` without a Hermes release. """ normalized = normalize_provider(provider) if normalized == "openrouter": @@ -1469,6 +1695,19 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False) return live except Exception: pass + if normalized == "stepfun": + try: + from hermes_cli.auth import resolve_api_key_provider_credentials + + creds = resolve_api_key_provider_credentials("stepfun") + api_key = str(creds.get("api_key") or "").strip() + base_url = str(creds.get("base_url") or "").strip() + if api_key and base_url: + live = fetch_api_models(api_key, base_url) + if live: + return live + except Exception: + pass if normalized == "anthropic": live = _fetch_anthropic_models() if live: @@ -1493,7 +1732,10 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False) live = fetch_api_models(api_key, base_url) if live: return live - return list(_PROVIDER_MODELS.get(normalized, [])) + curated_static = list(_PROVIDER_MODELS.get(normalized, [])) + if normalized in _MODELS_DEV_PREFERRED: + return _merge_with_models_dev(normalized, curated_static) + return curated_static def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]: @@ -2396,13 +2638,70 @@ def validate_requested_model( except Exception: pass # Fall through to generic warning + # Static-catalog fallback: when the /models probe was unreachable, + # validate against the curated list from provider_model_ids() โ€” same + # pattern as the openai-codex and minimax branches above. This fixes + # /model switches in the gateway for providers like opencode-go and + # opencode-zen whose /models endpoint returns 404 against the HTML + # marketing site. 
Without this block, validate_requested_model would + # reject every model on such providers, switch_model() would return + # success=False, and the gateway would never write to + # _session_model_overrides. provider_label = _PROVIDER_LABELS.get(normalized, normalized) + try: + catalog_models = provider_model_ids(normalized) + except Exception: + catalog_models = [] + + if catalog_models: + catalog_lower = {m.lower(): m for m in catalog_models} + if requested_for_lookup.lower() in catalog_lower: + return { + "accepted": True, + "persist": True, + "recognized": True, + "message": None, + } + catalog_lower_list = list(catalog_lower.keys()) + auto = get_close_matches( + requested_for_lookup.lower(), catalog_lower_list, n=1, cutoff=0.9 + ) + if auto: + corrected = catalog_lower[auto[0]] + return { + "accepted": True, + "persist": True, + "recognized": True, + "corrected_model": corrected, + "message": f"Auto-corrected `{requested}` โ†’ `{corrected}`", + } + suggestions = get_close_matches( + requested_for_lookup.lower(), catalog_lower_list, n=3, cutoff=0.5 + ) + suggestion_text = "" + if suggestions: + suggestion_text = "\n Similar models: " + ", ".join( + f"`{catalog_lower[s]}`" for s in suggestions + ) + return { + "accepted": True, + "persist": True, + "recognized": False, + "message": ( + f"Note: `{requested}` was not found in the {provider_label} curated catalog " + f"and the /models endpoint was unreachable.{suggestion_text}" + f"\n The model may still work if it exists on the provider." + ), + } + + # No catalog available โ€” accept with a warning, matching the comment's + # stated intent ("Accept and persist, but warn"). return { - "accepted": False, - "persist": False, + "accepted": True, + "persist": True, "recognized": False, "message": ( - f"Could not reach the {provider_label} API to validate `{requested}`. " + f"Note: could not reach the {provider_label} API to validate `{requested}`. " f"If the service isn't down, this model may not be valid." 
), } diff --git a/hermes_cli/nous_subscription.py b/hermes_cli/nous_subscription.py index 691126a4c..78181aab2 100644 --- a/hermes_cli/nous_subscription.py +++ b/hermes_cli/nous_subscription.py @@ -10,6 +10,7 @@ from hermes_cli.auth import get_nous_auth_status from hermes_cli.config import get_env_value, load_config from tools.managed_tool_gateway import is_managed_tool_gateway_ready from tools.tool_backend_helpers import ( + fal_key_is_configured, has_direct_modal_credentials, managed_nous_tools_enabled, normalize_browser_cloud_provider, @@ -271,7 +272,7 @@ def get_nous_subscription_features( direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL")) direct_parallel = bool(get_env_value("PARALLEL_API_KEY")) direct_tavily = bool(get_env_value("TAVILY_API_KEY")) - direct_fal = bool(get_env_value("FAL_KEY")) + direct_fal = fal_key_is_configured() direct_openai_tts = bool(resolve_openai_audio_api_key()) direct_elevenlabs = bool(get_env_value("ELEVENLABS_API_KEY")) direct_camofox = bool(get_env_value("CAMOFOX_URL")) @@ -520,7 +521,7 @@ def apply_nous_managed_defaults( browser_cfg["cloud_provider"] = "browser-use" changed.add("browser") - if "image_gen" in selected_toolsets and not get_env_value("FAL_KEY"): + if "image_gen" in selected_toolsets and not fal_key_is_configured(): changed.add("image_gen") return changed @@ -548,7 +549,7 @@ def _get_gateway_direct_credentials() -> Dict[str, bool]: or get_env_value("TAVILY_API_KEY") or get_env_value("EXA_API_KEY") ), - "image_gen": bool(get_env_value("FAL_KEY")), + "image_gen": fal_key_is_configured(), "tts": bool( resolve_openai_audio_api_key() or get_env_value("ELEVENLABS_API_KEY") @@ -586,7 +587,6 @@ def get_gateway_eligible_tools( return [], [], [] if config is None: - from hermes_cli.config import load_config config = load_config() or {} # Quick provider check without the heavy get_nous_subscription_features call diff --git a/hermes_cli/plugins.py b/hermes_cli/plugins.py index 
a593782e6..2dc1b50ea 100644 --- a/hermes_cli/plugins.py +++ b/hermes_cli/plugins.py @@ -133,6 +133,9 @@ def _get_enabled_plugins() -> Optional[set]: # Data classes # --------------------------------------------------------------------------- +_VALID_PLUGIN_KINDS: Set[str] = {"standalone", "backend", "exclusive"} + + @dataclass class PluginManifest: """Parsed representation of a plugin.yaml manifest.""" @@ -146,6 +149,23 @@ class PluginManifest: provides_hooks: List[str] = field(default_factory=list) source: str = "" # "user", "project", or "entrypoint" path: Optional[str] = None + # Plugin kind โ€” see plugins.py module docstring for semantics. + # ``standalone`` (default): hooks/tools of its own; opt-in via + # ``plugins.enabled``. + # ``backend``: pluggable backend for an existing core tool (e.g. + # image_gen). Built-in (bundled) backends auto-load; + # user-installed still gated by ``plugins.enabled``. + # ``exclusive``: category with exactly one active provider (memory). + # Selection via ``.provider`` config key; the + # category's own discovery system handles loading and the + # general scanner skips these. + kind: str = "standalone" + # Registry key โ€” path-derived, used by ``plugins.enabled``/``disabled`` + # lookups and by ``hermes plugins list``. For a flat plugin at + # ``plugins/disk-cleanup/`` the key is ``disk-cleanup``; for a nested + # category plugin at ``plugins/image_gen/openai/`` the key is + # ``image_gen/openai``. When empty, falls back to ``name``. + key: str = "" @dataclass @@ -263,6 +283,7 @@ class PluginContext: name: str, handler: Callable, description: str = "", + args_hint: str = "", ) -> None: """Register a slash command (e.g. ``/lcm``) available in CLI and gateway sessions. @@ -273,6 +294,13 @@ class PluginContext: terminal commands), this registers in-session slash commands that users invoke during a conversation. + ``args_hint`` is an optional short string (e.g. 
``""`` or + ``"dias:7 formato:json"``) used by gateway adapters to surface the + command with an argument field โ€” for example Discord's native slash + command picker. Plugin commands without ``args_hint`` register as + parameterless in Discord and still accept trailing text when invoked + as free-form chat. + Names conflicting with built-in commands are rejected with a warning. """ clean = name.lower().strip().lstrip("/").replace(" ", "-") @@ -300,6 +328,7 @@ class PluginContext: "handler": handler, "description": description or "Plugin command", "plugin": self.manifest.name, + "args_hint": (args_hint or "").strip(), } logger.debug("Plugin %s registered command: /%s", self.manifest.name, clean) @@ -366,6 +395,33 @@ class PluginContext: self.manifest.name, engine.name, ) + # -- image gen provider registration ------------------------------------ + + def register_image_gen_provider(self, provider) -> None: + """Register an image generation backend. + + ``provider`` must be an instance of + :class:`agent.image_gen_provider.ImageGenProvider`. The + ``provider.name`` attribute is what ``image_gen.provider`` in + ``config.yaml`` matches against when routing ``image_generate`` + tool calls. + """ + from agent.image_gen_provider import ImageGenProvider + from agent.image_gen_registry import register_provider + + if not isinstance(provider, ImageGenProvider): + logger.warning( + "Plugin '%s' tried to register an image_gen provider that does " + "not inherit from ImageGenProvider. Ignoring.", + self.manifest.name, + ) + return + register_provider(provider) + logger.info( + "Plugin '%s' registered image_gen provider: %s", + self.manifest.name, provider.name, + ) + # -- hook registration -------------------------------------------------- def register_hook(self, hook_name: str, callback: Callable) -> None: @@ -465,11 +521,16 @@ class PluginManager: manifests: List[PluginManifest] = [] # 1. Bundled plugins (/plugins//) - # Repo-shipped generic plugins live next to hermes_cli/. 
Memory and - # context_engine subdirs are handled by their own discovery paths, so - # skip those names here. Bundled plugins are discovered (so they - # show up in `hermes plugins`) but only loaded when added to - # `plugins.enabled` in config.yaml โ€” opt-in like any other plugin. + # + # Repo-shipped plugins live next to hermes_cli/. Two layouts are + # supported (see ``_scan_directory`` for details): + # + # - flat: ``plugins/disk-cleanup/plugin.yaml`` (standalone) + # - category: ``plugins/image_gen/openai/plugin.yaml`` (backend) + # + # ``memory/`` and ``context_engine/`` are skipped at the top level โ€” + # they have their own discovery systems. Porting those to the + # category-namespace ``kind: exclusive`` model is a future PR. repo_plugins = Path(__file__).resolve().parent.parent / "plugins" manifests.extend( self._scan_directory( @@ -492,36 +553,69 @@ class PluginManager: manifests.extend(self._scan_entry_points()) # Load each manifest (skip user-disabled plugins). - # Later sources override earlier ones on name collision โ€” user plugins - # take precedence over bundled, project plugins take precedence over - # user. Dedup here so we only load the final winner. + # Later sources override earlier ones on key collision โ€” user + # plugins take precedence over bundled, project plugins take + # precedence over user. Dedup here so we only load the final + # winner. Keys are path-derived (``image_gen/openai``, + # ``disk-cleanup``) so ``tts/openai`` and ``image_gen/openai`` + # don't collide even when both manifests say ``name: openai``. disabled = _get_disabled_plugins() enabled = _get_enabled_plugins() # None = opt-in default (nothing enabled) winners: Dict[str, PluginManifest] = {} for manifest in manifests: - winners[manifest.name] = manifest + winners[manifest.key or manifest.name] = manifest for manifest in winners.values(): - # Explicit disable always wins. 
- if manifest.name in disabled: + lookup_key = manifest.key or manifest.name + + # Explicit disable always wins (matches on key or on legacy + # bare name for back-compat with existing user configs). + if lookup_key in disabled or manifest.name in disabled: loaded = LoadedPlugin(manifest=manifest, enabled=False) loaded.error = "disabled via config" - self._plugins[manifest.name] = loaded - logger.debug("Skipping disabled plugin '%s'", manifest.name) + self._plugins[lookup_key] = loaded + logger.debug("Skipping disabled plugin '%s'", lookup_key) continue - # Opt-in gate: plugins must be in the enabled allow-list. - # If the allow-list is missing (None), treat as "nothing enabled" - # โ€” users have to explicitly enable plugins to load them. - # Memory and context_engine providers are excluded from this gate - # since they have their own single-select config (memory.provider - # / context.engine), not the enabled list. - if enabled is None or manifest.name not in enabled: + + # Exclusive plugins (memory providers) have their own + # discovery/activation path. The general loader records the + # manifest for introspection but does not load the module. + if manifest.kind == "exclusive": loaded = LoadedPlugin(manifest=manifest, enabled=False) - loaded.error = "not enabled in config (run `hermes plugins enable {}` to activate)".format( - manifest.name + loaded.error = ( + "exclusive plugin โ€” activate via .provider config" ) - self._plugins[manifest.name] = loaded + self._plugins[lookup_key] = loaded logger.debug( - "Skipping '%s' (not in plugins.enabled)", manifest.name + "Skipping '%s' (exclusive, handled by category discovery)", + lookup_key, + ) + continue + + # Built-in backends auto-load โ€” they ship with hermes and must + # just work. Selection among them (e.g. which image_gen backend + # services calls) is driven by ``.provider`` config, + # enforced by the tool wrapper. 
+ if manifest.kind == "backend" and manifest.source == "bundled": + self._load_plugin(manifest) + continue + + # Everything else (standalone, user-installed backends, + # entry-point plugins) is opt-in via plugins.enabled. + # Accept both the path-derived key and the legacy bare name + # so existing configs keep working. + is_enabled = ( + enabled is not None + and (lookup_key in enabled or manifest.name in enabled) + ) + if not is_enabled: + loaded = LoadedPlugin(manifest=manifest, enabled=False) + loaded.error = ( + "not enabled in config (run `hermes plugins enable {}` to activate)" + .format(lookup_key) + ) + self._plugins[lookup_key] = loaded + logger.debug( + "Skipping '%s' (not in plugins.enabled)", lookup_key ) continue self._load_plugin(manifest) @@ -545,9 +639,37 @@ class PluginManager: ) -> List[PluginManifest]: """Read ``plugin.yaml`` manifests from subdirectories of *path*. - *skip_names* is an optional allow-list of names to ignore (used - for the bundled scan to exclude ``memory`` / ``context_engine`` - subdirs that have their own discovery path). + Supports two layouts, mixed freely: + + * **Flat** โ€” ``//plugin.yaml``. Key is + ```` (e.g. ``disk-cleanup``). + * **Category** โ€” ``///plugin.yaml``, + where the ```` directory itself has no ``plugin.yaml``. + Key is ``/`` (e.g. ``image_gen/openai``). + Depth is capped at two segments. + + *skip_names* is an optional allow-list of names to ignore at the + top level (kept for back-compat; the current call sites no longer + pass it now that categories are first-class). + """ + return self._scan_directory_level( + path, source, skip_names=skip_names, prefix="", depth=0 + ) + + def _scan_directory_level( + self, + path: Path, + source: str, + *, + skip_names: Optional[Set[str]], + prefix: str, + depth: int, + ) -> List[PluginManifest]: + """Recursive implementation of :meth:`_scan_directory`. + + ``prefix`` is the category path already accumulated ("" at root, + "image_gen" one level in). 
``depth`` is the recursion depth; we + cap at 2 so ``/a/b/c/`` is ignored. """ manifests: List[PluginManifest] = [] if not path.is_dir(): @@ -556,37 +678,112 @@ class PluginManager: for child in sorted(path.iterdir()): if not child.is_dir(): continue - if skip_names and child.name in skip_names: + if depth == 0 and skip_names and child.name in skip_names: continue manifest_file = child / "plugin.yaml" if not manifest_file.exists(): manifest_file = child / "plugin.yml" - if not manifest_file.exists(): - logger.debug("Skipping %s (no plugin.yaml)", child) + + if manifest_file.exists(): + manifest = self._parse_manifest( + manifest_file, child, source, prefix + ) + if manifest is not None: + manifests.append(manifest) continue - try: - if yaml is None: - logger.warning("PyYAML not installed โ€“ cannot load %s", manifest_file) - continue - data = yaml.safe_load(manifest_file.read_text()) or {} - manifest = PluginManifest( - name=data.get("name", child.name), - version=str(data.get("version", "")), - description=data.get("description", ""), - author=data.get("author", ""), - requires_env=data.get("requires_env", []), - provides_tools=data.get("provides_tools", []), - provides_hooks=data.get("provides_hooks", []), - source=source, - path=str(child), + # No manifest at this level. If we're still within the depth + # cap, treat this directory as a category namespace and recurse + # one level in looking for children with manifests. 
+ if depth >= 1: + logger.debug("Skipping %s (no plugin.yaml, depth cap reached)", child) + continue + + sub_prefix = f"{prefix}/{child.name}" if prefix else child.name + manifests.extend( + self._scan_directory_level( + child, + source, + skip_names=None, + prefix=sub_prefix, + depth=depth + 1, ) - manifests.append(manifest) - except Exception as exc: - logger.warning("Failed to parse %s: %s", manifest_file, exc) + ) return manifests + def _parse_manifest( + self, + manifest_file: Path, + plugin_dir: Path, + source: str, + prefix: str, + ) -> Optional[PluginManifest]: + """Parse a single ``plugin.yaml`` into a :class:`PluginManifest`. + + Returns ``None`` on parse failure (logs a warning). + """ + try: + if yaml is None: + logger.warning("PyYAML not installed โ€“ cannot load %s", manifest_file) + return None + data = yaml.safe_load(manifest_file.read_text()) or {} + + name = data.get("name", plugin_dir.name) + key = f"{prefix}/{plugin_dir.name}" if prefix else name + + raw_kind = data.get("kind", "standalone") + if not isinstance(raw_kind, str): + raw_kind = "standalone" + kind = raw_kind.strip().lower() + if kind not in _VALID_PLUGIN_KINDS: + logger.warning( + "Plugin %s: unknown kind '%s' (valid: %s); treating as 'standalone'", + key, raw_kind, ", ".join(sorted(_VALID_PLUGIN_KINDS)), + ) + kind = "standalone" + + # Auto-coerce user-installed memory providers to kind="exclusive" + # so they're routed to plugins/memory discovery instead of being + # loaded by the general PluginManager (which has no + # register_memory_provider on PluginContext). Mirrors the + # heuristic in plugins/memory/__init__.py:_is_memory_provider_dir. + # Bundled memory providers are already skipped via skip_names. 
+ if kind == "standalone" and "kind" not in data: + init_file = plugin_dir / "__init__.py" + if init_file.exists(): + try: + source_text = init_file.read_text(errors="replace")[:8192] + if ( + "register_memory_provider" in source_text + or "MemoryProvider" in source_text + ): + kind = "exclusive" + logger.debug( + "Plugin %s: detected memory provider, " + "treating as kind='exclusive'", + key, + ) + except Exception: + pass + + return PluginManifest( + name=name, + version=str(data.get("version", "")), + description=data.get("description", ""), + author=data.get("author", ""), + requires_env=data.get("requires_env", []), + provides_tools=data.get("provides_tools", []), + provides_hooks=data.get("provides_hooks", []), + source=source, + path=str(plugin_dir), + kind=kind, + key=key, + ) + except Exception as exc: + logger.warning("Failed to parse %s: %s", manifest_file, exc) + return None + # ----------------------------------------------------------------------- # Entry-point scanning # ----------------------------------------------------------------------- @@ -609,6 +806,7 @@ class PluginManager: name=ep.name, source="entrypoint", path=ep.value, + key=ep.name, ) manifests.append(manifest) except Exception as exc: @@ -670,10 +868,16 @@ class PluginManager: loaded.error = str(exc) logger.warning("Failed to load plugin '%s': %s", manifest.name, exc) - self._plugins[manifest.name] = loaded + self._plugins[manifest.key or manifest.name] = loaded def _load_directory_module(self, manifest: PluginManifest) -> types.ModuleType: - """Import a directory-based plugin as ``hermes_plugins.``.""" + """Import a directory-based plugin as ``hermes_plugins.``. + + The module slug is derived from ``manifest.key`` so category-namespaced + plugins (``image_gen/openai``) import as + ``hermes_plugins.image_gen__openai`` without colliding with any + future ``tts/openai``. 
+ """ plugin_dir = Path(manifest.path) # type: ignore[arg-type] init_file = plugin_dir / "__init__.py" if not init_file.exists(): @@ -686,7 +890,9 @@ class PluginManager: ns_pkg.__package__ = _NS_PARENT sys.modules[_NS_PARENT] = ns_pkg - module_name = f"{_NS_PARENT}.{manifest.name.replace('-', '_')}" + key = manifest.key or manifest.name + slug = key.replace("/", "__").replace("-", "_") + module_name = f"{_NS_PARENT}.{slug}" spec = importlib.util.spec_from_file_location( module_name, init_file, @@ -767,10 +973,12 @@ class PluginManager: def list_plugins(self) -> List[Dict[str, Any]]: """Return a list of info dicts for all discovered plugins.""" result: List[Dict[str, Any]] = [] - for name, loaded in sorted(self._plugins.items()): + for key, loaded in sorted(self._plugins.items()): result.append( { - "name": name, + "name": loaded.manifest.name, + "key": loaded.manifest.key or loaded.manifest.name, + "kind": loaded.manifest.kind, "version": loaded.manifest.version, "description": loaded.manifest.description, "source": loaded.manifest.source, diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py index 1764474aa..e842086a4 100644 --- a/hermes_cli/providers.py +++ b/hermes_cli/providers.py @@ -94,6 +94,12 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = { transport="openai_chat", base_url_env_var="KIMI_BASE_URL", ), + "stepfun": HermesOverlay( + transport="openai_chat", + extra_env_vars=("STEPFUN_API_KEY",), + base_url_override="https://api.stepfun.ai/step_plan/v1", + base_url_env_var="STEPFUN_BASE_URL", + ), "minimax": HermesOverlay( transport="anthropic_messages", base_url_env_var="MINIMAX_BASE_URL", @@ -210,6 +216,10 @@ ALIASES: Dict[str, str] = { "kimi-coding-cn": "kimi-for-coding", "moonshot": "kimi-for-coding", + # stepfun + "step": "stepfun", + "stepfun-coding-plan": "stepfun", + # minimax-cn "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn", @@ -294,6 +304,7 @@ _LABEL_OVERRIDES: Dict[str, str] = { "nous": "Nous Portal", "openai-codex": "OpenAI 
Codex", "copilot-acp": "GitHub Copilot ACP", + "stepfun": "StepFun Step Plan", "xiaomi": "Xiaomi MiMo", "local": "Local endpoint", "bedrock": "AWS Bedrock", @@ -427,6 +438,16 @@ def determine_api_mode(provider: str, base_url: str = "") -> str: """ pdef = get_provider(provider) if pdef is not None: + # Even for known providers, check URL heuristics for special endpoints + # (e.g. kimi /coding endpoint needs anthropic_messages even on 'custom') + if base_url: + url_lower = base_url.rstrip("/").lower() + if "api.kimi.com/coding" in url_lower: + return "anthropic_messages" + if url_lower.endswith("/anthropic") or "api.anthropic.com" in url_lower: + return "anthropic_messages" + if "api.openai.com" in url_lower: + return "codex_responses" return TRANSPORT_TO_API_MODE.get(pdef.transport, "chat_completions") # Direct provider checks for providers not in HERMES_OVERLAYS @@ -439,6 +460,8 @@ def determine_api_mode(provider: str, base_url: str = "") -> str: hostname = base_url_hostname(base_url) if url_lower.endswith("/anthropic") or hostname == "api.anthropic.com": return "anthropic_messages" + if hostname == "api.kimi.com" and "/coding" in url_lower: + return "anthropic_messages" if hostname == "api.openai.com": return "codex_responses" if hostname.startswith("bedrock-runtime.") and base_url_host_matches(base_url, "amazonaws.com"): diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 3b2b4cab3..922946e2a 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -46,6 +46,9 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]: protocol under a ``/anthropic`` suffix โ€” treat those as ``anthropic_messages`` transport instead of the default ``chat_completions``. + - Kimi Code's ``api.kimi.com/coding`` endpoint also speaks the + Anthropic Messages protocol (the /coding route accepts Claude + Code's native request shape). 
""" normalized = (base_url or "").strip().lower().rstrip("/") hostname = base_url_hostname(base_url) @@ -55,6 +58,8 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]: return "codex_responses" if normalized.endswith("/anthropic"): return "anthropic_messages" + if hostname == "api.kimi.com" and "/coding" in normalized: + return "anthropic_messages" return None @@ -205,7 +210,8 @@ def _resolve_runtime_from_pool_entry( api_mode = opencode_model_api_mode(provider, model_cfg.get("default", "")) else: # Auto-detect Anthropic-compatible endpoints (/anthropic suffix, - # api.openai.com โ†’ codex_responses, api.x.ai โ†’ codex_responses). + # Kimi /coding, api.openai.com โ†’ codex_responses, api.x.ai โ†’ + # codex_responses). detected = _detect_api_mode_for_url(base_url) if detected: api_mode = detected @@ -492,8 +498,12 @@ def _resolve_openrouter_runtime( else: # Custom endpoint: use api_key from config when using config base_url (#1760). # When the endpoint is Ollama Cloud, check OLLAMA_API_KEY โ€” it's - # the canonical env var for ollama.com authentication. - _is_ollama_url = "ollama.com" in base_url.lower() + # the canonical env var for ollama.com authentication. Match on + # HOST, not substring โ€” a custom base_url whose path contains + # "ollama.com" (e.g. http://127.0.0.1/ollama.com/v1) or whose + # hostname is a look-alike (ollama.com.attacker.test) must not + # receive the Ollama credential. See GHSA-76xc-57q6-vm5m. + _is_ollama_url = base_url_host_matches(base_url, "ollama.com") api_key_candidates = [ explicit_api_key, (cfg_api_key if use_config_base_url else ""), @@ -656,7 +666,8 @@ def _resolve_explicit_runtime( if configured_mode: api_mode = configured_mode else: - # Auto-detect Anthropic-compatible endpoints (/anthropic suffix). + # Auto-detect from URL (Anthropic /anthropic suffix, + # api.openai.com โ†’ Responses, Kimi /coding, etc.). 
detected = _detect_api_mode_for_url(base_url) if detected: api_mode = detected @@ -906,8 +917,7 @@ def resolve_runtime_provider( code="no_aws_credentials", ) # Read bedrock-specific config from config.yaml - from hermes_cli.config import load_config as _load_bedrock_config - _bedrock_cfg = _load_bedrock_config().get("bedrock", {}) + _bedrock_cfg = load_config().get("bedrock", {}) # Region priority: config.yaml bedrock.region โ†’ env var โ†’ us-east-1 region = (_bedrock_cfg.get("region") or "").strip() or resolve_bedrock_region() auth_source = resolve_aws_auth_env_var() or "aws-sdk-default-chain" diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 53b0c180a..1fe5ae058 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -96,13 +96,14 @@ _DEFAULT_PROVIDER_MODELS = { "zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"], "kimi-coding": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"], "kimi-coding-cn": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"], + "stepfun": ["step-3.5-flash", "step-3.5-flash-2603"], "arcee": ["trinity-large-thinking", "trinity-large-preview", "trinity-mini"], "minimax": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"], "minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"], "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"], "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"], "opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"], - "opencode-go": ["glm-5.1", "glm-5", "kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7"], + "opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7", "qwen3.6-plus", "qwen3.5-plus"], 
"huggingface": [ "Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507", "Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528", @@ -408,13 +409,36 @@ def _print_setup_summary(config: dict, hermes_home): ("Browser Automation", False, missing_browser_hint) ) - # FAL (image generation) + # Image generation โ€” FAL (direct or via Nous), or any plugin-registered + # provider (OpenAI, etc.) if subscription_features.image_gen.managed_by_nous: tool_status.append(("Image Generation (Nous subscription)", True, None)) elif subscription_features.image_gen.available: tool_status.append(("Image Generation", True, None)) else: - tool_status.append(("Image Generation", False, "FAL_KEY")) + # Fall back to probing plugin-registered providers so OpenAI-only + # setups don't show as "missing FAL_KEY". + _img_backend = None + try: + from agent.image_gen_registry import list_providers + from hermes_cli.plugins import _ensure_plugins_discovered + + _ensure_plugins_discovered() + for _p in list_providers(): + if _p.name == "fal": + continue + try: + if _p.is_available(): + _img_backend = _p.display_name + break + except Exception: + continue + except Exception: + pass + if _img_backend: + tool_status.append((f"Image Generation ({_img_backend})", True, None)) + else: + tool_status.append(("Image Generation", False, "FAL_KEY or OPENAI_API_KEY")) # TTS โ€” show configured provider tts_provider = config.get("tts", {}).get("provider", "edge") @@ -434,7 +458,6 @@ def _print_setup_summary(config: dict, hermes_home): tool_status.append(("Text-to-Speech (Google Gemini)", True, None)) elif tts_provider == "neutts": try: - import importlib.util neutts_ok = importlib.util.find_spec("neutts") is not None except Exception: neutts_ok = False @@ -442,6 +465,16 @@ def _print_setup_summary(config: dict, hermes_home): tool_status.append(("Text-to-Speech (NeuTTS local)", True, None)) else: tool_status.append(("Text-to-Speech (NeuTTS โ€” not installed)", False, "run 'hermes setup 
tts'")) + elif tts_provider == "kittentts": + try: + import importlib.util + kittentts_ok = importlib.util.find_spec("kittentts") is not None + except Exception: + kittentts_ok = False + if kittentts_ok: + tool_status.append(("Text-to-Speech (KittenTTS local)", True, None)) + else: + tool_status.append(("Text-to-Speech (KittenTTS โ€” not installed)", False, "run 'hermes setup tts'")) else: tool_status.append(("Text-to-Speech (Edge TTS)", True, None)) @@ -772,6 +805,7 @@ def setup_model_provider(config: dict, *, quick: bool = False): "zai": "Z.AI / GLM", "kimi-coding": "Kimi / Moonshot", "kimi-coding-cn": "Kimi / Moonshot (China)", + "stepfun": "StepFun Step Plan", "minimax": "MiniMax", "minimax-cn": "MiniMax CN", "anthropic": "Anthropic", @@ -849,7 +883,6 @@ def setup_model_provider(config: dict, *, quick: bool = False): def _check_espeak_ng() -> bool: """Check if espeak-ng is installed.""" - import shutil return shutil.which("espeak-ng") is not None or shutil.which("espeak") is not None @@ -903,6 +936,31 @@ def _install_neutts_deps() -> bool: return False +def _install_kittentts_deps() -> bool: + """Install KittenTTS dependencies with user approval. 
Returns True on success.""" + import subprocess + import sys + + wheel_url = ( + "https://github.com/KittenML/KittenTTS/releases/download/" + "0.8.1/kittentts-0.8.1-py3-none-any.whl" + ) + print() + print_info("Installing kittentts Python package (~25-80MB model downloaded on first use)...") + print() + try: + subprocess.run( + [sys.executable, "-m", "pip", "install", "-U", wheel_url, "soundfile", "--quiet"], + check=True, timeout=300, + ) + print_success("kittentts installed successfully") + return True + except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e: + print_error(f"Failed to install kittentts: {e}") + print_info(f"Try manually: python -m pip install -U '{wheel_url}' soundfile") + return False + + def _setup_tts_provider(config: dict): """Interactive TTS provider selection with install flow for NeuTTS.""" tts_config = config.get("tts", {}) @@ -918,6 +976,7 @@ def _setup_tts_provider(config: dict): "mistral": "Mistral Voxtral TTS", "gemini": "Google Gemini TTS", "neutts": "NeuTTS", + "kittentts": "KittenTTS", } current_label = provider_labels.get(current_provider, current_provider) @@ -941,9 +1000,10 @@ def _setup_tts_provider(config: dict): "Mistral Voxtral TTS (multilingual, native Opus, needs API key)", "Google Gemini TTS (30 prebuilt voices, prompt-controllable, needs API key)", "NeuTTS (local on-device, free, ~300MB model download)", + "KittenTTS (local on-device, free, lightweight ~25-80MB ONNX)", ] ) - providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts"]) + providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts", "kittentts"]) choices.append(f"Keep current ({current_label})") keep_current_idx = len(choices) - 1 idx = prompt_choice("Select TTS provider:", choices, keep_current_idx) @@ -964,7 +1024,6 @@ def _setup_tts_provider(config: dict): if selected == "neutts": # Check if already installed try: - import importlib.util already_installed = 
importlib.util.find_spec("neutts") is not None except Exception: already_installed = False @@ -1063,6 +1122,29 @@ def _setup_tts_provider(config: dict): print_warning("No API key provided. Falling back to Edge TTS.") selected = "edge" + elif selected == "kittentts": + # Check if already installed + try: + import importlib.util + already_installed = importlib.util.find_spec("kittentts") is not None + except Exception: + already_installed = False + + if already_installed: + print_success("KittenTTS is already installed") + else: + print() + print_info("KittenTTS is lightweight (~25-80MB, CPU-only, no API key required).") + print_info("Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo") + print() + if prompt_yes_no("Install KittenTTS now?", True): + if not _install_kittentts_deps(): + print_warning("KittenTTS installation incomplete. Falling back to Edge TTS.") + selected = "edge" + else: + print_info("Skipping install. Set tts.provider to 'kittentts' after installing manually.") + selected = "edge" + # Save the selection if "tts" not in config: config["tts"] = {} @@ -1084,8 +1166,6 @@ def setup_tts(config: dict): def setup_terminal_backend(config: dict): """Configure the terminal execution backend.""" import platform as _platform - import shutil - print_header("Terminal Backend") print_info("Choose where Hermes runs shell commands and code.") print_info("This affects tool execution, file access, and isolation.") diff --git a/hermes_cli/skin_engine.py b/hermes_cli/skin_engine.py index 4222a966e..5619e7405 100644 --- a/hermes_cli/skin_engine.py +++ b/hermes_cli/skin_engine.py @@ -30,6 +30,14 @@ All fields are optional. Missing values inherit from the ``default`` skin. 
prompt: "#FFF8DC" # Prompt text color input_rule: "#CD7F32" # Input area horizontal rule response_border: "#FFD700" # Response box border (ANSI) + status_bar_bg: "#1a1a2e" # Status bar background + status_bar_text: "#C0C0C0" # Status bar default text + status_bar_strong: "#FFD700" # Status bar highlighted text + status_bar_dim: "#8B8682" # Status bar separators/muted text + status_bar_good: "#8FBC8F" # Healthy context usage + status_bar_warn: "#FFD700" # Warning context usage + status_bar_bad: "#FF8C00" # High context usage + status_bar_critical: "#FF6B6B" # Critical context usage session_label: "#DAA520" # Session label color session_border: "#8B8682" # Session ID dim color status_bar_bg: "#1a1a2e" # TUI status/usage bar background @@ -170,6 +178,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = { "prompt": "#FFF8DC", "input_rule": "#CD7F32", "response_border": "#FFD700", + "status_bar_bg": "#1a1a2e", "session_label": "#DAA520", "session_border": "#8B8682", }, @@ -203,6 +212,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = { "prompt": "#F1E6CF", "input_rule": "#9F1C1C", "response_border": "#C7A96B", + "status_bar_bg": "#2A1212", + "status_bar_text": "#F1E6CF", + "status_bar_strong": "#C7A96B", + "status_bar_dim": "#6E584B", + "status_bar_good": "#7BC96F", + "status_bar_warn": "#C7A96B", + "status_bar_bad": "#DD4A3A", + "status_bar_critical": "#EF5350", "session_label": "#C7A96B", "session_border": "#6E584B", }, @@ -267,6 +284,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = { "prompt": "#c9d1d9", "input_rule": "#444444", "response_border": "#aaaaaa", + "status_bar_bg": "#1F1F1F", + "status_bar_text": "#C9D1D9", + "status_bar_strong": "#E6EDF3", + "status_bar_dim": "#777777", + "status_bar_good": "#B5B5B5", + "status_bar_warn": "#AAAAAA", + "status_bar_bad": "#D0D0D0", + "status_bar_critical": "#F0F0F0", "session_label": "#888888", "session_border": "#555555", }, @@ -298,6 +323,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = { "prompt": "#c9d1d9", "input_rule": 
"#4169e1", "response_border": "#7eb8f6", + "status_bar_bg": "#151C2F", + "status_bar_text": "#C9D1D9", + "status_bar_strong": "#7EB8F6", + "status_bar_dim": "#4B5563", + "status_bar_good": "#63D0A6", + "status_bar_warn": "#E6A855", + "status_bar_bad": "#F7A072", + "status_bar_critical": "#FF7A7A", "session_label": "#7eb8f6", "session_border": "#4b5563", }, @@ -403,6 +436,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = { "prompt": "#EAF7FF", "input_rule": "#2A6FB9", "response_border": "#5DB8F5", + "status_bar_bg": "#0F2440", + "status_bar_text": "#EAF7FF", + "status_bar_strong": "#A9DFFF", + "status_bar_dim": "#496884", + "status_bar_good": "#6ED7B0", + "status_bar_warn": "#5DB8F5", + "status_bar_bad": "#2A6FB9", + "status_bar_critical": "#D94F4F", "session_label": "#A9DFFF", "session_border": "#496884", }, @@ -467,6 +508,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = { "prompt": "#F5F5F5", "input_rule": "#656565", "response_border": "#B7B7B7", + "status_bar_bg": "#202020", + "status_bar_text": "#D3D3D3", + "status_bar_strong": "#F5F5F5", + "status_bar_dim": "#656565", + "status_bar_good": "#B7B7B7", + "status_bar_warn": "#D3D3D3", + "status_bar_bad": "#E7E7E7", + "status_bar_critical": "#F5F5F5", "session_label": "#919191", "session_border": "#656565", }, @@ -532,6 +581,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = { "prompt": "#FFF0D4", "input_rule": "#C75B1D", "response_border": "#F29C38", + "status_bar_bg": "#2B160E", + "status_bar_text": "#FFF0D4", + "status_bar_strong": "#FFD39A", + "status_bar_dim": "#6C4724", + "status_bar_good": "#6BCB77", + "status_bar_warn": "#F29C38", + "status_bar_bad": "#E2832B", + "status_bar_critical": "#EF5350", "session_label": "#FFD39A", "session_border": "#6C4724", }, @@ -770,6 +827,13 @@ def get_prompt_toolkit_style_overrides() -> Dict[str, str]: warn = skin.get_color("ui_warn", "#FF8C00") error = skin.get_color("ui_error", "#FF6B6B") status_bg = skin.get_color("status_bar_bg", "#1a1a2e") + status_text = 
skin.get_color("status_bar_text", text) + status_strong = skin.get_color("status_bar_strong", title) + status_dim = skin.get_color("status_bar_dim", dim) + status_good = skin.get_color("status_bar_good", skin.get_color("ui_ok", "#8FBC8F")) + status_warn = skin.get_color("status_bar_warn", warn) + status_bad = skin.get_color("status_bar_bad", skin.get_color("banner_accent", warn)) + status_critical = skin.get_color("status_bar_critical", error) voice_bg = skin.get_color("voice_status_bg", status_bg) menu_bg = skin.get_color("completion_menu_bg", "#1a1a2e") menu_current_bg = skin.get_color("completion_menu_current_bg", "#333355") @@ -782,13 +846,13 @@ def get_prompt_toolkit_style_overrides() -> Dict[str, str]: "prompt": prompt, "prompt-working": f"{dim} italic", "hint": f"{dim} italic", - "status-bar": f"bg:{status_bg} {text}", - "status-bar-strong": f"bg:{status_bg} {title} bold", - "status-bar-dim": f"bg:{status_bg} {dim}", - "status-bar-good": f"bg:{status_bg} {skin.get_color('ui_ok', '#8FBC8F')} bold", - "status-bar-warn": f"bg:{status_bg} {warn} bold", - "status-bar-bad": f"bg:{status_bg} {skin.get_color('banner_accent', warn)} bold", - "status-bar-critical": f"bg:{status_bg} {error} bold", + "status-bar": f"bg:{status_bg} {status_text}", + "status-bar-strong": f"bg:{status_bg} {status_strong} bold", + "status-bar-dim": f"bg:{status_bg} {status_dim}", + "status-bar-good": f"bg:{status_bg} {status_good} bold", + "status-bar-warn": f"bg:{status_bg} {status_warn} bold", + "status-bar-bad": f"bg:{status_bg} {status_bad} bold", + "status-bar-critical": f"bg:{status_bg} {status_critical} bold", "input-rule": input_rule, "image-badge": f"{label} bold", "completion-menu": f"bg:{menu_bg} {text}", diff --git a/hermes_cli/status.py b/hermes_cli/status.py index 540afc303..8541f0a05 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -122,6 +122,7 @@ def show_status(args): "OpenAI": "OPENAI_API_KEY", "Z.AI/GLM": "GLM_API_KEY", "Kimi": "KIMI_API_KEY", + "StepFun 
Step Plan": "STEPFUN_API_KEY", "MiniMax": "MINIMAX_API_KEY", "MiniMax-CN": "MINIMAX_CN_API_KEY", "Firecrawl": "FIRECRAWL_API_KEY", @@ -252,6 +253,7 @@ def show_status(args): apikey_providers = { "Z.AI / GLM": ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "Kimi / Moonshot": ("KIMI_API_KEY",), + "StepFun Step Plan": ("STEPFUN_API_KEY",), "MiniMax": ("MINIMAX_API_KEY",), "MiniMax (China)": ("MINIMAX_CN_API_KEY",), } diff --git a/hermes_cli/tips.py b/hermes_cli/tips.py index 71bace524..24acc15f5 100644 --- a/hermes_cli/tips.py +++ b/hermes_cli/tips.py @@ -127,7 +127,7 @@ TIPS = [ # --- Tools & Capabilities --- "execute_code runs Python scripts that call Hermes tools programmatically โ€” results stay out of context.", - "delegate_task spawns up to 3 concurrent sub-agents with isolated contexts for parallel work.", + "delegate_task spawns up to 3 concurrent sub-agents by default (configurable via delegation.max_concurrent_children) with isolated contexts for parallel work.", "web_extract works on PDF URLs โ€” pass any PDF link and it converts to markdown.", "search_files is ripgrep-backed and faster than grep โ€” use it instead of terminal grep.", "patch uses 9 fuzzy matching strategies so minor whitespace differences won't break edits.", diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index 23a03b3bd..7a9a598f9 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -24,7 +24,7 @@ from hermes_cli.nous_subscription import ( apply_nous_managed_defaults, get_nous_subscription_features, ) -from tools.tool_backend_helpers import managed_nous_tools_enabled +from tools.tool_backend_helpers import fal_key_is_configured, managed_nous_tools_enabled from utils import base_url_hostname logger = logging.getLogger(__name__) @@ -182,6 +182,14 @@ TOOL_CATEGORIES = { ], "tts_provider": "gemini", }, + { + "name": "KittenTTS", + "badge": "local ยท free", + "tag": "Lightweight local ONNX TTS (~25MB), no API key", + "env_vars": [], + 
"tts_provider": "kittentts", + "post_setup": "kittentts", + }, ], }, "web": { @@ -423,6 +431,36 @@ def _run_post_setup(post_setup_key: str): _print_warning(" Node.js not found. Install Camofox via Docker:") _print_info(" docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser") + elif post_setup_key == "kittentts": + try: + __import__("kittentts") + _print_success(" kittentts is already installed") + return + except ImportError: + pass + import subprocess + _print_info(" Installing kittentts (~25-80MB model, CPU-only)...") + wheel_url = ( + "https://github.com/KittenML/KittenTTS/releases/download/" + "0.8.1/kittentts-0.8.1-py3-none-any.whl" + ) + try: + result = subprocess.run( + [sys.executable, "-m", "pip", "install", "-U", wheel_url, "soundfile", "--quiet"], + capture_output=True, text=True, timeout=300, + ) + if result.returncode == 0: + _print_success(" kittentts installed") + _print_info(" Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo") + _print_info(" Models: KittenML/kitten-tts-nano-0.8-int8 (25MB), micro (41MB), mini (80MB)") + else: + _print_warning(" kittentts install failed:") + _print_info(f" {result.stderr.strip()[:300]}") + _print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile") + except subprocess.TimeoutExpired: + _print_warning(" kittentts install timed out (>5min)") + _print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile") + elif post_setup_key == "rl_training": try: __import__("tinker_atropos") @@ -809,6 +847,51 @@ def _configure_toolset(ts_key: str, config: dict): _configure_simple_requirements(ts_key) +def _plugin_image_gen_providers() -> list[dict]: + """Build picker-row dicts from plugin-registered image gen providers. 
+ + Each returned dict looks like a regular ``TOOL_CATEGORIES`` provider + row but carries an ``image_gen_plugin_name`` marker so downstream + code (config writing, model picker) knows to route through the + plugin registry instead of the in-tree FAL backend. + + FAL is skipped โ€” it's already exposed by the hardcoded + ``TOOL_CATEGORIES["image_gen"]`` entries. When FAL gets ported to + a plugin in a follow-up PR, the hardcoded entries go away and this + function surfaces it alongside OpenAI automatically. + """ + try: + from agent.image_gen_registry import list_providers + from hermes_cli.plugins import _ensure_plugins_discovered + + _ensure_plugins_discovered() + providers = list_providers() + except Exception: + return [] + + rows: list[dict] = [] + for provider in providers: + if getattr(provider, "name", None) == "fal": + # FAL has its own hardcoded rows today. + continue + try: + schema = provider.get_setup_schema() + except Exception: + continue + if not isinstance(schema, dict): + continue + rows.append( + { + "name": schema.get("name", provider.display_name), + "badge": schema.get("badge", ""), + "tag": schema.get("tag", ""), + "env_vars": schema.get("env_vars", []), + "image_gen_plugin_name": provider.name, + } + ) + return rows + + def _visible_providers(cat: dict, config: dict) -> list[dict]: """Return provider entries visible for the current auth/config state.""" features = get_nous_subscription_features(config) @@ -819,6 +902,12 @@ def _visible_providers(cat: dict, config: dict) -> list[dict]: if provider.get("requires_nous_auth") and not features.nous_auth_present: continue visible.append(provider) + + # Inject plugin-registered image_gen backends (OpenAI today, more + # later) so the picker lists them alongside FAL / Nous Subscription. 
+ if cat.get("name") == "Image Generation": + visible.extend(_plugin_image_gen_providers()) + return visible @@ -838,7 +927,24 @@ def _toolset_needs_configuration_prompt(ts_key: str, config: dict) -> bool: browser_cfg = config.get("browser", {}) return not isinstance(browser_cfg, dict) or "cloud_provider" not in browser_cfg if ts_key == "image_gen": - return not get_env_value("FAL_KEY") + # Satisfied when the in-tree FAL backend is configured OR any + # plugin-registered image gen provider is available. + if fal_key_is_configured(): + return False + try: + from agent.image_gen_registry import list_providers + from hermes_cli.plugins import _ensure_plugins_discovered + + _ensure_plugins_discovered() + for provider in list_providers(): + try: + if provider.is_available(): + return False + except Exception: + continue + except Exception: + pass + return True return not _toolset_has_keys(ts_key, config) @@ -1057,6 +1163,88 @@ def _configure_imagegen_model(backend_name: str, config: dict) -> None: _print_success(f" Model set to: {chosen}") +def _plugin_image_gen_catalog(plugin_name: str): + """Return ``(catalog_dict, default_model_id)`` for a plugin provider. + + ``catalog_dict`` is shaped like the legacy ``FAL_MODELS`` table โ€” + ``{model_id: {"display", "speed", "strengths", "price", ...}}`` โ€” + so the existing picker code paths work without change. Returns + ``({}, None)`` if the provider isn't registered or has no models. 
+ """ + try: + from agent.image_gen_registry import get_provider + from hermes_cli.plugins import _ensure_plugins_discovered + + _ensure_plugins_discovered() + provider = get_provider(plugin_name) + except Exception: + return {}, None + if provider is None: + return {}, None + try: + models = provider.list_models() or [] + default = provider.default_model() + except Exception: + return {}, None + catalog = {m["id"]: m for m in models if isinstance(m, dict) and "id" in m} + return catalog, default + + +def _configure_imagegen_model_for_plugin(plugin_name: str, config: dict) -> None: + """Prompt the user to pick a model for a plugin-registered backend. + + Writes selection to ``image_gen.model``. Mirrors + :func:`_configure_imagegen_model` but sources its catalog from the + plugin registry instead of :data:`IMAGEGEN_BACKENDS`. + """ + catalog, default_model = _plugin_image_gen_catalog(plugin_name) + if not catalog: + return + + cur_cfg = config.setdefault("image_gen", {}) + if not isinstance(cur_cfg, dict): + cur_cfg = {} + config["image_gen"] = cur_cfg + current_model = cur_cfg.get("model") or default_model + if current_model not in catalog: + current_model = default_model + + model_ids = list(catalog.keys()) + ordered = [current_model] + [m for m in model_ids if m != current_model] + + widths = { + "model": max(len(m) for m in model_ids), + "speed": max((len(catalog[m].get("speed", "")) for m in model_ids), default=6), + "strengths": max((len(catalog[m].get("strengths", "")) for m in model_ids), default=0), + } + + print() + header = ( + f" {'Model':<{widths['model']}} " + f"{'Speed':<{widths['speed']}} " + f"{'Strengths':<{widths['strengths']}} " + f"Price" + ) + print(color(header, Colors.CYAN)) + + rows = [] + for mid in ordered: + row = _format_imagegen_model_row(mid, catalog[mid], widths) + if mid == current_model: + row += " โ† currently in use" + rows.append(row) + + idx = _prompt_choice( + f" Choose {plugin_name} model:", + rows, + default=0, + ) + + 
chosen = ordered[idx] + cur_cfg["model"] = chosen + _print_success(f" Model set to: {chosen}") + + def _configure_provider(provider: dict, config: dict): """Configure a single provider - prompt for API keys and set config.""" env_vars = provider.get("env_vars", []) @@ -1113,10 +1301,28 @@ def _configure_provider(provider: dict, config: dict): _print_success(f" {provider['name']} - no configuration needed!") if managed_feature: _print_info(" Requests for this tool will be billed to your Nous subscription.") + # Plugin-registered image_gen provider: write image_gen.provider + # and route model selection to the plugin's own catalog. + plugin_name = provider.get("image_gen_plugin_name") + if plugin_name: + img_cfg = config.setdefault("image_gen", {}) + if not isinstance(img_cfg, dict): + img_cfg = {} + config["image_gen"] = img_cfg + img_cfg["provider"] = plugin_name + _print_success(f" image_gen.provider set to: {plugin_name}") + _configure_imagegen_model_for_plugin(plugin_name, config) + return # Imagegen backends prompt for model selection after backend pick. backend = provider.get("imagegen_backend") if backend: _configure_imagegen_model(backend, config) + # In-tree FAL is the only non-plugin backend today. Keep + # image_gen.provider clear so the dispatch shim falls through + # to the legacy FAL path. 
+ img_cfg = config.setdefault("image_gen", {}) + if isinstance(img_cfg, dict) and img_cfg.get("provider") not in (None, "", "fal"): + img_cfg["provider"] = "fal" return # Prompt for each required env var @@ -1151,10 +1357,23 @@ def _configure_provider(provider: dict, config: dict): if all_configured: _print_success(f" {provider['name']} configured!") + plugin_name = provider.get("image_gen_plugin_name") + if plugin_name: + img_cfg = config.setdefault("image_gen", {}) + if not isinstance(img_cfg, dict): + img_cfg = {} + config["image_gen"] = img_cfg + img_cfg["provider"] = plugin_name + _print_success(f" image_gen.provider set to: {plugin_name}") + _configure_imagegen_model_for_plugin(plugin_name, config) + return # Imagegen backends prompt for model selection after env vars are in. backend = provider.get("imagegen_backend") if backend: _configure_imagegen_model(backend, config) + img_cfg = config.setdefault("image_gen", {}) + if isinstance(img_cfg, dict) and img_cfg.get("provider") not in (None, "", "fal"): + img_cfg["provider"] = "fal" def _configure_simple_requirements(ts_key: str): @@ -1186,7 +1405,6 @@ def _configure_simple_requirements(ts_key: str): if api_key and api_key.strip(): save_env_value("OPENAI_API_KEY", api_key.strip()) # Save vision base URL to config (not .env โ€” only secrets go there) - from hermes_cli.config import load_config, save_config _cfg = load_config() _aux = _cfg.setdefault("auxiliary", {}).setdefault("vision", {}) _aux["base_url"] = base_url diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index a75f4ca30..c815927ea 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -16,6 +16,7 @@ import json import logging import os import secrets +import subprocess import sys import threading import time @@ -114,6 +115,91 @@ def _require_token(request: Request) -> None: raise HTTPException(status_code=401, detail="Unauthorized") +# Accepted Host header values for loopback binds. 
DNS rebinding attacks +# point a victim browser at an attacker-controlled hostname (evil.test) +# which resolves to 127.0.0.1 after a TTL flip โ€” bypassing same-origin +# checks because the browser now considers evil.test and our dashboard +# "same origin". Validating the Host header at the app layer rejects any +# request whose Host isn't one we bound for. See GHSA-ppp5-vxwm-4cf7. +_LOOPBACK_HOST_VALUES: frozenset = frozenset({ + "localhost", "127.0.0.1", "::1", +}) + + +def _is_accepted_host(host_header: str, bound_host: str) -> bool: + """True if the Host header targets the interface we bound to. + + Accepts: + - Exact bound host (with or without port suffix) + - Loopback aliases when bound to loopback + - Any host when bound to 0.0.0.0 (explicit opt-in to non-loopback, + no protection possible at this layer) + """ + if not host_header: + return False + # Strip port suffix. IPv6 addresses use bracket notation: + # [::1] โ€” no port + # [::1]:9119 โ€” with port + # Plain hosts/v4: + # localhost:9119 + # 127.0.0.1:9119 + h = host_header.strip() + if h.startswith("["): + # IPv6 bracketed โ€” port (if any) follows "]:" + close = h.find("]") + if close != -1: + host_only = h[1:close] # strip brackets + else: + host_only = h.strip("[]") + else: + host_only = h.rsplit(":", 1)[0] if ":" in h else h + host_only = host_only.lower() + + # 0.0.0.0 bind means operator explicitly opted into all-interfaces + # (requires --insecure per web_server.start_server). No Host-layer + # defence can protect that mode; rely on operator network controls. 
+ if bound_host in ("0.0.0.0", "::"): + return True + + # Loopback bind: accept the loopback names + bound_lc = bound_host.lower() + if bound_lc in _LOOPBACK_HOST_VALUES: + return host_only in _LOOPBACK_HOST_VALUES + + # Explicit non-loopback bind: require exact host match + return host_only == bound_lc + + +@app.middleware("http") +async def host_header_middleware(request: Request, call_next): + """Reject requests whose Host header doesn't match the bound interface. + + Defends against DNS rebinding: a victim browser on a localhost + dashboard is tricked into fetching from an attacker hostname that + TTL-flips to 127.0.0.1. CORS and same-origin checks don't help โ€” + the browser now treats the attacker origin as same-origin with the + dashboard. Host-header validation at the app layer catches it. + + See GHSA-ppp5-vxwm-4cf7. + """ + # Store the bound host on app.state so this middleware can read it โ€” + # set by start_server() at listen time. + bound_host = getattr(app.state, "bound_host", None) + if bound_host: + host_header = request.headers.get("host", "") + if not _is_accepted_host(host_header, bound_host): + return JSONResponse( + status_code=400, + content={ + "detail": ( + "Invalid Host header. Dashboard requests must use " + "the hostname the server was bound to." + ), + }, + ) + return await call_next(request) + + @app.middleware("http") async def auth_middleware(request: Request, call_next): """Require the session token on all /api/ routes except the public list.""" @@ -476,6 +562,138 @@ async def get_status(): } +# --------------------------------------------------------------------------- +# Gateway + update actions (invoked from the Status page). +# +# Both commands are spawned as detached subprocesses so the HTTP request +# returns immediately. stdin is closed (``DEVNULL``) so any stray ``input()`` +# calls fail fast with EOF rather than hanging forever. 
stdout/stderr are
+# streamed to a per-action log file under ``~/.hermes/logs/<name>.log`` so
+# the dashboard can tail them back to the user.
+# ---------------------------------------------------------------------------
+
+_ACTION_LOG_DIR: Path = get_hermes_home() / "logs"
+
+# Short ``name`` (from the URL) → absolute log file path.
+_ACTION_LOG_FILES: Dict[str, str] = {
+    "gateway-restart": "gateway-restart.log",
+    "hermes-update": "hermes-update.log",
+}
+
+# ``name`` → most recently spawned Popen handle. Used so ``status`` can
+# report liveness and exit code without shelling out to ``ps``.
+_ACTION_PROCS: Dict[str, subprocess.Popen] = {}
+
+
+def _spawn_hermes_action(subcommand: List[str], name: str) -> subprocess.Popen:
+    """Spawn ``hermes <subcommand>`` detached and record the Popen handle.
+
+    Uses the running interpreter's ``hermes_cli.main`` module so the action
+    inherits the same venv/PYTHONPATH the web server is using.
+    """
+    log_file_name = _ACTION_LOG_FILES[name]
+    _ACTION_LOG_DIR.mkdir(parents=True, exist_ok=True)
+    log_path = _ACTION_LOG_DIR / log_file_name
+    log_file = open(log_path, "ab", buffering=0)
+    log_file.write(
+        f"\n=== {name} started {time.strftime('%Y-%m-%d %H:%M:%S')} ===\n".encode()
+    )
+
+    cmd = [sys.executable, "-m", "hermes_cli.main", *subcommand]
+
+    popen_kwargs: Dict[str, Any] = {
+        "cwd": str(PROJECT_ROOT),
+        "stdin": subprocess.DEVNULL,
+        "stdout": log_file,
+        "stderr": subprocess.STDOUT,
+        "env": {**os.environ, "HERMES_NONINTERACTIVE": "1"},
+    }
+    if sys.platform == "win32":
+        popen_kwargs["creationflags"] = (
+            subprocess.CREATE_NEW_PROCESS_GROUP  # type: ignore[attr-defined]
+            | getattr(subprocess, "DETACHED_PROCESS", 0)
+        )
+    else:
+        popen_kwargs["start_new_session"] = True
+
+    proc = subprocess.Popen(cmd, **popen_kwargs)
+    _ACTION_PROCS[name] = proc
+    return proc
+
+
+def _tail_lines(path: Path, n: int) -> List[str]:
+    """Return the last ``n`` lines of ``path``. Reads the whole file — fine
+    for our small per-action logs.
Binary-decoded with ``errors='replace'`` + so log corruption doesn't 500 the endpoint.""" + if not path.exists(): + return [] + try: + text = path.read_text(errors="replace") + except OSError: + return [] + lines = text.splitlines() + return lines[-n:] if n > 0 else lines + + +@app.post("/api/gateway/restart") +async def restart_gateway(): + """Kick off a ``hermes gateway restart`` in the background.""" + try: + proc = _spawn_hermes_action(["gateway", "restart"], "gateway-restart") + except Exception as exc: + _log.exception("Failed to spawn gateway restart") + raise HTTPException(status_code=500, detail=f"Failed to restart gateway: {exc}") + return { + "ok": True, + "pid": proc.pid, + "name": "gateway-restart", + } + + +@app.post("/api/hermes/update") +async def update_hermes(): + """Kick off ``hermes update`` in the background.""" + try: + proc = _spawn_hermes_action(["update"], "hermes-update") + except Exception as exc: + _log.exception("Failed to spawn hermes update") + raise HTTPException(status_code=500, detail=f"Failed to start update: {exc}") + return { + "ok": True, + "pid": proc.pid, + "name": "hermes-update", + } + + +@app.get("/api/actions/{name}/status") +async def get_action_status(name: str, lines: int = 200): + """Tail an action log and report whether the process is still running.""" + log_file_name = _ACTION_LOG_FILES.get(name) + if log_file_name is None: + raise HTTPException(status_code=404, detail=f"Unknown action: {name}") + + log_path = _ACTION_LOG_DIR / log_file_name + tail = _tail_lines(log_path, min(max(lines, 1), 2000)) + + proc = _ACTION_PROCS.get(name) + if proc is None: + running = False + exit_code: Optional[int] = None + pid: Optional[int] = None + else: + exit_code = proc.poll() + running = exit_code is None + pid = proc.pid + + return { + "name": name, + "running": running, + "exit_code": exit_code, + "pid": pid, + "lines": tail, + } + + @app.get("/api/sessions") async def get_sessions(limit: int = 20, offset: int = 0): try: @@ 
-1971,7 +2189,8 @@ async def get_usage_analytics(days: int = 30): SUM(reasoning_tokens) as reasoning_tokens, COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost, COALESCE(SUM(actual_cost_usd), 0) as actual_cost, - COUNT(*) as sessions + COUNT(*) as sessions, + SUM(COALESCE(api_call_count, 0)) as api_calls FROM sessions WHERE started_at > ? GROUP BY day ORDER BY day """, (cutoff,)) @@ -1982,7 +2201,8 @@ async def get_usage_analytics(days: int = 30): SUM(input_tokens) as input_tokens, SUM(output_tokens) as output_tokens, COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost, - COUNT(*) as sessions + COUNT(*) as sessions, + SUM(COALESCE(api_call_count, 0)) as api_calls FROM sessions WHERE started_at > ? AND model IS NOT NULL GROUP BY model ORDER BY SUM(input_tokens) + SUM(output_tokens) DESC """, (cutoff,)) @@ -1995,7 +2215,8 @@ async def get_usage_analytics(days: int = 30): SUM(reasoning_tokens) as total_reasoning, COALESCE(SUM(estimated_cost_usd), 0) as total_estimated_cost, COALESCE(SUM(actual_cost_usd), 0) as total_actual_cost, - COUNT(*) as total_sessions + COUNT(*) as total_sessions, + SUM(COALESCE(api_call_count, 0)) as total_api_calls FROM sessions WHERE started_at > ? """, (cutoff,)) totals = dict(cur3.fetchone()) @@ -2465,13 +2686,15 @@ def start_server( "authentication. Only use on trusted networks.", host, ) + # Record the bound host so host_header_middleware can validate incoming + # Host headers against it. Defends against DNS rebinding (GHSA-ppp5-vxwm-4cf7). 
+ app.state.bound_host = host + if open_browser: - import threading import webbrowser def _open(): - import time as _t - _t.sleep(1.0) + time.sleep(1.0) webbrowser.open(f"http://{host}:{port}") threading.Thread(target=_open, daemon=True).start() diff --git a/hermes_state.py b/hermes_state.py index 2d8a0fd4a..0ea9815b5 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -31,7 +31,7 @@ T = TypeVar("T") DEFAULT_DB_PATH = get_hermes_home() / "state.db" -SCHEMA_VERSION = 6 +SCHEMA_VERSION = 8 SCHEMA_SQL = """ CREATE TABLE IF NOT EXISTS schema_version ( @@ -65,6 +65,7 @@ CREATE TABLE IF NOT EXISTS sessions ( cost_source TEXT, pricing_version TEXT, title TEXT, + api_call_count INTEGER DEFAULT 0, FOREIGN KEY (parent_session_id) REFERENCES sessions(id) ); @@ -80,10 +81,16 @@ CREATE TABLE IF NOT EXISTS messages ( token_count INTEGER, finish_reason TEXT, reasoning TEXT, + reasoning_content TEXT, reasoning_details TEXT, codex_reasoning_items TEXT ); +CREATE TABLE IF NOT EXISTS state_meta ( + key TEXT PRIMARY KEY, + value TEXT +); + CREATE INDEX IF NOT EXISTS idx_sessions_source ON sessions(source); CREATE INDEX IF NOT EXISTS idx_sessions_parent ON sessions(parent_session_id); CREATE INDEX IF NOT EXISTS idx_sessions_started ON sessions(started_at DESC); @@ -329,6 +336,26 @@ class SessionDB: except sqlite3.OperationalError: pass # Column already exists cursor.execute("UPDATE schema_version SET version = 6") + if current_version < 7: + # v7: preserve provider-native reasoning_content separately from + # normalized reasoning text. Kimi/Moonshot replay can require + # this field on assistant tool-call messages when thinking is on. 
+ try: + cursor.execute('ALTER TABLE messages ADD COLUMN "reasoning_content" TEXT') + except sqlite3.OperationalError: + pass # Column already exists + cursor.execute("UPDATE schema_version SET version = 7") + if current_version < 8: + # v8: add api_call_count column to sessions โ€” tracks the number + # of individual LLM API calls made within a session (as opposed + # to the session count itself). + try: + cursor.execute( + 'ALTER TABLE sessions ADD COLUMN "api_call_count" INTEGER DEFAULT 0' + ) + except sqlite3.OperationalError: + pass # Column already exists + cursor.execute("UPDATE schema_version SET version = 8") # Unique title index โ€” always ensure it exists (safe to run after migrations # since the title column is guaranteed to exist at this point) @@ -435,6 +462,7 @@ class SessionDB: billing_provider: Optional[str] = None, billing_base_url: Optional[str] = None, billing_mode: Optional[str] = None, + api_call_count: int = 0, absolute: bool = False, ) -> None: """Update token counters and backfill model if not already set. @@ -464,7 +492,8 @@ class SessionDB: billing_provider = COALESCE(billing_provider, ?), billing_base_url = COALESCE(billing_base_url, ?), billing_mode = COALESCE(billing_mode, ?), - model = COALESCE(model, ?) + model = COALESCE(model, ?), + api_call_count = ? WHERE id = ?""" else: sql = """UPDATE sessions SET @@ -484,7 +513,8 @@ class SessionDB: billing_provider = COALESCE(billing_provider, ?), billing_base_url = COALESCE(billing_base_url, ?), billing_mode = COALESCE(billing_mode, ?), - model = COALESCE(model, ?) + model = COALESCE(model, ?), + api_call_count = COALESCE(api_call_count, 0) + ? 
WHERE id = ?""" params = ( input_tokens, @@ -502,6 +532,7 @@ class SessionDB: billing_base_url, billing_mode, model, + api_call_count, session_id, ) def _do(conn): @@ -922,6 +953,7 @@ class SessionDB: token_count: int = None, finish_reason: str = None, reasoning: str = None, + reasoning_content: str = None, reasoning_details: Any = None, codex_reasoning_items: Any = None, ) -> int: @@ -951,8 +983,8 @@ class SessionDB: cursor = conn.execute( """INSERT INTO messages (session_id, role, content, tool_call_id, tool_calls, tool_name, timestamp, token_count, finish_reason, - reasoning, reasoning_details, codex_reasoning_items) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", + reasoning, reasoning_content, reasoning_details, codex_reasoning_items) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", ( session_id, role, @@ -964,6 +996,7 @@ class SessionDB: token_count, finish_reason, reasoning, + reasoning_content, reasoning_details_json, codex_items_json, ), @@ -1014,7 +1047,7 @@ class SessionDB: with self._lock: cursor = self._conn.execute( "SELECT role, content, tool_call_id, tool_calls, tool_name, " - "reasoning, reasoning_details, codex_reasoning_items " + "reasoning, reasoning_content, reasoning_details, codex_reasoning_items " "FROM messages WHERE session_id = ? 
ORDER BY timestamp, id", (session_id,), ) @@ -1038,6 +1071,8 @@ class SessionDB: if row["role"] == "assistant": if row["reasoning"]: msg["reasoning"] = row["reasoning"] + if row["reasoning_content"] is not None: + msg["reasoning_content"] = row["reasoning_content"] if row["reasoning_details"]: try: msg["reasoning_details"] = json.loads(row["reasoning_details"]) @@ -1441,3 +1476,116 @@ class SessionDB: return len(session_ids) return self._execute_write(_do) + + # โ”€โ”€ Meta key/value (for scheduler bookkeeping) โ”€โ”€ + + def get_meta(self, key: str) -> Optional[str]: + """Read a value from the state_meta key/value store.""" + with self._lock: + row = self._conn.execute( + "SELECT value FROM state_meta WHERE key = ?", (key,) + ).fetchone() + if row is None: + return None + return row["value"] if isinstance(row, sqlite3.Row) else row[0] + + def set_meta(self, key: str, value: str) -> None: + """Write a value to the state_meta key/value store.""" + def _do(conn): + conn.execute( + "INSERT INTO state_meta (key, value) VALUES (?, ?) " + "ON CONFLICT(key) DO UPDATE SET value = excluded.value", + (key, value), + ) + self._execute_write(_do) + + # โ”€โ”€ Space reclamation โ”€โ”€ + + def vacuum(self) -> None: + """Run VACUUM to reclaim disk space after large deletes. + + SQLite does not shrink the database file when rows are deleted โ€” + freed pages just get reused on the next insert. After a prune that + removed hundreds of sessions, the file stays bloated unless we + explicitly VACUUM. + + VACUUM rewrites the entire DB, so it's expensive (seconds per + 100MB) and cannot run inside a transaction. It also acquires an + exclusive lock, so callers must ensure no other writers are + active. Safe to call at startup before the gateway/CLI starts + serving traffic. + """ + # VACUUM cannot be executed inside a transaction. + with self._lock: + # Best-effort WAL checkpoint first, then VACUUM. 
+ try: + self._conn.execute("PRAGMA wal_checkpoint(TRUNCATE)") + except Exception: + pass + self._conn.execute("VACUUM") + + def maybe_auto_prune_and_vacuum( + self, + retention_days: int = 90, + min_interval_hours: int = 24, + vacuum: bool = True, + ) -> Dict[str, Any]: + """Idempotent auto-maintenance: prune old sessions + optional VACUUM. + + Records the last run timestamp in state_meta so subsequent calls + within ``min_interval_hours`` no-op. Designed to be called once at + startup from long-lived entrypoints (CLI, gateway, cron scheduler). + + Never raises. On any failure, logs a warning and returns a dict + with ``"error"`` set. + + Returns a dict with keys: + - ``"skipped"`` (bool) โ€” true if within min_interval_hours of last run + - ``"pruned"`` (int) โ€” number of sessions deleted + - ``"vacuumed"`` (bool) โ€” true if VACUUM ran + - ``"error"`` (str, optional) โ€” present only on failure + """ + result: Dict[str, Any] = {"skipped": False, "pruned": 0, "vacuumed": False} + try: + # Skip if another process/call did maintenance recently. + last_raw = self.get_meta("last_auto_prune") + now = time.time() + if last_raw: + try: + last_ts = float(last_raw) + if now - last_ts < min_interval_hours * 3600: + result["skipped"] = True + return result + except (TypeError, ValueError): + pass # corrupt meta; treat as no prior run + + pruned = self.prune_sessions(older_than_days=retention_days) + result["pruned"] = pruned + + # Only VACUUM if we actually freed rows โ€” VACUUM on a tight DB + # is wasted I/O. Threshold keeps small DBs from paying the cost. + if vacuum and pruned > 0: + try: + self.vacuum() + result["vacuumed"] = True + except Exception as exc: + logger.warning("state.db VACUUM failed: %s", exc) + + # Record the attempt even if pruned == 0, so we don't retry + # every startup within the min_interval_hours window. 
+ self.set_meta("last_auto_prune", str(now)) + + if pruned > 0: + logger.info( + "state.db auto-maintenance: pruned %d session(s) older than %d days%s", + pruned, + retention_days, + " + VACUUM" if result["vacuumed"] else "", + ) + except Exception as exc: + # Maintenance must never block startup. Log and return error marker. + logger.warning("state.db auto-maintenance failed: %s", exc) + result["error"] = str(exc) + + return result + diff --git a/model_tools.py b/model_tools.py index db4b46326..bee80f49b 100644 --- a/model_tools.py +++ b/model_tools.py @@ -108,9 +108,15 @@ def _run_async(coro): if loop and loop.is_running(): # Inside an async context (gateway, RL env) โ€” run in a fresh thread. import concurrent.futures - with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: - future = pool.submit(asyncio.run, coro) + pool = concurrent.futures.ThreadPoolExecutor(max_workers=1) + future = pool.submit(asyncio.run, coro) + try: return future.result(timeout=300) + except concurrent.futures.TimeoutError: + future.cancel() + raise + finally: + pool.shutdown(wait=False, cancel_futures=True) # If we're on a worker thread (e.g., parallel tool execution in # delegate_task), use a per-thread persistent loop. 
This avoids diff --git a/nix/nixosModules.nix b/nix/nixosModules.nix index 3f2709f81..641b98d1d 100644 --- a/nix/nixosModules.nix +++ b/nix/nixosModules.nix @@ -28,7 +28,7 @@ let cfg = config.services.hermes-agent; - hermes-agent = inputs.self.packages.${pkgs.system}.default; + hermes-agent = inputs.self.packages.${pkgs.stdenv.hostPlatform.system}.default; # Deep-merge config type (from 0xrsydn/nix-hermes-agent) deepConfigType = lib.types.mkOptionType { diff --git a/optional-skills/dogfood/DESCRIPTION.md b/optional-skills/dogfood/DESCRIPTION.md new file mode 100644 index 000000000..f083fd72b --- /dev/null +++ b/optional-skills/dogfood/DESCRIPTION.md @@ -0,0 +1,3 @@ +# Dogfood โ€” Advanced QA & Testing Skills + +Specialized QA workflows that go beyond basic bug-finding. These skills use structured methodologies to surface UX friction, accessibility issues, and product-level problems that standard testing misses. diff --git a/optional-skills/dogfood/adversarial-ux-test/SKILL.md b/optional-skills/dogfood/adversarial-ux-test/SKILL.md new file mode 100644 index 000000000..1777e083d --- /dev/null +++ b/optional-skills/dogfood/adversarial-ux-test/SKILL.md @@ -0,0 +1,190 @@ +--- +name: adversarial-ux-test +description: Roleplay the most difficult, tech-resistant user for your product. Browse the app as that persona, find every UX pain point, then filter complaints through a pragmatism layer to separate real problems from noise. Creates actionable tickets from genuine issues only. +version: 1.0.0 +author: Omni @ Comelse +license: MIT +metadata: + hermes: + tags: [qa, ux, testing, adversarial, dogfood, personas, user-testing] + related_skills: [dogfood] +--- + +# Adversarial UX Test + +Roleplay the worst-case user for your product โ€” the person who hates technology, doesn't want your software, and will find every reason to complain. Then filter their feedback through a pragmatism layer to separate real UX problems from "I hate computers" noise. 
+ +Think of it as an automated "mom test" โ€” but angry. + +## Why This Works + +Most QA finds bugs. This finds **friction**. A technically correct app can still be unusable for real humans. The adversarial persona catches: +- Confusing terminology that makes sense to developers but not users +- Too many steps to accomplish basic tasks +- Missing onboarding or "aha moments" +- Accessibility issues (font size, contrast, click targets) +- Cold-start problems (empty states, no demo content) +- Paywall/signup friction that kills conversion + +The **pragmatism filter** (Phase 3) is what makes this useful instead of just entertaining. Without it, you'd add a "print this page" button to every screen because Grandpa can't figure out PDFs. + +## How to Use + +Tell the agent: +``` +"Run an adversarial UX test on [URL]" +"Be a grumpy [persona type] and test [app name]" +"Do an asshole user test on my staging site" +``` + +You can provide a persona or let the agent generate one based on your product's target audience. + +## Step 1: Define the Persona + +If no persona is provided, generate one by answering: + +1. **Who is the HARDEST user for this product?** (age 50+, non-technical role, decades of experience doing it "the old way") +2. **What is their tech comfort level?** (the lower the better โ€” WhatsApp-only, paper notebooks, wife set up their email) +3. **What is the ONE thing they need to accomplish?** (their core job, not your feature list) +4. **What would make them give up?** (too many clicks, jargon, slow, confusing) +5. **How do they talk when frustrated?** (blunt, sweary, dismissive, sighing) + +### Good Persona Example +> **"Big Mick" McAllister** โ€” 58-year-old S&C coach. Uses WhatsApp and that's it. His "spreadsheet" is a paper notebook. "If I can't figure it out in 10 seconds I'm going back to my notebook." Needs to log session results for 25 players. Hates small text, jargon, and passwords. 
+ +### Bad Persona Example +> "A user who doesn't like the app" โ€” too vague, no constraints, no voice. + +The persona must be **specific enough to stay in character** for 20 minutes of testing. + +## Step 2: Become the Asshole (Browse as the Persona) + +1. Read any available project docs for app context and URLs +2. **Fully inhabit the persona** โ€” their frustrations, limitations, goals +3. Navigate to the app using browser tools +4. **Attempt the persona's ACTUAL TASKS** (not a feature tour): + - Can they do what they came to do? + - How many clicks/screens to accomplish it? + - What confuses them? + - What makes them angry? + - Where do they get lost? + - What would make them give up and go back to their old way? + +5. Test these friction categories: + - **First impression** โ€” would they even bother past the landing page? + - **Core workflow** โ€” the ONE thing they need to do most often + - **Error recovery** โ€” what happens when they do something wrong? + - **Readability** โ€” text size, contrast, information density + - **Speed** โ€” does it feel faster than their current method? + - **Terminology** โ€” any jargon they wouldn't understand? + - **Navigation** โ€” can they find their way back? do they know where they are? + +6. Take screenshots of every pain point +7. Check browser console for JS errors on every page + +## Step 3: The Rant (Write Feedback in Character) + +Write the feedback AS THE PERSONA โ€” in their voice, with their frustrations. This is not a bug report. This is a real human venting. + +``` +[PERSONA NAME]'s Review of [PRODUCT] + +Overall: [Would they keep using it? Yes/No/Maybe with conditions] + +THE GOOD (grudging admission): +- [things even they have to admit work] + +THE BAD (legitimate UX issues): +- [real problems that would stop them from using the product] + +THE UGLY (showstoppers): +- [things that would make them uninstall/cancel immediately] + +SPECIFIC COMPLAINTS: +1. 
[Page/feature]: "[quote in persona voice]" โ€” [what happened, expected] +2. ... + +VERDICT: "[one-line persona quote summarizing their experience]" +``` + +## Step 4: The Pragmatism Filter (Critical โ€” Do Not Skip) + +Step OUT of the persona. Evaluate each complaint as a product person: + +- **RED: REAL UX BUG** โ€” Any user would have this problem, not just grumpy ones. Fix it. +- **YELLOW: VALID BUT LOW PRIORITY** โ€” Real issue but only for extreme users. Note it. +- **WHITE: PERSONA NOISE** โ€” "I hate computers" talking, not a product problem. Skip it. +- **GREEN: FEATURE REQUEST** โ€” Good idea hidden in the complaint. Consider it. + +### Filter Criteria +1. Would a 35-year-old competent-but-busy user have the same complaint? โ†’ RED +2. Is this a genuine accessibility issue (font size, contrast, click targets)? โ†’ RED +3. Is this "I want it to work like paper" resistance to digital? โ†’ WHITE +4. Is this a real workflow inefficiency the persona stumbled on? โ†’ YELLOW or RED +5. Would fixing this add complexity for the 80% who are fine? โ†’ WHITE +6. Does the complaint reveal a missing onboarding moment? โ†’ GREEN + +**This filter is MANDATORY.** Never ship raw persona complaints as tickets. + +## Step 5: Create Tickets + +For **RED** and **GREEN** items only: +- Clear, actionable title +- Include the persona's verbatim quote (entertaining + memorable) +- The real UX issue underneath (objective) +- A suggested fix (actionable) +- Tag/label: "ux-review" + +For **YELLOW** items: one catch-all ticket with all notes. + +**WHITE** items appear in the report only. No tickets. + +**Max 10 tickets per session** โ€” focus on the worst issues. + +## Step 6: Report + +Deliver: +1. The persona rant (Step 3) โ€” entertaining and visceral +2. The filtered assessment (Step 4) โ€” pragmatic and actionable +3. Tickets created (Step 5) โ€” with links +4. Screenshots of key issues + +## Tips + +- **One persona per session.** Don't mix perspectives. 
+- **Stay in character during Steps 2-3.** Break character only at Step 4. +- **Test the CORE WORKFLOW first.** Don't get distracted by settings pages. +- **Empty states are gold.** New user experience reveals the most friction. +- **The best findings are RED items the persona found accidentally** while trying to do something else. +- **If the persona has zero complaints, your persona is too tech-savvy.** Make them older, less patient, more set in their ways. +- **Run this before demos, launches, or after shipping a batch of features.** +- **Register as a NEW user when possible.** Don't use pre-seeded admin accounts โ€” the cold start experience is where most friction lives. +- **Zero WHITE items is a signal, not a failure.** If the pragmatism filter finds no noise, your product has real UX problems, not just a grumpy persona. +- **Check known issues in project docs AFTER the test.** If the persona found a bug that's already in the known issues list, that's actually the most damning finding โ€” it means the team knew about it but never felt the user's pain. +- **Subscription/paywall testing is critical.** Test with expired accounts, not just active ones. The "what happens when you can't pay" experience reveals whether the product respects users or holds their data hostage. +- **Count the clicks to accomplish the persona's ONE task.** If it's more than 5, that's almost always a RED finding regardless of persona tech level. 
+ +## Example Personas by Industry + +These are starting points โ€” customize for your specific product: + +| Product Type | Persona | Age | Key Trait | +|-------------|---------|-----|-----------| +| CRM | Retirement home director | 68 | Filing cabinet is the current CRM | +| Photography SaaS | Rural wedding photographer | 62 | Books clients by phone, invoices on paper | +| AI/ML Tool | Department store buyer | 55 | Burned by 3 failed tech startups | +| Fitness App | Old-school gym coach | 58 | Paper notebook, thick fingers, bad eyes | +| Accounting | Family bakery owner | 64 | Shoebox of receipts, hates subscriptions | +| E-commerce | Market stall vendor | 60 | Cash only, smartphone is for calls | +| Healthcare | Senior GP | 63 | Dictates notes, nurse handles the computer | +| Education | Veteran teacher | 57 | Chalk and talk, worksheets in ring binders | + +## Rules + +- Stay in character during Steps 2-3 +- Be genuinely mean but fair โ€” find real problems, not manufactured ones +- The pragmatism filter (Step 4) is **MANDATORY** +- Screenshots required for every complaint +- Max 10 tickets per session +- Test on staging/deployed app, not local dev +- One persona, one session, one report diff --git a/optional-skills/web-development/DESCRIPTION.md b/optional-skills/web-development/DESCRIPTION.md new file mode 100644 index 000000000..588817bbc --- /dev/null +++ b/optional-skills/web-development/DESCRIPTION.md @@ -0,0 +1,5 @@ +# Web Development + +Optional skills for client-side web development workflows โ€” embedding agents, copilots, and AI-native UX patterns into user-facing web apps. + +These are distinct from Hermes' own browser automation (Browserbase, Camofox), which operate *on* websites from outside. Web-development skills here help users build *into* their own websites. 
diff --git a/optional-skills/web-development/page-agent/SKILL.md b/optional-skills/web-development/page-agent/SKILL.md new file mode 100644 index 000000000..caab19901 --- /dev/null +++ b/optional-skills/web-development/page-agent/SKILL.md @@ -0,0 +1,189 @@ +--- +name: page-agent +description: Embed alibaba/page-agent into your own web application โ€” a pure-JavaScript in-page GUI agent that ships as a single +``` + +A panel appears. Type an instruction. Done. + +Bookmarklet form (drop into bookmarks bar, click on any page): + +```javascript +javascript:(function(){var s=document.createElement('script');s.src='https://cdn.jsdelivr.net/npm/page-agent@1.8.0/dist/iife/page-agent.demo.js';document.head.appendChild(s);})(); +``` + +## Path 2 โ€” npm install into your own web app (production use) + +Inside an existing web project (React / Vue / Svelte / plain): + +```bash +npm install page-agent +``` + +Wire it up with your own LLM endpoint โ€” **never ship the demo CDN to real users**: + +```javascript +import { PageAgent } from 'page-agent' + +const agent = new PageAgent({ + model: 'qwen3.5-plus', + baseURL: 'https://dashscope.aliyuncs.com/compatible-mode/v1', + apiKey: process.env.LLM_API_KEY, // never hardcode + language: 'en-US', +}) + +// Show the panel for end users: +agent.panel.show() + +// Or drive it programmatically: +await agent.execute('Click submit button, then fill username as John') +``` + +Provider examples (any OpenAI-compatible endpoint works): + +| Provider | `baseURL` | `model` | +|----------|-----------|---------| +| Qwen / DashScope | `https://dashscope.aliyuncs.com/compatible-mode/v1` | `qwen3.5-plus` | +| OpenAI | `https://api.openai.com/v1` | `gpt-4o-mini` | +| Ollama (local) | `http://localhost:11434/v1` | `qwen3:14b` | +| OpenRouter | `https://openrouter.ai/api/v1` | `anthropic/claude-sonnet-4.6` | + +**Key config fields** (passed to `new PageAgent({...})`): + +- `model`, `baseURL`, `apiKey` โ€” LLM connection +- `language` โ€” UI language 
(`en-US`, `zh-CN`, etc.) +- Allowlist and data-masking hooks exist for locking down what the agent can touch โ€” see https://alibaba.github.io/page-agent/ for the full option list + +**Security.** Don't put your `apiKey` in client-side code for a real deployment โ€” proxy LLM calls through your backend and point `baseURL` at your proxy. The demo CDN exists because alibaba runs that proxy for evaluation. + +## Path 3 โ€” clone the source repo (contributing, or hacking on it) + +Use this when the user wants to modify page-agent itself, test it against arbitrary sites via a local IIFE bundle, or develop the browser extension. + +```bash +git clone https://github.com/alibaba/page-agent.git +cd page-agent +npm ci # exact lockfile install (or `npm i` to allow updates) +``` + +Create `.env` in the repo root with an LLM endpoint. Example: + +``` +LLM_MODEL_NAME=gpt-4o-mini +LLM_API_KEY=sk-... +LLM_BASE_URL=https://api.openai.com/v1 +``` + +Ollama flavor: + +``` +LLM_BASE_URL=http://localhost:11434/v1 +LLM_API_KEY=NA +LLM_MODEL_NAME=qwen3:14b +``` + +Common commands: + +```bash +npm start # docs/website dev server +npm run build # build every package +npm run dev:demo # serve IIFE bundle at http://localhost:5174/page-agent.demo.js +npm run dev:ext # develop the browser extension (WXT + React) +npm run build:ext # build the extension +``` + +**Test on any website** using the local IIFE bundle. Add this bookmarklet: + +```javascript +javascript:(function(){var s=document.createElement('script');s.src=`http://localhost:5174/page-agent.demo.js?t=${Math.random()}`;s.onload=()=>console.log('PageAgent ready!');document.head.appendChild(s);})(); +``` + +Then: `npm run dev:demo`, click the bookmarklet on any page, and the local build injects. Auto-rebuilds on save. + +**Warning:** your `.env` `LLM_API_KEY` is inlined into the IIFE bundle during dev builds. Don't share the bundle. Don't commit it. Don't paste the URL into Slack. 
(Verified: grepping the public dev bundle returns the literal values from `.env`.) + +## Repo layout (Path 3) + +Monorepo with npm workspaces. Key packages: + +| Package | Path | Purpose | +|---------|------|---------| +| `page-agent` | `packages/page-agent/` | Main entry with UI panel | +| `@page-agent/core` | `packages/core/` | Core agent logic, no UI | +| `@page-agent/mcp` | `packages/mcp/` | MCP server (beta) | +| — | `packages/llms/` | LLM client | +| — | `packages/page-controller/` | DOM ops + visual feedback | +| — | `packages/ui/` | Panel + i18n | +| — | `packages/extension/` | Chrome/Firefox extension | +| — | `packages/website/` | Docs + landing site | + +## Verifying it works + +After Path 1 or Path 2: +1. Open the page in a browser with devtools open +2. You should see a floating panel. If not, check the console for errors (most common: CORS on the LLM endpoint, wrong `baseURL`, or a bad API key) +3. Type a simple instruction matching something visible on the page ("click the Login link") +4. Watch the Network tab — you should see a request to your `baseURL` + +After Path 3: +1. `npm run dev:demo` prints `Accepting connections at http://localhost:5174` +2. `curl -I http://localhost:5174/page-agent.demo.js` returns `HTTP/1.1 200 OK` with `Content-Type: application/javascript` +3. Click the bookmarklet on any site; panel appears + +## Pitfalls + +- **Demo CDN in production** — don't. It's rate-limited, uses Alibaba's free proxy, and their terms forbid production use. +- **API key exposure** — any key passed to `new PageAgent({apiKey: ...})` ships in your JS bundle. Always proxy through your own backend for real deployments. +- **Non-OpenAI-compatible endpoints** fail silently or with cryptic errors. If your provider needs native Anthropic/Gemini formatting, use an OpenAI-compatibility proxy (LiteLLM, OpenRouter) in front.
+- **CSP blocks** โ€” sites with strict Content-Security-Policy may refuse to load the CDN script or disallow inline eval. In that case, self-host from your origin. +- **Restart dev server** after editing `.env` in Path 3 โ€” Vite only reads env at startup. +- **Node version** โ€” the repo declares `^22.13.0 || >=24`. Node 20 will fail `npm ci` with engine errors. +- **npm 10 vs 11** โ€” docs say npm 11+; npm 10.9 actually works fine. + +## Reference + +- Repo: https://github.com/alibaba/page-agent +- Docs: https://alibaba.github.io/page-agent/ +- License: MIT (built on browser-use's DOM processing internals, Copyright 2024 Gregor Zunic) diff --git a/package-lock.json b/package-lock.json index 9d0ae80cd..8309e3b7a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,28 +11,12 @@ "license": "MIT", "dependencies": { "@askjo/camofox-browser": "^1.5.2", - "agent-browser": "^0.13.0" + "agent-browser": "^0.26.0" }, "engines": { "node": ">=20.0.0" } }, - "node_modules/@appium/logger": { - "version": "1.7.1", - "resolved": "https://registry.npmjs.org/@appium/logger/-/logger-1.7.1.tgz", - "integrity": "sha512-9C2o9X/lBEDBUnKfAi3mRo9oG7Z03nmISLwsGkWxIWjMAvBdJD0RRSJMekWVKzfXN3byrI1WlCXTITzN4LAoLw==", - "license": "ISC", - "dependencies": { - "console-control-strings": "1.1.0", - "lodash": "4.17.21", - "lru-cache": "10.4.3", - "set-blocking": "2.0.0" - }, - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0", - "npm": ">=8" - } - }, "node_modules/@askjo/camofox-browser": { "version": "1.5.2", "resolved": "https://registry.npmjs.org/@askjo/camofox-browser/-/camofox-browser-1.5.2.tgz", @@ -52,75 +36,6 @@ "node": ">=18" } }, - "node_modules/@isaacs/cliui": { - "version": "8.0.2", - "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", - "integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==", - "license": "ISC", - "dependencies": { - "string-width": "^5.1.2", - "string-width-cjs": 
"npm:string-width@^4.2.0", - "strip-ansi": "^7.0.1", - "strip-ansi-cjs": "npm:strip-ansi@^6.0.1", - "wrap-ansi": "^8.1.0", - "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/@isaacs/cliui/node_modules/ansi-styles": { - "version": "6.2.3", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.3.tgz", - "integrity": "sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg==", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/@isaacs/cliui/node_modules/emoji-regex": { - "version": "9.2.2", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", - "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", - "license": "MIT" - }, - "node_modules/@isaacs/cliui/node_modules/string-width": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz", - "integrity": "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==", - "license": "MIT", - "dependencies": { - "eastasianwidth": "^0.2.0", - "emoji-regex": "^9.2.2", - "strip-ansi": "^7.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/@isaacs/cliui/node_modules/wrap-ansi": { - "version": "8.1.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz", - "integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==", - "license": "MIT", - "dependencies": { - "ansi-styles": "^6.1.0", - "string-width": "^5.0.1", - "strip-ansi": "^7.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/wrap-ansi?sponsor=1" - } - }, "node_modules/@opentelemetry/api": { 
"version": "1.9.1", "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.1.tgz", @@ -130,105 +45,6 @@ "node": ">=8.0.0" } }, - "node_modules/@pkgjs/parseargs": { - "version": "0.11.0", - "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", - "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==", - "license": "MIT", - "optional": true, - "engines": { - "node": ">=14" - } - }, - "node_modules/@promptbook/utils": { - "version": "0.69.5", - "resolved": "https://registry.npmjs.org/@promptbook/utils/-/utils-0.69.5.tgz", - "integrity": "sha512-xm5Ti/Hp3o4xHrsK9Yy3MS6KbDxYbq485hDsFvxqaNA7equHLPdo8H8faTitTeb14QCDfLW4iwCxdVYu5sn6YQ==", - "funding": [ - { - "type": "individual", - "url": "https://buymeacoffee.com/hejny" - }, - { - "type": "github", - "url": "https://github.com/webgptorg/promptbook/blob/main/README.md#%EF%B8%8F-contributing" - } - ], - "license": "CC-BY-4.0", - "dependencies": { - "spacetrim": "0.11.59" - } - }, - "node_modules/@puppeteer/browsers": { - "version": "2.13.0", - "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.13.0.tgz", - "integrity": "sha512-46BZJYJjc/WwmKjsvDFykHtXrtomsCIrwYQPOP7VfMJoZY2bsDF9oROBABR3paDjDcmkUye1Pb1BqdcdiipaWA==", - "license": "Apache-2.0", - "dependencies": { - "debug": "^4.4.3", - "extract-zip": "^2.0.1", - "progress": "^2.0.3", - "proxy-agent": "^6.5.0", - "semver": "^7.7.4", - "tar-fs": "^3.1.1", - "yargs": "^17.7.2" - }, - "bin": { - "browsers": "lib/cjs/main-cli.js" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/@puppeteer/browsers/node_modules/debug": { - "version": "4.4.3", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", - "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", - "license": "MIT", - "dependencies": { - "ms": "^2.1.3" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - 
"supports-color": { - "optional": true - } - } - }, - "node_modules/@puppeteer/browsers/node_modules/ms": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "license": "MIT" - }, - "node_modules/@puppeteer/browsers/node_modules/tar-fs": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.2.tgz", - "integrity": "sha512-QGxxTxxyleAdyM3kpFs14ymbYmNFrfY+pHj7Z8FgtbZ7w2//VAgLMac7sT6nRpIHjppXO2AwwEOg0bPFVRcmXw==", - "license": "MIT", - "dependencies": { - "pump": "^3.0.0", - "tar-stream": "^3.1.5" - }, - "optionalDependencies": { - "bare-fs": "^4.0.1", - "bare-path": "^3.0.0" - } - }, - "node_modules/@puppeteer/browsers/node_modules/tar-stream": { - "version": "3.1.8", - "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.8.tgz", - "integrity": "sha512-U6QpVRyCGHva435KoNWy9PRoi2IFYCgtEhq9nmrPPpbRacPs9IH4aJ3gbrFC8dPcXvdSZ4XXfXT5Fshbp2MtlQ==", - "license": "MIT", - "dependencies": { - "b4a": "^1.6.4", - "bare-fs": "^4.5.5", - "fast-fifo": "^1.2.0", - "streamx": "^2.15.0" - } - }, "node_modules/@sindresorhus/is": { "version": "4.6.0", "resolved": "https://registry.npmjs.org/@sindresorhus/is/-/is-4.6.0.tgz", @@ -241,12 +57,6 @@ "url": "https://github.com/sindresorhus/is?sponsor=1" } }, - "node_modules/@tootallnate/quickjs-emscripten": { - "version": "0.23.0", - "resolved": "https://registry.npmjs.org/@tootallnate/quickjs-emscripten/-/quickjs-emscripten-0.23.0.tgz", - "integrity": "sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA==", - "license": "MIT" - }, "node_modules/@types/debug": { "version": "4.1.13", "resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.13.tgz", @@ -262,225 +72,6 @@ "integrity": "sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA==", "license": "MIT" }, - 
"node_modules/@types/node": { - "version": "20.19.39", - "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.39.tgz", - "integrity": "sha512-orrrD74MBUyK8jOAD/r0+lfa1I2MO6I+vAkmAWzMYbCcgrN4lCrmK52gRFQq/JRxfYPfonkr4b0jcY7Olqdqbw==", - "license": "MIT", - "dependencies": { - "undici-types": "~6.21.0" - } - }, - "node_modules/@types/sinonjs__fake-timers": { - "version": "8.1.5", - "resolved": "https://registry.npmjs.org/@types/sinonjs__fake-timers/-/sinonjs__fake-timers-8.1.5.tgz", - "integrity": "sha512-mQkU2jY8jJEF7YHjHvsQO8+3ughTL1mcnn96igfhONmR+fUPSKIkefQYpSe8bsly2Ep7oQbn/6VG5/9/0qcArQ==", - "license": "MIT" - }, - "node_modules/@types/which": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/@types/which/-/which-2.0.2.tgz", - "integrity": "sha512-113D3mDkZDjo+EeUEHCFy0qniNc1ZpecGiAU7WSo7YDoSzolZIQKpYFHrPpjkB2nuyahcKfrmLXeQlh7gqJYdw==", - "license": "MIT" - }, - "node_modules/@types/ws": { - "version": "8.18.1", - "resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.18.1.tgz", - "integrity": "sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg==", - "license": "MIT", - "dependencies": { - "@types/node": "*" - } - }, - "node_modules/@types/yauzl": { - "version": "2.10.3", - "resolved": "https://registry.npmjs.org/@types/yauzl/-/yauzl-2.10.3.tgz", - "integrity": "sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q==", - "license": "MIT", - "optional": true, - "dependencies": { - "@types/node": "*" - } - }, - "node_modules/@wdio/config": { - "version": "9.27.0", - "resolved": "https://registry.npmjs.org/@wdio/config/-/config-9.27.0.tgz", - "integrity": "sha512-9y8z7ugIbU6ycKrA2SqCpKh1/hobut2rDq9CLt/BNVzSlebBBVOTMiAt1XroZzcPnA7/ZqpbkpOsbpPUaAQuNQ==", - "license": "MIT", - "dependencies": { - "@wdio/logger": "9.18.0", - "@wdio/types": "9.27.0", - "@wdio/utils": "9.27.0", - "deepmerge-ts": "^7.0.3", - "glob": "^10.2.2", - "import-meta-resolve": "^4.0.0", 
- "jiti": "^2.6.1" - }, - "engines": { - "node": ">=18.20.0" - } - }, - "node_modules/@wdio/config/node_modules/balanced-match": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", - "license": "MIT" - }, - "node_modules/@wdio/config/node_modules/brace-expansion": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.1.0.tgz", - "integrity": "sha512-TN1kCZAgdgweJhWWpgKYrQaMNHcDULHkWwQIspdtjV4Y5aurRdZpjAqn6yX3FPqTA9ngHCc4hJxMAMgGfve85w==", - "license": "MIT", - "dependencies": { - "balanced-match": "^1.0.0" - } - }, - "node_modules/@wdio/config/node_modules/glob": { - "version": "10.5.0", - "resolved": "https://registry.npmjs.org/glob/-/glob-10.5.0.tgz", - "integrity": "sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg==", - "deprecated": "Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. 
Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me", - "license": "ISC", - "dependencies": { - "foreground-child": "^3.1.0", - "jackspeak": "^3.1.2", - "minimatch": "^9.0.4", - "minipass": "^7.1.2", - "package-json-from-dist": "^1.0.0", - "path-scurry": "^1.11.1" - }, - "bin": { - "glob": "dist/esm/bin.mjs" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/@wdio/config/node_modules/minimatch": { - "version": "9.0.9", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.9.tgz", - "integrity": "sha512-OBwBN9AL4dqmETlpS2zasx+vTeWclWzkblfZk7KTA5j3jeOONz/tRCnZomUyvNg83wL5Zv9Ss6HMJXAgL8R2Yg==", - "license": "ISC", - "dependencies": { - "brace-expansion": "^2.0.2" - }, - "engines": { - "node": ">=16 || 14 >=14.17" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/@wdio/config/node_modules/path-scurry": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.1.tgz", - "integrity": "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==", - "license": "BlueOak-1.0.0", - "dependencies": { - "lru-cache": "^10.2.0", - "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0" - }, - "engines": { - "node": ">=16 || 14 >=14.18" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/@wdio/logger": { - "version": "9.18.0", - "resolved": "https://registry.npmjs.org/@wdio/logger/-/logger-9.18.0.tgz", - "integrity": "sha512-HdzDrRs+ywAqbXGKqe1i/bLtCv47plz4TvsHFH3j729OooT5VH38ctFn5aLXgECmiAKDkmH/A6kOq2Zh5DIxww==", - "license": "MIT", - "dependencies": { - "chalk": "^5.1.2", - "loglevel": "^1.6.0", - "loglevel-plugin-prefix": "^0.8.4", - "safe-regex2": "^5.0.0", - "strip-ansi": "^7.1.0" - }, - "engines": { - "node": ">=18.20.0" - } - }, - "node_modules/@wdio/protocols": { - "version": "9.27.0", - "resolved": 
"https://registry.npmjs.org/@wdio/protocols/-/protocols-9.27.0.tgz", - "integrity": "sha512-rIk69BsY1+6uU2PEN5FiRpI6K7HJ86YHzZRFBe4iRzKXQgGNk1zWzbdVJIuNFoOWsnmYUkK42KSSOT4Le6EmiQ==", - "license": "MIT" - }, - "node_modules/@wdio/repl": { - "version": "9.16.2", - "resolved": "https://registry.npmjs.org/@wdio/repl/-/repl-9.16.2.tgz", - "integrity": "sha512-FLTF0VL6+o5BSTCO7yLSXocm3kUnu31zYwzdsz4n9s5YWt83sCtzGZlZpt7TaTzb3jVUfxuHNQDTb8UMkCu0lQ==", - "license": "MIT", - "dependencies": { - "@types/node": "^20.1.0" - }, - "engines": { - "node": ">=18.20.0" - } - }, - "node_modules/@wdio/types": { - "version": "9.27.0", - "resolved": "https://registry.npmjs.org/@wdio/types/-/types-9.27.0.tgz", - "integrity": "sha512-DQJ+OdRBqUBcQ30DN2Z651hEVh3OoxnlDUSRqlWy9An2AY6v9rYWTj825B6zsj5pLLEToYO1tfwWq0ab183pXg==", - "license": "MIT", - "dependencies": { - "@types/node": "^20.1.0" - }, - "engines": { - "node": ">=18.20.0" - } - }, - "node_modules/@wdio/utils": { - "version": "9.27.0", - "resolved": "https://registry.npmjs.org/@wdio/utils/-/utils-9.27.0.tgz", - "integrity": "sha512-fUasd5OKJTy2seJfWnYZ9xlxTtY0p/Kyeuh7Tbb8kcofBqmBi2fTvM3sfZlo1tGQX9yCh+IS2N7hlfyFMmuZ+w==", - "license": "MIT", - "dependencies": { - "@puppeteer/browsers": "^2.2.0", - "@wdio/logger": "9.18.0", - "@wdio/types": "9.27.0", - "decamelize": "^6.0.0", - "deepmerge-ts": "^7.0.3", - "edgedriver": "^6.1.2", - "geckodriver": "^6.1.0", - "get-port": "^7.0.0", - "import-meta-resolve": "^4.0.0", - "locate-app": "^2.2.24", - "mitt": "^3.0.1", - "safaridriver": "^1.0.0", - "split2": "^4.2.0", - "wait-port": "^1.1.0" - }, - "engines": { - "node": ">=18.20.0" - } - }, - "node_modules/@zip.js/zip.js": { - "version": "2.8.26", - "resolved": "https://registry.npmjs.org/@zip.js/zip.js/-/zip.js-2.8.26.tgz", - "integrity": "sha512-RQ4h9F6DOiHxpdocUDrOl6xBM+yOtz+LkUol47AVWcfebGBDpZ7w7Xvz9PS24JgXvLGiXXzSAfdCdVy1tPlaFA==", - "license": "BSD-3-Clause", - "engines": { - "bun": ">=0.7.0", - "deno": ">=1.0.0", - "node": ">=18.0.0" - 
} - }, - "node_modules/abort-controller": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz", - "integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==", - "license": "MIT", - "dependencies": { - "event-target-shim": "^5.0.0" - }, - "engines": { - "node": ">=6.5" - } - }, "node_modules/accepts": { "version": "1.3.8", "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz", @@ -503,263 +94,16 @@ "node": ">=12.0" } }, - "node_modules/agent-base": { - "version": "7.1.4", - "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", - "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", - "license": "MIT", - "engines": { - "node": ">= 14" - } - }, "node_modules/agent-browser": { - "version": "0.13.0", - "resolved": "https://registry.npmjs.org/agent-browser/-/agent-browser-0.13.0.tgz", - "integrity": "sha512-KGtiqzu8EA8nPAZIp+1lq+PBG86brLEvB28aE/Aeh1ErOVBHICsh/ShwCPUKMjMIS65qiVV/FKG/3xN0jn8J3A==", + "version": "0.26.0", + "resolved": "https://registry.npmjs.org/agent-browser/-/agent-browser-0.26.0.tgz", + "integrity": "sha512-pdqSfjwbFSp+qnwlb2g23e9wXveIOfMi19xpPA9xZUbzEAUp6W4YBZj6Ybj8z4M7WkcbGDDYc+oDIHDt9R3EDQ==", "hasInstallScript": true, "license": "Apache-2.0", - "dependencies": { - "node-simctl": "^7.4.0", - "playwright-core": "^1.57.0", - "webdriverio": "^9.15.0", - "ws": "^8.19.0", - "zod": "^3.22.4" - }, "bin": { "agent-browser": "bin/agent-browser.js" } }, - "node_modules/ansi-regex": { - "version": "6.2.2", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.2.2.tgz", - "integrity": "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/ansi-regex?sponsor=1" - } - }, - "node_modules/ansi-styles": 
{ - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "license": "MIT", - "dependencies": { - "color-convert": "^2.0.1" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/archiver": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/archiver/-/archiver-7.0.1.tgz", - "integrity": "sha512-ZcbTaIqJOfCc03QwD468Unz/5Ir8ATtvAHsK+FdXbDIbGfihqh9mrvdcYunQzqn4HrvWWaFyaxJhGZagaJJpPQ==", - "license": "MIT", - "dependencies": { - "archiver-utils": "^5.0.2", - "async": "^3.2.4", - "buffer-crc32": "^1.0.0", - "readable-stream": "^4.0.0", - "readdir-glob": "^1.1.2", - "tar-stream": "^3.0.0", - "zip-stream": "^6.0.1" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/archiver-utils": { - "version": "5.0.2", - "resolved": "https://registry.npmjs.org/archiver-utils/-/archiver-utils-5.0.2.tgz", - "integrity": "sha512-wuLJMmIBQYCsGZgYLTy5FIB2pF6Lfb6cXMSF8Qywwk3t20zWnAi7zLcQFdKQmIB8wyZpY5ER38x08GbwtR2cLA==", - "license": "MIT", - "dependencies": { - "glob": "^10.0.0", - "graceful-fs": "^4.2.0", - "is-stream": "^2.0.1", - "lazystream": "^1.0.0", - "lodash": "^4.17.15", - "normalize-path": "^3.0.0", - "readable-stream": "^4.0.0" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/archiver-utils/node_modules/balanced-match": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", - "license": "MIT" - }, - "node_modules/archiver-utils/node_modules/brace-expansion": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.1.0.tgz", - "integrity": 
"sha512-TN1kCZAgdgweJhWWpgKYrQaMNHcDULHkWwQIspdtjV4Y5aurRdZpjAqn6yX3FPqTA9ngHCc4hJxMAMgGfve85w==", - "license": "MIT", - "dependencies": { - "balanced-match": "^1.0.0" - } - }, - "node_modules/archiver-utils/node_modules/buffer": { - "version": "6.0.3", - "resolved": "https://registry.npmjs.org/buffer/-/buffer-6.0.3.tgz", - "integrity": "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT", - "dependencies": { - "base64-js": "^1.3.1", - "ieee754": "^1.2.1" - } - }, - "node_modules/archiver-utils/node_modules/glob": { - "version": "10.5.0", - "resolved": "https://registry.npmjs.org/glob/-/glob-10.5.0.tgz", - "integrity": "sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg==", - "deprecated": "Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. 
Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me", - "license": "ISC", - "dependencies": { - "foreground-child": "^3.1.0", - "jackspeak": "^3.1.2", - "minimatch": "^9.0.4", - "minipass": "^7.1.2", - "package-json-from-dist": "^1.0.0", - "path-scurry": "^1.11.1" - }, - "bin": { - "glob": "dist/esm/bin.mjs" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/archiver-utils/node_modules/minimatch": { - "version": "9.0.9", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.9.tgz", - "integrity": "sha512-OBwBN9AL4dqmETlpS2zasx+vTeWclWzkblfZk7KTA5j3jeOONz/tRCnZomUyvNg83wL5Zv9Ss6HMJXAgL8R2Yg==", - "license": "ISC", - "dependencies": { - "brace-expansion": "^2.0.2" - }, - "engines": { - "node": ">=16 || 14 >=14.17" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/archiver-utils/node_modules/path-scurry": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.1.tgz", - "integrity": "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==", - "license": "BlueOak-1.0.0", - "dependencies": { - "lru-cache": "^10.2.0", - "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0" - }, - "engines": { - "node": ">=16 || 14 >=14.18" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/archiver-utils/node_modules/readable-stream": { - "version": "4.7.0", - "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-4.7.0.tgz", - "integrity": "sha512-oIGGmcpTLwPga8Bn6/Z75SVaH1z5dUut2ibSyAMVhmUggWpmDn2dapB0n7f8nwaSiRtepAsfJyfXIO5DCVAODg==", - "license": "MIT", - "dependencies": { - "abort-controller": "^3.0.0", - "buffer": "^6.0.3", - "events": "^3.3.0", - "process": "^0.11.10", - "string_decoder": "^1.3.0" - }, - "engines": { - "node": "^12.22.0 || ^14.17.0 || >=16.0.0" - } - }, - "node_modules/archiver/node_modules/buffer": { - 
"version": "6.0.3", - "resolved": "https://registry.npmjs.org/buffer/-/buffer-6.0.3.tgz", - "integrity": "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT", - "dependencies": { - "base64-js": "^1.3.1", - "ieee754": "^1.2.1" - } - }, - "node_modules/archiver/node_modules/readable-stream": { - "version": "4.7.0", - "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-4.7.0.tgz", - "integrity": "sha512-oIGGmcpTLwPga8Bn6/Z75SVaH1z5dUut2ibSyAMVhmUggWpmDn2dapB0n7f8nwaSiRtepAsfJyfXIO5DCVAODg==", - "license": "MIT", - "dependencies": { - "abort-controller": "^3.0.0", - "buffer": "^6.0.3", - "events": "^3.3.0", - "process": "^0.11.10", - "string_decoder": "^1.3.0" - }, - "engines": { - "node": "^12.22.0 || ^14.17.0 || >=16.0.0" - } - }, - "node_modules/archiver/node_modules/tar-stream": { - "version": "3.1.8", - "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.8.tgz", - "integrity": "sha512-U6QpVRyCGHva435KoNWy9PRoi2IFYCgtEhq9nmrPPpbRacPs9IH4aJ3gbrFC8dPcXvdSZ4XXfXT5Fshbp2MtlQ==", - "license": "MIT", - "dependencies": { - "b4a": "^1.6.4", - "bare-fs": "^4.5.5", - "fast-fifo": "^1.2.0", - "streamx": "^2.15.0" - } - }, - "node_modules/aria-query": { - "version": "5.3.2", - "resolved": "https://registry.npmjs.org/aria-query/-/aria-query-5.3.2.tgz", - "integrity": "sha512-COROpnaoap1E2F000S62r6A60uHZnmlvomhfyT2DlTcrY1OrBKn2UhH7qn5wTC9zMvD0AY7csdPSNwKP+7WiQw==", - "license": "Apache-2.0", - "engines": { - "node": ">= 0.4" - } - }, "node_modules/arr-union": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/arr-union/-/arr-union-3.1.0.tgz", @@ -775,52 +119,6 @@ "integrity": 
"sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg==", "license": "MIT" }, - "node_modules/ast-types": { - "version": "0.13.4", - "resolved": "https://registry.npmjs.org/ast-types/-/ast-types-0.13.4.tgz", - "integrity": "sha512-x1FCFnFifvYDDzTaLII71vG5uvDwgtmDTEVWAxrgeiR8VjMONcCXJx7E+USjDtHlwFmt9MysbqgF9b9Vjr6w+w==", - "license": "MIT", - "dependencies": { - "tslib": "^2.0.1" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/async": { - "version": "3.2.6", - "resolved": "https://registry.npmjs.org/async/-/async-3.2.6.tgz", - "integrity": "sha512-htCUDlxyyCLMgaM3xXg0C0LW2xqfuQ6p05pCEIsXuyQ+a1koYKTuBMzRNwmybfLgvJDMd0r1LTn4+E0Ti6C2AA==", - "license": "MIT" - }, - "node_modules/asyncbox": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/asyncbox/-/asyncbox-3.0.0.tgz", - "integrity": "sha512-X7U0nedUMKV3nn9c4R0Zgvdvv6cw97tbDlHSZicq1snGPi/oX9DgGmFSURWtxDdnBWd3V0YviKhqAYAVvoWQ/A==", - "license": "Apache-2.0", - "dependencies": { - "bluebird": "^3.5.1", - "lodash": "^4.17.4", - "source-map-support": "^0.x" - }, - "engines": { - "node": ">=16" - } - }, - "node_modules/b4a": { - "version": "1.8.0", - "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.8.0.tgz", - "integrity": "sha512-qRuSmNSkGQaHwNbM7J78Wwy+ghLEYF1zNrSeMxj4Kgw6y33O3mXcQ6Ie9fRvfU/YnxWkOchPXbaLb73TkIsfdg==", - "license": "Apache-2.0", - "peerDependencies": { - "react-native-b4a": "*" - }, - "peerDependenciesMeta": { - "react-native-b4a": { - "optional": true - } - } - }, "node_modules/balanced-match": { "version": "4.0.4", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz", @@ -830,97 +128,6 @@ "node": "18 || 20 || >=22" } }, - "node_modules/bare-events": { - "version": "2.8.2", - "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.8.2.tgz", - "integrity": "sha512-riJjyv1/mHLIPX4RwiK+oW9/4c3TEUeORHKefKAKnZ5kyslbN+HXowtbaVEqt4IMUB7OXlfixcs6gsFeo/jhiQ==", - "license": "Apache-2.0", - 
"peerDependencies": { - "bare-abort-controller": "*" - }, - "peerDependenciesMeta": { - "bare-abort-controller": { - "optional": true - } - } - }, - "node_modules/bare-fs": { - "version": "4.7.0", - "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.7.0.tgz", - "integrity": "sha512-xzqKsCFxAek9aezYhjJuJRXBIaYlg/0OGDTZp+T8eYmYMlm66cs6cYko02drIyjN2CBbi+I6L7YfXyqpqtKRXA==", - "license": "Apache-2.0", - "dependencies": { - "bare-events": "^2.5.4", - "bare-path": "^3.0.0", - "bare-stream": "^2.6.4", - "bare-url": "^2.2.2", - "fast-fifo": "^1.3.2" - }, - "engines": { - "bare": ">=1.16.0" - }, - "peerDependencies": { - "bare-buffer": "*" - }, - "peerDependenciesMeta": { - "bare-buffer": { - "optional": true - } - } - }, - "node_modules/bare-os": { - "version": "3.8.7", - "resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.8.7.tgz", - "integrity": "sha512-G4Gr1UsGeEy2qtDTZwL7JFLo2wapUarz7iTMcYcMFdS89AIQuBoyjgXZz0Utv7uHs3xA9LckhVbeBi8lEQrC+w==", - "license": "Apache-2.0", - "engines": { - "bare": ">=1.14.0" - } - }, - "node_modules/bare-path": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz", - "integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==", - "license": "Apache-2.0", - "dependencies": { - "bare-os": "^3.0.1" - } - }, - "node_modules/bare-stream": { - "version": "2.13.0", - "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.13.0.tgz", - "integrity": "sha512-3zAJRZMDFGjdn+RVnNpF9kuELw+0Fl3lpndM4NcEOhb9zwtSo/deETfuIwMSE5BXanA0FrN1qVjffGwAg2Y7EA==", - "license": "Apache-2.0", - "dependencies": { - "streamx": "^2.25.0", - "teex": "^1.0.1" - }, - "peerDependencies": { - "bare-abort-controller": "*", - "bare-buffer": "*", - "bare-events": "*" - }, - "peerDependenciesMeta": { - "bare-abort-controller": { - "optional": true - }, - "bare-buffer": { - "optional": true - }, - "bare-events": { - "optional": true - } - } - }, - 
"node_modules/bare-url": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.4.0.tgz", - "integrity": "sha512-NSTU5WN+fy/L0DDenfE8SXQna4voXuW0FHM7wH8i3/q9khUSchfPbPezO4zSFMnDGIf9YE+mt/RWhZgNRKRIXA==", - "license": "Apache-2.0", - "dependencies": { - "bare-path": "^3.0.0" - } - }, "node_modules/base64-js": { "version": "1.5.1", "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", @@ -953,15 +160,6 @@ "node": ">=6.0.0" } }, - "node_modules/basic-ftp": { - "version": "5.2.2", - "resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.2.2.tgz", - "integrity": "sha512-1tDrzKsdCg70WGvbFss/ulVAxupNauGnOlgpyjKzeQxzyllBLS0CGLV7tjIXTK3ZQA9/FBEm9qyFFN1bciA6pw==", - "license": "MIT", - "engines": { - "node": ">=10.0.0" - } - }, "node_modules/better-sqlite3": { "version": "12.9.0", "resolved": "https://registry.npmjs.org/better-sqlite3/-/better-sqlite3-12.9.0.tgz", @@ -1002,12 +200,6 @@ "readable-stream": "^3.4.0" } }, - "node_modules/bluebird": { - "version": "3.7.2", - "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.7.2.tgz", - "integrity": "sha512-XpNj6GDQzdfW+r2Wnn7xiSAd7TM3jzkxGXBGTtWKuSXv1xUV+azxAm8jdWZN06QTQk+2N2XB9jRDkvbmQmcRtg==", - "license": "MIT" - }, "node_modules/body-parser": { "version": "1.20.4", "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.4.tgz", @@ -1032,12 +224,6 @@ "npm": "1.2.8000 || >= 1.4.16" } }, - "node_modules/boolbase": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", - "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==", - "license": "ISC" - }, "node_modules/brace-expansion": { "version": "5.0.5", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.5.tgz", @@ -1107,21 +293,6 @@ "ieee754": "^1.1.13" } }, - "node_modules/buffer-crc32": { - "version": "1.0.0", - "resolved": 
"https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-1.0.0.tgz", - "integrity": "sha512-Db1SbgBS/fg/392AblrMJk97KggmvYhr4pB5ZIMTWtaivCPMWLkmb7m21cJvpvgK+J3nsU2CmmixNBZx4vFj/w==", - "license": "MIT", - "engines": { - "node": ">=8.0.0" - } - }, - "node_modules/buffer-from": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz", - "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==", - "license": "MIT" - }, "node_modules/bytes": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", @@ -1217,101 +388,12 @@ ], "license": "CC-BY-4.0" }, - "node_modules/chalk": { - "version": "5.6.2", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.6.2.tgz", - "integrity": "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA==", - "license": "MIT", - "engines": { - "node": "^12.17.0 || ^14.13 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/chalk/chalk?sponsor=1" - } - }, - "node_modules/cheerio": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.2.0.tgz", - "integrity": "sha512-WDrybc/gKFpTYQutKIK6UvfcuxijIZfMfXaYm8NMsPQxSYvf+13fXUJ4rztGGbJcBQ/GF55gvrZ0Bc0bj/mqvg==", - "license": "MIT", - "dependencies": { - "cheerio-select": "^2.1.0", - "dom-serializer": "^2.0.0", - "domhandler": "^5.0.3", - "domutils": "^3.2.2", - "encoding-sniffer": "^0.2.1", - "htmlparser2": "^10.1.0", - "parse5": "^7.3.0", - "parse5-htmlparser2-tree-adapter": "^7.1.0", - "parse5-parser-stream": "^7.1.2", - "undici": "^7.19.0", - "whatwg-mimetype": "^4.0.0" - }, - "engines": { - "node": ">=20.18.1" - }, - "funding": { - "url": "https://github.com/cheeriojs/cheerio?sponsor=1" - } - }, - "node_modules/cheerio-select": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz", - "integrity": 
"sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==", - "license": "BSD-2-Clause", - "dependencies": { - "boolbase": "^1.0.0", - "css-select": "^5.1.0", - "css-what": "^6.1.0", - "domelementtype": "^2.3.0", - "domhandler": "^5.0.3", - "domutils": "^3.0.1" - }, - "funding": { - "url": "https://github.com/sponsors/fb55" - } - }, "node_modules/chownr": { "version": "1.1.4", "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz", "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==", "license": "ISC" }, - "node_modules/cliui": { - "version": "8.0.1", - "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", - "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", - "license": "ISC", - "dependencies": { - "string-width": "^4.2.0", - "strip-ansi": "^6.0.1", - "wrap-ansi": "^7.0.0" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/cliui/node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/cliui/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, "node_modules/clone-deep": { "version": "0.2.4", "resolved": "https://registry.npmjs.org/clone-deep/-/clone-deep-0.2.4.tgz", @@ -1328,24 +410,6 @@ "node": ">=0.10.0" } }, - "node_modules/color-convert": { - "version": "2.0.1", - "resolved": 
"https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", - "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "license": "MIT", - "dependencies": { - "color-name": "~1.1.4" - }, - "engines": { - "node": ">=7.0.0" - } - }, - "node_modules/color-name": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "license": "MIT" - }, "node_modules/commander": { "version": "14.0.3", "resolved": "https://registry.npmjs.org/commander/-/commander-14.0.3.tgz", @@ -1355,74 +419,12 @@ "node": ">=20" } }, - "node_modules/compress-commons": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/compress-commons/-/compress-commons-6.0.2.tgz", - "integrity": "sha512-6FqVXeETqWPoGcfzrXb37E50NP0LXT8kAMu5ooZayhWWdgEY4lBEEcbQNXtkuKQsGduxiIcI4gOTsxTmuq/bSg==", - "license": "MIT", - "dependencies": { - "crc-32": "^1.2.0", - "crc32-stream": "^6.0.0", - "is-stream": "^2.0.1", - "normalize-path": "^3.0.0", - "readable-stream": "^4.0.0" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/compress-commons/node_modules/buffer": { - "version": "6.0.3", - "resolved": "https://registry.npmjs.org/buffer/-/buffer-6.0.3.tgz", - "integrity": "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT", - "dependencies": { - "base64-js": "^1.3.1", - "ieee754": "^1.2.1" - } - }, - "node_modules/compress-commons/node_modules/readable-stream": { - "version": "4.7.0", - "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-4.7.0.tgz", - 
"integrity": "sha512-oIGGmcpTLwPga8Bn6/Z75SVaH1z5dUut2ibSyAMVhmUggWpmDn2dapB0n7f8nwaSiRtepAsfJyfXIO5DCVAODg==", - "license": "MIT", - "dependencies": { - "abort-controller": "^3.0.0", - "buffer": "^6.0.3", - "events": "^3.3.0", - "process": "^0.11.10", - "string_decoder": "^1.3.0" - }, - "engines": { - "node": "^12.22.0 || ^14.17.0 || >=16.0.0" - } - }, "node_modules/concat-map": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", "license": "MIT" }, - "node_modules/console-control-strings": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/console-control-strings/-/console-control-strings-1.1.0.tgz", - "integrity": "sha512-ty/fTekppD2fIwRvnZAVdeOiGd1c7YXEixbgJTNzqcxJWKQnjJ/V1bNEEE6hygpM3WjwHFUVK6HTjWSzV4a8sQ==", - "license": "ISC" - }, "node_modules/content-disposition": { "version": "0.5.4", "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.4.tgz", @@ -1459,160 +461,6 @@ "integrity": "sha512-NXdYc3dLr47pBkpUCHtKSwIOQXLVn8dZEuywboCOJY/osA0wFSLlSawr3KN8qXJEyX66FcONTH8EIlVuK0yyFA==", "license": "MIT" }, - "node_modules/core-util-is": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz", - "integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==", - "license": "MIT" - }, - "node_modules/crc-32": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/crc-32/-/crc-32-1.2.2.tgz", - "integrity": "sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ==", - "license": "Apache-2.0", - "bin": { - "crc32": "bin/crc32.njs" - }, - "engines": { - "node": ">=0.8" - } - }, - "node_modules/crc32-stream": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/crc32-stream/-/crc32-stream-6.0.0.tgz", - "integrity": 
"sha512-piICUB6ei4IlTv1+653yq5+KoqfBYmj9bw6LqXoOneTMDXk5nM1qt12mFW1caG3LlJXEKW1Bp0WggEmIfQB34g==", - "license": "MIT", - "dependencies": { - "crc-32": "^1.2.0", - "readable-stream": "^4.0.0" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/crc32-stream/node_modules/buffer": { - "version": "6.0.3", - "resolved": "https://registry.npmjs.org/buffer/-/buffer-6.0.3.tgz", - "integrity": "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT", - "dependencies": { - "base64-js": "^1.3.1", - "ieee754": "^1.2.1" - } - }, - "node_modules/crc32-stream/node_modules/readable-stream": { - "version": "4.7.0", - "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-4.7.0.tgz", - "integrity": "sha512-oIGGmcpTLwPga8Bn6/Z75SVaH1z5dUut2ibSyAMVhmUggWpmDn2dapB0n7f8nwaSiRtepAsfJyfXIO5DCVAODg==", - "license": "MIT", - "dependencies": { - "abort-controller": "^3.0.0", - "buffer": "^6.0.3", - "events": "^3.3.0", - "process": "^0.11.10", - "string_decoder": "^1.3.0" - }, - "engines": { - "node": "^12.22.0 || ^14.17.0 || >=16.0.0" - } - }, - "node_modules/cross-spawn": { - "version": "7.0.6", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", - "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", - "license": "MIT", - "dependencies": { - "path-key": "^3.1.0", - "shebang-command": "^2.0.0", - "which": "^2.0.1" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/cross-spawn/node_modules/isexe": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", - "integrity": 
"sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", - "license": "ISC" - }, - "node_modules/cross-spawn/node_modules/which": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", - "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", - "license": "ISC", - "dependencies": { - "isexe": "^2.0.0" - }, - "bin": { - "node-which": "bin/node-which" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/css-select": { - "version": "5.2.2", - "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.2.2.tgz", - "integrity": "sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==", - "license": "BSD-2-Clause", - "dependencies": { - "boolbase": "^1.0.0", - "css-what": "^6.1.0", - "domhandler": "^5.0.2", - "domutils": "^3.0.1", - "nth-check": "^2.0.1" - }, - "funding": { - "url": "https://github.com/sponsors/fb55" - } - }, - "node_modules/css-shorthand-properties": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/css-shorthand-properties/-/css-shorthand-properties-1.1.2.tgz", - "integrity": "sha512-C2AugXIpRGQTxaCW0N7n5jD/p5irUmCrwl03TrnMFBHDbdq44CFWR2zO7rK9xPN4Eo3pUxC4vQzQgbIpzrD1PQ==", - "license": "MIT" - }, - "node_modules/css-value": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/css-value/-/css-value-0.0.1.tgz", - "integrity": "sha512-FUV3xaJ63buRLgHrLQVlVgQnQdR4yqdLGaDu7g8CQcWjInDfM9plBTPI9FRfpahju1UBSaMckeb2/46ApS/V1Q==" - }, - "node_modules/css-what": { - "version": "6.2.2", - "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.2.2.tgz", - "integrity": "sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==", - "license": "BSD-2-Clause", - "engines": { - "node": ">= 6" - }, - "funding": { - "url": "https://github.com/sponsors/fb55" - } - }, - "node_modules/data-uri-to-buffer": { - 
"version": "6.0.2", - "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz", - "integrity": "sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==", - "license": "MIT", - "engines": { - "node": ">= 14" - } - }, "node_modules/debug": { "version": "2.6.9", "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", @@ -1622,18 +470,6 @@ "ms": "2.0.0" } }, - "node_modules/decamelize": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-6.0.1.tgz", - "integrity": "sha512-G7Cqgaelq68XHJNGlZ7lrNQyhZGsFqpwtGFexqUv4IQdjKoSYF7ipZ9UuTJZUSQXFj/XaoBLuEVIVqr8EJngEQ==", - "license": "MIT", - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/decompress-response": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz", @@ -1667,29 +503,6 @@ "node": ">=0.10.0" } }, - "node_modules/deepmerge-ts": { - "version": "7.1.5", - "resolved": "https://registry.npmjs.org/deepmerge-ts/-/deepmerge-ts-7.1.5.tgz", - "integrity": "sha512-HOJkrhaYsweh+W+e74Yn7YStZOilkoPb6fycpwNLKzSPtruFs48nYis0zy5yJz1+ktUhHxoRDJ27RQAWLIJVJw==", - "license": "BSD-3-Clause", - "engines": { - "node": ">=16.0.0" - } - }, - "node_modules/degenerator": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/degenerator/-/degenerator-5.0.1.tgz", - "integrity": "sha512-TllpMR/t0M5sqCXfj85i4XaAzxmS5tVA16dqvdkMwGmzI+dXLXnw3J+3Vdv7VKw+ThlTMboK6i9rnZ6Nntj5CQ==", - "license": "MIT", - "dependencies": { - "ast-types": "^0.13.4", - "escodegen": "^2.1.0", - "esprima": "^4.0.1" - }, - "engines": { - "node": ">= 14" - } - }, "node_modules/depd": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz", @@ -1738,61 +551,6 @@ "node": ">=8" } }, - "node_modules/dom-serializer": { - "version": "2.0.0", - "resolved": 
"https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", - "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", - "license": "MIT", - "dependencies": { - "domelementtype": "^2.3.0", - "domhandler": "^5.0.2", - "entities": "^4.2.0" - }, - "funding": { - "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" - } - }, - "node_modules/domelementtype": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", - "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/fb55" - } - ], - "license": "BSD-2-Clause" - }, - "node_modules/domhandler": { - "version": "5.0.3", - "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", - "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", - "license": "BSD-2-Clause", - "dependencies": { - "domelementtype": "^2.3.0" - }, - "engines": { - "node": ">= 4" - }, - "funding": { - "url": "https://github.com/fb55/domhandler?sponsor=1" - } - }, - "node_modules/domutils": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.2.2.tgz", - "integrity": "sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==", - "license": "BSD-2-Clause", - "dependencies": { - "dom-serializer": "^2.0.0", - "domelementtype": "^2.3.0", - "domhandler": "^5.0.3" - }, - "funding": { - "url": "https://github.com/fb55/domutils?sponsor=1" - } - }, "node_modules/dot-prop": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/dot-prop/-/dot-prop-6.0.1.tgz", @@ -1822,96 +580,6 @@ "node": ">= 0.4" } }, - "node_modules/eastasianwidth": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz", - 
"integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", - "license": "MIT" - }, - "node_modules/edge-paths": { - "version": "3.0.5", - "resolved": "https://registry.npmjs.org/edge-paths/-/edge-paths-3.0.5.tgz", - "integrity": "sha512-sB7vSrDnFa4ezWQk9nZ/n0FdpdUuC6R1EOrlU3DL+bovcNFK28rqu2emmAUjujYEJTWIgQGqgVVWUZXMnc8iWg==", - "license": "MIT", - "dependencies": { - "@types/which": "^2.0.1", - "which": "^2.0.2" - }, - "engines": { - "node": ">=14.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/shirshak55" - } - }, - "node_modules/edge-paths/node_modules/isexe": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", - "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", - "license": "ISC" - }, - "node_modules/edge-paths/node_modules/which": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", - "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", - "license": "ISC", - "dependencies": { - "isexe": "^2.0.0" - }, - "bin": { - "node-which": "bin/node-which" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/edgedriver": { - "version": "6.3.0", - "resolved": "https://registry.npmjs.org/edgedriver/-/edgedriver-6.3.0.tgz", - "integrity": "sha512-ggEQL+oEyIcM4nP2QC3AtCQ04o4kDNefRM3hja0odvlPSnsaxiruMxEZ93v3gDCKWYW6BXUr51PPradb+3nffw==", - "hasInstallScript": true, - "license": "MIT", - "dependencies": { - "@wdio/logger": "^9.18.0", - "@zip.js/zip.js": "^2.8.11", - "decamelize": "^6.0.1", - "edge-paths": "^3.0.5", - "fast-xml-parser": "^5.3.3", - "http-proxy-agent": "^7.0.2", - "https-proxy-agent": "^7.0.6", - "which": "^6.0.0" - }, - "bin": { - "edgedriver": "bin/edgedriver.js" - }, - "engines": { - "node": ">=20.0.0" - } - }, - "node_modules/edgedriver/node_modules/isexe": { - "version": "4.0.0", - 
"resolved": "https://registry.npmjs.org/isexe/-/isexe-4.0.0.tgz", - "integrity": "sha512-FFUtZMpoZ8RqHS3XeXEmHWLA4thH+ZxCv2lOiPIn1Xc7CxrqhWzNSDzD+/chS/zbYezmiwWLdQC09JdQKmthOw==", - "license": "BlueOak-1.0.0", - "engines": { - "node": ">=20" - } - }, - "node_modules/edgedriver/node_modules/which": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/which/-/which-6.0.1.tgz", - "integrity": "sha512-oGLe46MIrCRqX7ytPUf66EAYvdeMIZYn3WaocqqKZAxrBpkqHfL/qvTyJ/bTk5+AqHCjXmrv3CEWgy368zhRUg==", - "license": "ISC", - "dependencies": { - "isexe": "^4.0.0" - }, - "bin": { - "node-which": "bin/which.js" - }, - "engines": { - "node": "^20.17.0 || >=22.9.0" - } - }, "node_modules/ee-first": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", @@ -1924,12 +592,6 @@ "integrity": "sha512-q9n5T4BR4Xwa2cwbrwcsDJtHD/enpQ5S1xF1IAtdqf5AAgqDFmR/aakqH3ChFdqd/QXJhS3rnnXFtexU7rax6Q==", "license": "ISC" }, - "node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "license": "MIT" - }, "node_modules/encodeurl": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz", @@ -1939,31 +601,6 @@ "node": ">= 0.8" } }, - "node_modules/encoding-sniffer": { - "version": "0.2.1", - "resolved": "https://registry.npmjs.org/encoding-sniffer/-/encoding-sniffer-0.2.1.tgz", - "integrity": "sha512-5gvq20T6vfpekVtqrYQsSCFZ1wEg5+wW0/QaZMWkFr6BqD3NfKs0rLCx4rrVlSWJeZb5NBJgVLswK/w2MWU+Gw==", - "license": "MIT", - "dependencies": { - "iconv-lite": "^0.6.3", - "whatwg-encoding": "^3.1.1" - }, - "funding": { - "url": "https://github.com/fb55/encoding-sniffer?sponsor=1" - } - }, - "node_modules/encoding-sniffer/node_modules/iconv-lite": { - "version": "0.6.3", - "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", - 
"integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", - "license": "MIT", - "dependencies": { - "safer-buffer": ">= 2.1.2 < 3.0.0" - }, - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/end-of-stream": { "version": "1.4.5", "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz", @@ -1973,18 +610,6 @@ "once": "^1.4.0" } }, - "node_modules/entities": { - "version": "4.5.0", - "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", - "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", - "license": "BSD-2-Clause", - "engines": { - "node": ">=0.12" - }, - "funding": { - "url": "https://github.com/fb55/entities?sponsor=1" - } - }, "node_modules/es-define-property": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", @@ -2030,58 +655,6 @@ "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==", "license": "MIT" }, - "node_modules/escodegen": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/escodegen/-/escodegen-2.1.0.tgz", - "integrity": "sha512-2NlIDTwUWJN0mRPQOdtQBzbUHvdGY2P1VXSyU83Q3xKxM7WHX2Ql8dKq782Q9TgQUNOLEzEYu9bzLNj1q88I5w==", - "license": "BSD-2-Clause", - "dependencies": { - "esprima": "^4.0.1", - "estraverse": "^5.2.0", - "esutils": "^2.0.2" - }, - "bin": { - "escodegen": "bin/escodegen.js", - "esgenerate": "bin/esgenerate.js" - }, - "engines": { - "node": ">=6.0" - }, - "optionalDependencies": { - "source-map": "~0.6.1" - } - }, - "node_modules/esprima": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", - "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", - "license": "BSD-2-Clause", - "bin": { - "esparse": "bin/esparse.js", - "esvalidate": 
"bin/esvalidate.js" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/estraverse": { - "version": "5.3.0", - "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz", - "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", - "license": "BSD-2-Clause", - "engines": { - "node": ">=4.0" - } - }, - "node_modules/esutils": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", - "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", - "license": "BSD-2-Clause", - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/etag": { "version": "1.8.1", "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", @@ -2091,33 +664,6 @@ "node": ">= 0.6" } }, - "node_modules/event-target-shim": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz", - "integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/events": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/events/-/events-3.3.0.tgz", - "integrity": "sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==", - "license": "MIT", - "engines": { - "node": ">=0.8.x" - } - }, - "node_modules/events-universal": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/events-universal/-/events-universal-1.0.1.tgz", - "integrity": "sha512-LUd5euvbMLpwOF8m6ivPCbhQeSiYVNb8Vs0fQ8QjXo0JTkEHpz8pxdQf0gStltaPpw0Cca8b39KxvK9cfKRiAw==", - "license": "Apache-2.0", - "dependencies": { - "bare-events": "^2.7.0" - } - }, "node_modules/expand-template": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz", @@ -2173,105 +719,6 @@ "url": 
"https://opencollective.com/express" } }, - "node_modules/extract-zip": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.1.tgz", - "integrity": "sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==", - "license": "BSD-2-Clause", - "dependencies": { - "debug": "^4.1.1", - "get-stream": "^5.1.0", - "yauzl": "^2.10.0" - }, - "bin": { - "extract-zip": "cli.js" - }, - "engines": { - "node": ">= 10.17.0" - }, - "optionalDependencies": { - "@types/yauzl": "^2.9.1" - } - }, - "node_modules/extract-zip/node_modules/debug": { - "version": "4.4.3", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", - "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", - "license": "MIT", - "dependencies": { - "ms": "^2.1.3" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } - } - }, - "node_modules/extract-zip/node_modules/ms": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "license": "MIT" - }, - "node_modules/fast-deep-equal": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-2.0.1.tgz", - "integrity": "sha512-bCK/2Z4zLidyB4ReuIsvALH6w31YfAQDmXMqMx6FyfHqvBxtjC0eRumeSu4Bs3XtXwpyIywtSTrVT99BxY1f9w==", - "license": "MIT" - }, - "node_modules/fast-fifo": { - "version": "1.3.2", - "resolved": "https://registry.npmjs.org/fast-fifo/-/fast-fifo-1.3.2.tgz", - "integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ==", - "license": "MIT" - }, - "node_modules/fast-xml-builder": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/fast-xml-builder/-/fast-xml-builder-1.1.4.tgz", - "integrity": 
"sha512-f2jhpN4Eccy0/Uz9csxh3Nu6q4ErKxf0XIsasomfOihuSUa3/xw6w8dnOtCDgEItQFJG8KyXPzQXzcODDrrbOg==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/NaturalIntelligence" - } - ], - "license": "MIT", - "dependencies": { - "path-expression-matcher": "^1.1.3" - } - }, - "node_modules/fast-xml-parser": { - "version": "5.5.11", - "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-5.5.11.tgz", - "integrity": "sha512-QL0eb0YbSTVWF6tTf1+LEMSgtCEjBYPpnAjoLC8SscESlAjXEIRJ7cHtLG0pLeDFaZLa4VKZLArtA/60ZS7vyA==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/NaturalIntelligence" - } - ], - "license": "MIT", - "dependencies": { - "fast-xml-builder": "^1.1.4", - "path-expression-matcher": "^1.4.0", - "strnum": "^2.2.3" - }, - "bin": { - "fxparser": "src/cli/cli.js" - } - }, - "node_modules/fd-slicer": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/fd-slicer/-/fd-slicer-1.1.0.tgz", - "integrity": "sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==", - "license": "MIT", - "dependencies": { - "pend": "~1.2.0" - } - }, "node_modules/file-uri-to-path": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz", @@ -2331,22 +778,6 @@ "node": ">=0.10.0" } }, - "node_modules/foreground-child": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.1.tgz", - "integrity": "sha512-gIXjKqtFuWEgzFRJA9WCQeSJLZDjgJUOMCMzxtvFq/37KojM1BFGufqsCy0r4qSQmYLsZYMeyRqzIWOMup03sw==", - "license": "ISC", - "dependencies": { - "cross-spawn": "^7.0.6", - "signal-exit": "^4.0.1" - }, - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, "node_modules/forwarded": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", @@ -2414,27 +845,6 @@ "url": 
"https://github.com/sponsors/ljharb" } }, - "node_modules/geckodriver": { - "version": "6.1.0", - "resolved": "https://registry.npmjs.org/geckodriver/-/geckodriver-6.1.0.tgz", - "integrity": "sha512-ZRXLa4ZaYTTgUO4Eefw+RsQCleugU2QLb1ME7qTYxxuRj51yAhfnXaItXNs5/vUzfIaDHuZ+YnSF005hfp07nQ==", - "hasInstallScript": true, - "license": "MIT", - "dependencies": { - "@wdio/logger": "^9.18.0", - "@zip.js/zip.js": "^2.8.11", - "decamelize": "^6.0.1", - "http-proxy-agent": "^7.0.2", - "https-proxy-agent": "^7.0.6", - "modern-tar": "^0.7.2" - }, - "bin": { - "geckodriver": "bin/geckodriver.js" - }, - "engines": { - "node": ">=20.0.0" - } - }, "node_modules/generative-bayesian-network": { "version": "2.1.82", "resolved": "https://registry.npmjs.org/generative-bayesian-network/-/generative-bayesian-network-2.1.82.tgz", @@ -2445,15 +855,6 @@ "tslib": "^2.4.0" } }, - "node_modules/get-caller-file": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", - "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", - "license": "ISC", - "engines": { - "node": "6.* || 8.* || >= 10.*" - } - }, "node_modules/get-intrinsic": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", @@ -2478,18 +879,6 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/get-port": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/get-port/-/get-port-7.2.0.tgz", - "integrity": "sha512-afP4W205ONCuMoPBqcR6PSXnzX35KTcJygfJfcp+QY+uwm3p20p1YczWXhlICIzGMCxYBQcySEcOgsJcrkyobg==", - "license": "MIT", - "engines": { - "node": ">=16" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/get-proto": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", @@ -2503,58 +892,6 @@ "node": ">= 0.4" } }, - "node_modules/get-stream": { - "version": "5.2.0", - 
"resolved": "https://registry.npmjs.org/get-stream/-/get-stream-5.2.0.tgz", - "integrity": "sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==", - "license": "MIT", - "dependencies": { - "pump": "^3.0.0" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/get-uri": { - "version": "6.0.5", - "resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.5.tgz", - "integrity": "sha512-b1O07XYq8eRuVzBNgJLstU6FYc1tS6wnMtF1I1D9lE8LxZSOGZ7LhxN54yPP6mGw5f2CkXY2BQUL9Fx41qvcIg==", - "license": "MIT", - "dependencies": { - "basic-ftp": "^5.0.2", - "data-uri-to-buffer": "^6.0.2", - "debug": "^4.3.4" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/get-uri/node_modules/debug": { - "version": "4.4.3", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", - "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", - "license": "MIT", - "dependencies": { - "ms": "^2.1.3" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } - } - }, - "node_modules/get-uri/node_modules/ms": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "license": "MIT" - }, "node_modules/github-from-package": { "version": "0.0.0", "resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz", @@ -2596,21 +933,6 @@ "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", "license": "ISC" }, - "node_modules/grapheme-splitter": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/grapheme-splitter/-/grapheme-splitter-1.0.4.tgz", - "integrity": 
"sha512-bzh50DW9kTPM00T8y4o8vQg89Di9oLJVLW/KaOGIXJWP/iqCN6WKYkbNOF04vFLJhwcpYUh9ydh/+5vpOqV4YQ==", - "license": "MIT" - }, - "node_modules/has-flag": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", - "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, "node_modules/has-symbols": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", @@ -2650,43 +972,6 @@ "node": ">=16.0.0" } }, - "node_modules/htmlfy": { - "version": "0.8.1", - "resolved": "https://registry.npmjs.org/htmlfy/-/htmlfy-0.8.1.tgz", - "integrity": "sha512-xWROBw9+MEGwxpotll0h672KCaLrKKiCYzsyN8ZgL9cQbVumFnyvsk2JqiB9ELAV1GLj1GG/jxZUjV9OZZi/yQ==", - "license": "MIT" - }, - "node_modules/htmlparser2": { - "version": "10.1.0", - "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.1.0.tgz", - "integrity": "sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ==", - "funding": [ - "https://github.com/fb55/htmlparser2?sponsor=1", - { - "type": "github", - "url": "https://github.com/sponsors/fb55" - } - ], - "license": "MIT", - "dependencies": { - "domelementtype": "^2.3.0", - "domhandler": "^5.0.3", - "domutils": "^3.2.2", - "entities": "^7.0.1" - } - }, - "node_modules/htmlparser2/node_modules/entities": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/entities/-/entities-7.0.1.tgz", - "integrity": "sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==", - "license": "BSD-2-Clause", - "engines": { - "node": ">=0.12" - }, - "funding": { - "url": "https://github.com/fb55/entities?sponsor=1" - } - }, "node_modules/http-errors": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.1.tgz", @@ -2707,78 +992,6 @@ "url": 
"https://opencollective.com/express" } }, - "node_modules/http-proxy-agent": { - "version": "7.0.2", - "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", - "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", - "license": "MIT", - "dependencies": { - "agent-base": "^7.1.0", - "debug": "^4.3.4" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/http-proxy-agent/node_modules/debug": { - "version": "4.4.3", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", - "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", - "license": "MIT", - "dependencies": { - "ms": "^2.1.3" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } - } - }, - "node_modules/http-proxy-agent/node_modules/ms": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "license": "MIT" - }, - "node_modules/https-proxy-agent": { - "version": "7.0.6", - "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", - "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", - "license": "MIT", - "dependencies": { - "agent-base": "^7.1.2", - "debug": "4" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/https-proxy-agent/node_modules/debug": { - "version": "4.4.3", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", - "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", - "license": "MIT", - "dependencies": { - "ms": "^2.1.3" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } - } - }, - 
"node_modules/https-proxy-agent/node_modules/ms": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "license": "MIT" - }, "node_modules/iconv-lite": { "version": "0.4.24", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", @@ -2811,12 +1024,6 @@ ], "license": "BSD-3-Clause" }, - "node_modules/immediate": { - "version": "3.0.6", - "resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz", - "integrity": "sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ==", - "license": "MIT" - }, "node_modules/impit": { "version": "0.7.6", "resolved": "https://registry.npmjs.org/impit/-/impit-0.7.6.tgz", @@ -2964,16 +1171,6 @@ "node": ">= 10" } }, - "node_modules/import-meta-resolve": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/import-meta-resolve/-/import-meta-resolve-4.2.0.tgz", - "integrity": "sha512-Iqv2fzaTQN28s/FwZAoFq0ZSs/7hMAHJVX+w8PZl3cY19Pxk6jFFalxQoIfW2826i/fDLXv8IiEZRIT0lDuWcg==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, "node_modules/inflight": { "version": "1.0.6", "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", @@ -2997,15 +1194,6 @@ "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==", "license": "ISC" }, - "node_modules/ip-address": { - "version": "10.1.0", - "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.1.0.tgz", - "integrity": "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==", - "license": "MIT", - "engines": { - "node": ">= 12" - } - }, "node_modules/ipaddr.js": { "version": "1.9.1", "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", @@ -3030,15 +1218,6 @@ "node": 
">=0.10.0" } }, - "node_modules/is-fullwidth-code-point": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, "node_modules/is-obj": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/is-obj/-/is-obj-2.0.0.tgz", @@ -3048,18 +1227,6 @@ "node": ">=8" } }, - "node_modules/is-plain-obj": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/is-plain-obj/-/is-plain-obj-4.1.0.tgz", - "integrity": "sha512-+Pgi+vMuUNkJyExiMBt5IlFoMyKnr5zhJ4Uspz58WOhBF5QoIZkFyNHIbBAtHwzVAgk5RtndVNsDRN61/mmDqg==", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/is-plain-object": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-2.0.4.tgz", @@ -3092,33 +1259,6 @@ ], "license": "MIT" }, - "node_modules/is-stream": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz", - "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==", - "license": "MIT", - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/isarray": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", - "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==", - "license": "MIT" - }, - "node_modules/isexe": { - "version": "3.1.5", - "resolved": "https://registry.npmjs.org/isexe/-/isexe-3.1.5.tgz", - "integrity": "sha512-6B3tLtFqtQS4ekarvLVMZ+X+VlvQekbe4taUkf/rhVO3d/h0M2rfARm/pXLcPEsjjMsFgrFgSrhQIxcSVrBz8w==", - "license": "BlueOak-1.0.0", - "engines": { - 
"node": ">=18" - } - }, "node_modules/isobject": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", @@ -3128,30 +1268,6 @@ "node": ">=0.10.0" } }, - "node_modules/jackspeak": { - "version": "3.4.3", - "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-3.4.3.tgz", - "integrity": "sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==", - "license": "BlueOak-1.0.0", - "dependencies": { - "@isaacs/cliui": "^8.0.2" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - }, - "optionalDependencies": { - "@pkgjs/parseargs": "^0.11.0" - } - }, - "node_modules/jiti": { - "version": "2.6.1", - "resolved": "https://registry.npmjs.org/jiti/-/jiti-2.6.1.tgz", - "integrity": "sha512-ekilCSN1jwRvIbgeg/57YFh8qQDNbwDb9xT/qu2DAHbFFZUicIl4ygVaAvzveMhMVr3LnpSKTNnwt8PoOfmKhQ==", - "license": "MIT", - "bin": { - "jiti": "lib/jiti-cli.mjs" - } - }, "node_modules/jsonfile": { "version": "6.2.0", "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.2.0.tgz", @@ -3164,48 +1280,6 @@ "graceful-fs": "^4.1.6" } }, - "node_modules/jszip": { - "version": "3.10.1", - "resolved": "https://registry.npmjs.org/jszip/-/jszip-3.10.1.tgz", - "integrity": "sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==", - "license": "(MIT OR GPL-3.0-or-later)", - "dependencies": { - "lie": "~3.3.0", - "pako": "~1.0.2", - "readable-stream": "~2.3.6", - "setimmediate": "^1.0.5" - } - }, - "node_modules/jszip/node_modules/readable-stream": { - "version": "2.3.8", - "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz", - "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==", - "license": "MIT", - "dependencies": { - "core-util-is": "~1.0.0", - "inherits": "~2.0.3", - "isarray": "~1.0.0", - "process-nextick-args": "~2.0.0", - "safe-buffer": "~5.1.1", - "string_decoder": 
"~1.1.1", - "util-deprecate": "~1.0.1" - } - }, - "node_modules/jszip/node_modules/safe-buffer": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", - "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", - "license": "MIT" - }, - "node_modules/jszip/node_modules/string_decoder": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", - "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", - "license": "MIT", - "dependencies": { - "safe-buffer": "~5.1.0" - } - }, "node_modules/kind-of": { "version": "3.2.2", "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", @@ -3245,90 +1319,6 @@ "node": ">=0.10.0" } }, - "node_modules/lazystream": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/lazystream/-/lazystream-1.0.1.tgz", - "integrity": "sha512-b94GiNHQNy6JNTrt5w6zNyffMrNkXZb3KTkCZJb2V1xaEGCk093vkZ2jk3tpaeP33/OiXC+WvK9AxUebnf5nbw==", - "license": "MIT", - "dependencies": { - "readable-stream": "^2.0.5" - }, - "engines": { - "node": ">= 0.6.3" - } - }, - "node_modules/lazystream/node_modules/readable-stream": { - "version": "2.3.8", - "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz", - "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==", - "license": "MIT", - "dependencies": { - "core-util-is": "~1.0.0", - "inherits": "~2.0.3", - "isarray": "~1.0.0", - "process-nextick-args": "~2.0.0", - "safe-buffer": "~5.1.1", - "string_decoder": "~1.1.1", - "util-deprecate": "~1.0.1" - } - }, - "node_modules/lazystream/node_modules/safe-buffer": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", - "integrity": 
"sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", - "license": "MIT" - }, - "node_modules/lazystream/node_modules/string_decoder": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", - "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", - "license": "MIT", - "dependencies": { - "safe-buffer": "~5.1.0" - } - }, - "node_modules/lie": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz", - "integrity": "sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==", - "license": "MIT", - "dependencies": { - "immediate": "~3.0.5" - } - }, - "node_modules/locate-app": { - "version": "2.5.0", - "resolved": "https://registry.npmjs.org/locate-app/-/locate-app-2.5.0.tgz", - "integrity": "sha512-xIqbzPMBYArJRmPGUZD9CzV9wOqmVtQnaAn3wrj3s6WYW0bQvPI7x+sPYUGmDTYMHefVK//zc6HEYZ1qnxIK+Q==", - "funding": [ - { - "type": "individual", - "url": "https://buymeacoffee.com/hejny" - }, - { - "type": "github", - "url": "https://github.com/hejny/locate-app/blob/main/README.md#%EF%B8%8F-contributing" - } - ], - "license": "Apache-2.0", - "dependencies": { - "@promptbook/utils": "0.69.5", - "type-fest": "4.26.0", - "userhome": "1.0.1" - } - }, - "node_modules/lodash": { - "version": "4.18.1", - "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.18.1.tgz", - "integrity": "sha512-dMInicTPVE8d1e5otfwmmjlxkZoUpiVLwyeTdUsi/Caj/gfzzblBcCE5sRHV/AsjuCmxWrte2TNGSYuCeCq+0Q==", - "license": "MIT" - }, - "node_modules/lodash.clonedeep": { - "version": "4.5.0", - "resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz", - "integrity": "sha512-H5ZhCF25riFd9uB5UCkVKo61m3S/xZk1x4wA6yp/L3RFP6Z/eHH1ymQcGLo7J3GMPfm0V/7m1tryHuGVxpqEBQ==", - "license": "MIT" - }, "node_modules/lodash.isequal": { "version": "4.5.0", "resolved": 
"https://registry.npmjs.org/lodash.isequal/-/lodash.isequal-4.5.0.tgz", @@ -3336,37 +1326,6 @@ "deprecated": "This package is deprecated. Use require('node:util').isDeepStrictEqual instead.", "license": "MIT" }, - "node_modules/lodash.zip": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/lodash.zip/-/lodash.zip-4.2.0.tgz", - "integrity": "sha512-C7IOaBBK/0gMORRBd8OETNx3kmOkgIWIPvyDpZSCTwUrpYmgZwJkjZeOD8ww4xbOUOs4/attY+pciKvadNfFbg==", - "license": "MIT" - }, - "node_modules/loglevel": { - "version": "1.9.2", - "resolved": "https://registry.npmjs.org/loglevel/-/loglevel-1.9.2.tgz", - "integrity": "sha512-HgMmCqIJSAKqo68l0rS2AanEWfkxaZ5wNiEFb5ggm08lDs9Xl2KxBlX3PTcaD2chBM1gXAYf491/M2Rv8Jwayg==", - "license": "MIT", - "engines": { - "node": ">= 0.6.0" - }, - "funding": { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/loglevel" - } - }, - "node_modules/loglevel-plugin-prefix": { - "version": "0.8.4", - "resolved": "https://registry.npmjs.org/loglevel-plugin-prefix/-/loglevel-plugin-prefix-0.8.4.tgz", - "integrity": "sha512-WpG9CcFAOjz/FtNht+QJeGpvVl/cdR6P0z6OcXSkr8wFJOsV2GRj2j10JLfjuA4aYkcKCNIEqRGCyTife9R8/g==", - "license": "MIT" - }, - "node_modules/lru-cache": { - "version": "10.4.3", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz", - "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==", - "license": "ISC" - }, "node_modules/math-intrinsics": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", @@ -3509,12 +1468,6 @@ "node": ">=16 || 14 >=14.17" } }, - "node_modules/mitt": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.1.tgz", - "integrity": "sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==", - "license": "MIT" - }, "node_modules/mixin-object": { "version": "2.0.1", "resolved": 
"https://registry.npmjs.org/mixin-object/-/mixin-object-2.0.1.tgz", @@ -3553,15 +1506,6 @@ "npm": ">=6" } }, - "node_modules/modern-tar": { - "version": "0.7.6", - "resolved": "https://registry.npmjs.org/modern-tar/-/modern-tar-0.7.6.tgz", - "integrity": "sha512-sweCIVXzx1aIGTCdzcMlSZt1h8k5Tmk08VNAuRk3IU28XamGiOH5ypi11g6De2CH7PhYqSSnGy2A/EFhbWnVKg==", - "license": "MIT", - "engines": { - "node": ">=18.0.0" - } - }, "node_modules/ms": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", @@ -3583,15 +1527,6 @@ "node": ">= 0.6" } }, - "node_modules/netmask": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/netmask/-/netmask-2.1.1.tgz", - "integrity": "sha512-eonl3sLUha+S1GzTPxychyhnUzKyeQkZ7jLjKrBagJgPla13F+uQ71HgpFefyHgqrjEbCPkDArxYsjY8/+gLKA==", - "license": "MIT", - "engines": { - "node": ">= 0.4.0" - } - }, "node_modules/node-abi": { "version": "3.89.0", "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.89.0.tgz", @@ -3610,49 +1545,6 @@ "integrity": "sha512-1h5gKZCF+pO/o3Iqt5Jp7wc9rH3eJJ0+nh/CIoiRwjRxde/hAHyLPXYN4V3CqKAbiZPSeJFSWHmJsbkicta0Eg==", "license": "MIT" }, - "node_modules/node-simctl": { - "version": "7.7.5", - "resolved": "https://registry.npmjs.org/node-simctl/-/node-simctl-7.7.5.tgz", - "integrity": "sha512-lWflzDW9xLuOOvR6mTJ9efbDtO/iSCH6rEGjxFxTV0vGgz5XjoZlW2BkNCCZib0B6Y23tCOiYhYJaMQYB8FKIQ==", - "license": "Apache-2.0", - "dependencies": { - "@appium/logger": "^1.3.0", - "asyncbox": "^3.0.0", - "bluebird": "^3.5.1", - "lodash": "^4.2.1", - "rimraf": "^5.0.0", - "semver": "^7.0.0", - "source-map-support": "^0.x", - "teen_process": "^2.2.0", - "uuid": "^11.0.1", - "which": "^5.0.0" - }, - "engines": { - "node": ">=14", - "npm": ">=8" - } - }, - "node_modules/normalize-path": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", - "integrity": 
"sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/nth-check": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", - "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==", - "license": "BSD-2-Clause", - "dependencies": { - "boolbase": "^1.0.0" - }, - "funding": { - "url": "https://github.com/fb55/nth-check?sponsor=1" - } - }, "node_modules/object-inspect": { "version": "1.13.4", "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz", @@ -3705,122 +1597,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/pac-proxy-agent": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz", - "integrity": "sha512-TEB8ESquiLMc0lV8vcd5Ql/JAKAoyzHFXaStwjkzpOpC5Yv+pIzLfHvjTSdf3vpa2bMiUQrg9i6276yn8666aA==", - "license": "MIT", - "dependencies": { - "@tootallnate/quickjs-emscripten": "^0.23.0", - "agent-base": "^7.1.2", - "debug": "^4.3.4", - "get-uri": "^6.0.1", - "http-proxy-agent": "^7.0.0", - "https-proxy-agent": "^7.0.6", - "pac-resolver": "^7.0.1", - "socks-proxy-agent": "^8.0.5" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/pac-proxy-agent/node_modules/debug": { - "version": "4.4.3", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", - "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", - "license": "MIT", - "dependencies": { - "ms": "^2.1.3" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } - } - }, - "node_modules/pac-proxy-agent/node_modules/ms": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": 
"sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "license": "MIT" - }, - "node_modules/pac-resolver": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/pac-resolver/-/pac-resolver-7.0.1.tgz", - "integrity": "sha512-5NPgf87AT2STgwa2ntRMr45jTKrYBGkVU36yT0ig/n/GMAa3oPqhZfIQ2kMEimReg0+t9kZViDVZ83qfVUlckg==", - "license": "MIT", - "dependencies": { - "degenerator": "^5.0.0", - "netmask": "^2.0.2" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/package-json-from-dist": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz", - "integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==", - "license": "BlueOak-1.0.0" - }, - "node_modules/pako": { - "version": "1.0.11", - "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz", - "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==", - "license": "(MIT AND Zlib)" - }, - "node_modules/parse5": { - "version": "7.3.0", - "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.3.0.tgz", - "integrity": "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==", - "license": "MIT", - "dependencies": { - "entities": "^6.0.0" - }, - "funding": { - "url": "https://github.com/inikulin/parse5?sponsor=1" - } - }, - "node_modules/parse5-htmlparser2-tree-adapter": { - "version": "7.1.0", - "resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.1.0.tgz", - "integrity": "sha512-ruw5xyKs6lrpo9x9rCZqZZnIUntICjQAd0Wsmp396Ul9lN/h+ifgVV1x1gZHi8euej6wTfpqX8j+BFQxF0NS/g==", - "license": "MIT", - "dependencies": { - "domhandler": "^5.0.3", - "parse5": "^7.0.0" - }, - "funding": { - "url": "https://github.com/inikulin/parse5?sponsor=1" - } - }, - "node_modules/parse5-parser-stream": { - 
"version": "7.1.2", - "resolved": "https://registry.npmjs.org/parse5-parser-stream/-/parse5-parser-stream-7.1.2.tgz", - "integrity": "sha512-JyeQc9iwFLn5TbvvqACIF/VXG6abODeB3Fwmv/TGdLk2LfbWkaySGY72at4+Ty7EkPZj854u4CrICqNk2qIbow==", - "license": "MIT", - "dependencies": { - "parse5": "^7.0.0" - }, - "funding": { - "url": "https://github.com/inikulin/parse5?sponsor=1" - } - }, - "node_modules/parse5/node_modules/entities": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/entities/-/entities-6.0.1.tgz", - "integrity": "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==", - "license": "BSD-2-Clause", - "engines": { - "node": ">=0.12" - }, - "funding": { - "url": "https://github.com/fb55/entities?sponsor=1" - } - }, "node_modules/parseurl": { "version": "1.3.3", "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", @@ -3830,21 +1606,6 @@ "node": ">= 0.8" } }, - "node_modules/path-expression-matcher": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/path-expression-matcher/-/path-expression-matcher-1.5.0.tgz", - "integrity": "sha512-cbrerZV+6rvdQrrD+iGMcZFEiiSrbv9Tfdkvnusy6y0x0GKBXREFg/Y65GhIfm0tnLntThhzCnfKwp1WRjeCyQ==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/NaturalIntelligence" - } - ], - "license": "MIT", - "engines": { - "node": ">=14.0.0" - } - }, "node_modules/path-is-absolute": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", @@ -3854,15 +1615,6 @@ "node": ">=0.10.0" } }, - "node_modules/path-key": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", - "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, "node_modules/path-scurry": { "version": "2.0.2", "resolved": 
"https://registry.npmjs.org/path-scurry/-/path-scurry-2.0.2.tgz", @@ -3894,12 +1646,6 @@ "integrity": "sha512-A/AGNMFN3c8bOlvV9RreMdrv7jsmF9XIfDeCd87+I8RNg6s78BhJxMu69NEMHBSJFxKidViTEdruRwEk/WIKqA==", "license": "MIT" }, - "node_modules/pend": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz", - "integrity": "sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==", - "license": "MIT" - }, "node_modules/picocolors": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", @@ -4010,21 +1756,6 @@ "node": ">=10" } }, - "node_modules/process": { - "version": "0.11.10", - "resolved": "https://registry.npmjs.org/process/-/process-0.11.10.tgz", - "integrity": "sha512-cdGef/drWFoydD1JsMzuFf8100nZl+GT+yacc2bEced5f9Rjk4z+WtFUTBu9PhOi9j/jfmBPu0mMEY4wIdAF8A==", - "license": "MIT", - "engines": { - "node": ">= 0.6.0" - } - }, - "node_modules/process-nextick-args": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz", - "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==", - "license": "MIT" - }, "node_modules/progress": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", @@ -4060,63 +1791,6 @@ "node": ">= 0.10" } }, - "node_modules/proxy-agent": { - "version": "6.5.0", - "resolved": "https://registry.npmjs.org/proxy-agent/-/proxy-agent-6.5.0.tgz", - "integrity": "sha512-TmatMXdr2KlRiA2CyDu8GqR8EjahTG3aY3nXjdzFyoZbmB8hrBsTyMezhULIXKnC0jpfjlmiZ3+EaCzoInSu/A==", - "license": "MIT", - "dependencies": { - "agent-base": "^7.1.2", - "debug": "^4.3.4", - "http-proxy-agent": "^7.0.1", - "https-proxy-agent": "^7.0.6", - "lru-cache": "^7.14.1", - "pac-proxy-agent": "^7.1.0", - "proxy-from-env": "^1.1.0", - "socks-proxy-agent": "^8.0.5" - }, - "engines": { - "node": ">= 14" - } - }, - 
"node_modules/proxy-agent/node_modules/debug": { - "version": "4.4.3", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", - "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", - "license": "MIT", - "dependencies": { - "ms": "^2.1.3" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } - } - }, - "node_modules/proxy-agent/node_modules/lru-cache": { - "version": "7.18.3", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz", - "integrity": "sha512-jumlc0BIUrS3qJGgIkWZsyfAM7NCWiBcCDhnd+3NNM5KbBmLTgHVfWBcg6W+rLUsIpzpERPsvwUP7CckAQSOoA==", - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/proxy-agent/node_modules/ms": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "license": "MIT" - }, - "node_modules/proxy-from-env": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", - "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", - "license": "MIT" - }, "node_modules/pump": { "version": "3.0.4", "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.4.tgz", @@ -4405,12 +2079,6 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/query-selector-shadow-dom": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/query-selector-shadow-dom/-/query-selector-shadow-dom-1.0.1.tgz", - "integrity": "sha512-lT5yCqEBgfoMYpf3F2xQRK7zEr1rhIIZuceDK6+xRkJQ4NMbHTwXqk4NkwDwQMNqXgG9r9fyHnzwNVs6zV5KRw==", - "license": "MIT" - }, "node_modules/range-parser": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz", @@ -4464,166 +2132,6 @@ "node": ">= 6" } }, - 
"node_modules/readdir-glob": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/readdir-glob/-/readdir-glob-1.1.3.tgz", - "integrity": "sha512-v05I2k7xN8zXvPD9N+z/uhXPaj0sUFCe2rcWZIpBsqxfP7xXFQ0tipAd/wjj1YxWyWtUS5IDJpOG82JKt2EAVA==", - "license": "Apache-2.0", - "dependencies": { - "minimatch": "^5.1.0" - } - }, - "node_modules/readdir-glob/node_modules/balanced-match": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", - "license": "MIT" - }, - "node_modules/readdir-glob/node_modules/brace-expansion": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.1.0.tgz", - "integrity": "sha512-TN1kCZAgdgweJhWWpgKYrQaMNHcDULHkWwQIspdtjV4Y5aurRdZpjAqn6yX3FPqTA9ngHCc4hJxMAMgGfve85w==", - "license": "MIT", - "dependencies": { - "balanced-match": "^1.0.0" - } - }, - "node_modules/readdir-glob/node_modules/minimatch": { - "version": "5.1.9", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.9.tgz", - "integrity": "sha512-7o1wEA2RyMP7Iu7GNba9vc0RWWGACJOCZBJX2GJWip0ikV+wcOsgVuY9uE8CPiyQhkGFSlhuSkZPavN7u1c2Fw==", - "license": "ISC", - "dependencies": { - "brace-expansion": "^2.0.1" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/require-directory": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", - "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/resq": { - "version": "1.11.0", - "resolved": "https://registry.npmjs.org/resq/-/resq-1.11.0.tgz", - "integrity": "sha512-G10EBz+zAAy3zUd/CDoBbXRL6ia9kOo3xRHrMDsHljI0GDkhYlyjwoCx5+3eCC4swi1uCoZQhskuJkj7Gp57Bw==", - "license": "MIT", - "dependencies": { - 
"fast-deep-equal": "^2.0.1" - } - }, - "node_modules/ret": { - "version": "0.5.0", - "resolved": "https://registry.npmjs.org/ret/-/ret-0.5.0.tgz", - "integrity": "sha512-I1XxrZSQ+oErkRR4jYbAyEEu2I0avBvvMM5JN+6EBprOGRCs63ENqZ3vjavq8fBw2+62G5LF5XelKwuJpcvcxw==", - "license": "MIT", - "engines": { - "node": ">=10" - } - }, - "node_modules/rgb2hex": { - "version": "0.2.5", - "resolved": "https://registry.npmjs.org/rgb2hex/-/rgb2hex-0.2.5.tgz", - "integrity": "sha512-22MOP1Rh7sAo1BZpDG6R5RFYzR2lYEgwq7HEmyW2qcsOqR2lQKmn+O//xV3YG/0rrhMC6KVX2hU+ZXuaw9a5bw==", - "license": "MIT" - }, - "node_modules/rimraf": { - "version": "5.0.10", - "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-5.0.10.tgz", - "integrity": "sha512-l0OE8wL34P4nJH/H2ffoaniAokM2qSmrtXHmlpvYr5AVVX8msAyW0l8NVJFDxlSK4u3Uh/f41cQheDVdnYijwQ==", - "license": "ISC", - "dependencies": { - "glob": "^10.3.7" - }, - "bin": { - "rimraf": "dist/esm/bin.mjs" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/rimraf/node_modules/balanced-match": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", - "license": "MIT" - }, - "node_modules/rimraf/node_modules/brace-expansion": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.1.0.tgz", - "integrity": "sha512-TN1kCZAgdgweJhWWpgKYrQaMNHcDULHkWwQIspdtjV4Y5aurRdZpjAqn6yX3FPqTA9ngHCc4hJxMAMgGfve85w==", - "license": "MIT", - "dependencies": { - "balanced-match": "^1.0.0" - } - }, - "node_modules/rimraf/node_modules/glob": { - "version": "10.5.0", - "resolved": "https://registry.npmjs.org/glob/-/glob-10.5.0.tgz", - "integrity": "sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg==", - "deprecated": "Old versions of glob are not supported, and contain widely publicized 
security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me", - "license": "ISC", - "dependencies": { - "foreground-child": "^3.1.0", - "jackspeak": "^3.1.2", - "minimatch": "^9.0.4", - "minipass": "^7.1.2", - "package-json-from-dist": "^1.0.0", - "path-scurry": "^1.11.1" - }, - "bin": { - "glob": "dist/esm/bin.mjs" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/rimraf/node_modules/minimatch": { - "version": "9.0.9", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.9.tgz", - "integrity": "sha512-OBwBN9AL4dqmETlpS2zasx+vTeWclWzkblfZk7KTA5j3jeOONz/tRCnZomUyvNg83wL5Zv9Ss6HMJXAgL8R2Yg==", - "license": "ISC", - "dependencies": { - "brace-expansion": "^2.0.2" - }, - "engines": { - "node": ">=16 || 14 >=14.17" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/rimraf/node_modules/path-scurry": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.1.tgz", - "integrity": "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==", - "license": "BlueOak-1.0.0", - "dependencies": { - "lru-cache": "^10.2.0", - "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0" - }, - "engines": { - "node": ">=16 || 14 >=14.18" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/safaridriver": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/safaridriver/-/safaridriver-1.0.1.tgz", - "integrity": "sha512-jkg4434cYgtrIF2AeY/X0Wmd2W73cK5qIEFE3hDrrQenJH/2SDJIXGvPAigfvQTcE9+H31zkiNHbUqcihEiMRA==", - "license": "MIT", - "engines": { - "node": ">=18.0.0" - } - }, "node_modules/safe-buffer": { "version": "5.2.1", "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", @@ -4644,28 +2152,6 @@ ], "license": "MIT" }, - "node_modules/safe-regex2": 
{ - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/safe-regex2/-/safe-regex2-5.1.0.tgz", - "integrity": "sha512-pNHAuBW7TrcleFHsxBr5QMi/Iyp0ENjUKz7GCcX1UO7cMh+NmVK6HxQckNL1tJp1XAJVjG6B8OKIPqodqj9rtw==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/fastify" - }, - { - "type": "opencollective", - "url": "https://opencollective.com/fastify" - } - ], - "license": "MIT", - "dependencies": { - "ret": "~0.5.0" - }, - "bin": { - "safe-regex2": "bin/safe-regex2.js" - } - }, "node_modules/safer-buffer": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", @@ -4723,33 +2209,6 @@ "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "license": "MIT" }, - "node_modules/serialize-error": { - "version": "12.0.0", - "resolved": "https://registry.npmjs.org/serialize-error/-/serialize-error-12.0.0.tgz", - "integrity": "sha512-ZYkZLAvKTKQXWuh5XpBw7CdbSzagarX39WyZ2H07CDLC5/KfsRGlIXV8d4+tfqX1M7916mRqR1QfNHSij+c9Pw==", - "license": "MIT", - "dependencies": { - "type-fest": "^4.31.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/serialize-error/node_modules/type-fest": { - "version": "4.41.0", - "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-4.41.0.tgz", - "integrity": "sha512-TeTSQ6H5YHvpqVwBRcnLDCBnDOHWYu7IvGbHT6N8AOymcr9PJGjc1GTtiWZTYg0NCgYwvnYWEkVChQAr9bjfwA==", - "license": "(MIT OR CC0-1.0)", - "engines": { - "node": ">=16" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/serve-static": { "version": "1.16.3", "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.16.3.tgz", @@ -4765,18 +2224,6 @@ "node": ">= 0.8.0" } }, - "node_modules/set-blocking": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/set-blocking/-/set-blocking-2.0.0.tgz", - "integrity": 
"sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw==", - "license": "ISC" - }, - "node_modules/setimmediate": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz", - "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==", - "license": "MIT" - }, "node_modules/setprototypeof": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", @@ -4819,39 +2266,6 @@ "node": ">=0.10.0" } }, - "node_modules/shebang-command": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", - "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", - "license": "MIT", - "dependencies": { - "shebang-regex": "^3.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/shebang-regex": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", - "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/shell-quote": { - "version": "1.8.3", - "resolved": "https://registry.npmjs.org/shell-quote/-/shell-quote-1.8.3.tgz", - "integrity": "sha512-ObmnIF4hXNg1BqhnHmgbDETF8dLPCggZWBjkQfhZpbszZnYur5DUljTcCHii5LC3J5E0yeO/1LIMyH+UvHQgyw==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, "node_modules/side-channel": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz", @@ -4924,18 +2338,6 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/signal-exit": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", - "integrity": 
"sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==", - "license": "ISC", - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, "node_modules/simple-concat": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz", @@ -4981,111 +2383,6 @@ "simple-concat": "^1.0.0" } }, - "node_modules/smart-buffer": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/smart-buffer/-/smart-buffer-4.2.0.tgz", - "integrity": "sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg==", - "license": "MIT", - "engines": { - "node": ">= 6.0.0", - "npm": ">= 3.0.0" - } - }, - "node_modules/socks": { - "version": "2.8.7", - "resolved": "https://registry.npmjs.org/socks/-/socks-2.8.7.tgz", - "integrity": "sha512-HLpt+uLy/pxB+bum/9DzAgiKS8CX1EvbWxI4zlmgGCExImLdiad2iCwXT5Z4c9c3Eq8rP2318mPW2c+QbtjK8A==", - "license": "MIT", - "dependencies": { - "ip-address": "^10.0.1", - "smart-buffer": "^4.2.0" - }, - "engines": { - "node": ">= 10.0.0", - "npm": ">= 3.0.0" - } - }, - "node_modules/socks-proxy-agent": { - "version": "8.0.5", - "resolved": "https://registry.npmjs.org/socks-proxy-agent/-/socks-proxy-agent-8.0.5.tgz", - "integrity": "sha512-HehCEsotFqbPW9sJ8WVYB6UbmIMv7kUUORIF2Nncq4VQvBfNBLibW9YZR5dlYCSUhwcD628pRllm7n+E+YTzJw==", - "license": "MIT", - "dependencies": { - "agent-base": "^7.1.2", - "debug": "^4.3.4", - "socks": "^2.8.3" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/socks-proxy-agent/node_modules/debug": { - "version": "4.4.3", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", - "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", - "license": "MIT", - "dependencies": { - "ms": "^2.1.3" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - 
} - } - }, - "node_modules/socks-proxy-agent/node_modules/ms": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "license": "MIT" - }, - "node_modules/source-map": { - "version": "0.6.1", - "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", - "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", - "license": "BSD-3-Clause", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/source-map-support": { - "version": "0.5.21", - "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.21.tgz", - "integrity": "sha512-uBHU3L3czsIyYXKX88fdrGovxdSCoTGDRZ6SYXtSRxLZUzHg5P/66Ht6uoUlHu9EZod+inXhKo3qQgwXUT/y1w==", - "license": "MIT", - "dependencies": { - "buffer-from": "^1.0.0", - "source-map": "^0.6.0" - } - }, - "node_modules/spacetrim": { - "version": "0.11.59", - "resolved": "https://registry.npmjs.org/spacetrim/-/spacetrim-0.11.59.tgz", - "integrity": "sha512-lLYsktklSRKprreOm7NXReW8YiX2VBjbgmXYEziOoGf/qsJqAEACaDvoTtUOycwjpaSh+bT8eu0KrJn7UNxiCg==", - "funding": [ - { - "type": "individual", - "url": "https://buymeacoffee.com/hejny" - }, - { - "type": "github", - "url": "https://github.com/hejny/spacetrim/blob/main/README.md#%EF%B8%8F-contributing" - } - ], - "license": "Apache-2.0" - }, - "node_modules/split2": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/split2/-/split2-4.2.0.tgz", - "integrity": "sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==", - "license": "ISC", - "engines": { - "node": ">= 10.x" - } - }, "node_modules/statuses": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.2.tgz", @@ -5095,17 +2392,6 @@ "node": ">= 0.8" } }, - "node_modules/streamx": { - "version": "2.25.0", - "resolved": 
"https://registry.npmjs.org/streamx/-/streamx-2.25.0.tgz", - "integrity": "sha512-0nQuG6jf1w+wddNEEXCF4nTg3LtufWINB5eFEN+5TNZW7KWJp6x87+JFL43vaAUPyCfH1wID+mNVyW6OHtFamg==", - "license": "MIT", - "dependencies": { - "events-universal": "^1.0.0", - "fast-fifo": "^1.3.2", - "text-decoder": "^1.1.0" - } - }, "node_modules/string_decoder": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", @@ -5115,114 +2401,6 @@ "safe-buffer": "~5.2.0" } }, - "node_modules/string-width": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "license": "MIT", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/string-width-cjs": { - "name": "string-width", - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "license": "MIT", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/string-width-cjs/node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/string-width-cjs/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - 
"license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/string-width/node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/string-width/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/strip-ansi": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.2.0.tgz", - "integrity": "sha512-yDPMNjp4WyfYBkHnjIRLfca1i6KMyGCtsVgoKe/z1+6vukgaENdgGBZt+ZmKPc4gavvEZ5OgHfHdrazhgNyG7w==", - "license": "MIT", - "dependencies": { - "ansi-regex": "^6.2.2" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/strip-ansi?sponsor=1" - } - }, - "node_modules/strip-ansi-cjs": { - "name": "strip-ansi", - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/strip-ansi-cjs/node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, 
"node_modules/strip-json-comments": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz", @@ -5232,30 +2410,6 @@ "node": ">=0.10.0" } }, - "node_modules/strnum": { - "version": "2.2.3", - "resolved": "https://registry.npmjs.org/strnum/-/strnum-2.2.3.tgz", - "integrity": "sha512-oKx6RUCuHfT3oyVjtnrmn19H1SiCqgJSg+54XqURKp5aCMbrXrhLjRN9TjuwMjiYstZ0MzDrHqkGZ5dFTKd+zg==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/NaturalIntelligence" - } - ], - "license": "MIT" - }, - "node_modules/supports-color": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", - "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", - "license": "MIT", - "dependencies": { - "has-flag": "^4.0.0" - }, - "engines": { - "node": ">=8" - } - }, "node_modules/tar-fs": { "version": "2.1.4", "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.4.tgz", @@ -5293,40 +2447,6 @@ "bintrees": "1.0.2" } }, - "node_modules/teen_process": { - "version": "2.3.3", - "resolved": "https://registry.npmjs.org/teen_process/-/teen_process-2.3.3.tgz", - "integrity": "sha512-NIdeetf/6gyEqLjnzvfgQe7PfipSceq2xDQM2Py2BkBnIIeWh3HRD3vNhulyO5WppfCv9z4mtsEHyq8kdiULTA==", - "license": "Apache-2.0", - "dependencies": { - "bluebird": "^3.7.2", - "lodash": "^4.17.21", - "shell-quote": "^1.8.1", - "source-map-support": "^0.x" - }, - "engines": { - "node": "^16.13.0 || >=18.0.0", - "npm": ">=8" - } - }, - "node_modules/teex": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/teex/-/teex-1.0.1.tgz", - "integrity": "sha512-eYE6iEI62Ni1H8oIa7KlDU6uQBtqr4Eajni3wX7rpfXD8ysFx8z0+dri+KWEPWpBsxXfxu58x/0jvTVT1ekOSg==", - "license": "MIT", - "dependencies": { - "streamx": "^2.12.5" - } - }, - "node_modules/text-decoder": { - "version": "1.2.7", - "resolved": 
"https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.7.tgz", - "integrity": "sha512-vlLytXkeP4xvEq2otHeJfSQIRyWxo/oZGEbXrtEEF9Hnmrdly59sUbzZ/QgyWuLYHctCHxFF4tRQZNQ9k60ExQ==", - "license": "Apache-2.0", - "dependencies": { - "b4a": "^1.6.4" - } - }, "node_modules/tiny-lru": { "version": "13.0.0", "resolved": "https://registry.npmjs.org/tiny-lru/-/tiny-lru-13.0.0.tgz", @@ -5363,18 +2483,6 @@ "node": "*" } }, - "node_modules/type-fest": { - "version": "4.26.0", - "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-4.26.0.tgz", - "integrity": "sha512-OduNjVJsFbifKb57UqZ2EMP1i4u64Xwow3NYXUtBbD4vIwJdQd4+xl8YDou1dlm4DVrtwT/7Ky8z8WyCULVfxw==", - "license": "(MIT OR CC0-1.0)", - "engines": { - "node": ">=16" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/type-is": { "version": "1.6.18", "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz", @@ -5439,21 +2547,6 @@ "node": "*" } }, - "node_modules/undici": { - "version": "7.24.8", - "resolved": "https://registry.npmjs.org/undici/-/undici-7.24.8.tgz", - "integrity": "sha512-6KQ/+QxK49Z/p3HO6E5ZCZWNnCasyZLa5ExaVYyvPxUwKtbCPMKELJOqh7EqOle0t9cH/7d2TaaTRRa6Nhs4YQ==", - "license": "MIT", - "engines": { - "node": ">=20.18.1" - } - }, - "node_modules/undici-types": { - "version": "6.21.0", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", - "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", - "license": "MIT" - }, "node_modules/universalify": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz", @@ -5502,21 +2595,6 @@ "browserslist": ">= 4.21.0" } }, - "node_modules/urlpattern-polyfill": { - "version": "10.1.0", - "resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.1.0.tgz", - "integrity": 
"sha512-IGjKp/o0NL3Bso1PymYURCJxMPNAf/ILOpendP9f5B6e1rTJgdgiOvgfoT8VxCAdY+Wisb9uhGaJJf3yZ2V9nw==", - "license": "MIT" - }, - "node_modules/userhome": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/userhome/-/userhome-1.0.1.tgz", - "integrity": "sha512-5cnLm4gseXjAclKowC4IjByaGsjtAoV6PrOQOljplNB54ReUYJP8HdAFq2muHinSDAh09PPX/uXDPfdxRHvuSA==", - "license": "MIT", - "engines": { - "node": ">= 0.8.0" - } - }, "node_modules/util-deprecate": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", @@ -5532,19 +2610,6 @@ "node": ">= 0.4.0" } }, - "node_modules/uuid": { - "version": "11.1.0", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-11.1.0.tgz", - "integrity": "sha512-0/A9rDy9P7cJ+8w1c9WD9V//9Wj15Ce2MPz8Ri6032usz+NfePxx5AcN3bN+r6ZL6jEo066/yNYB3tn4pQEx+A==", - "funding": [ - "https://github.com/sponsors/broofa", - "https://github.com/sponsors/ctavan" - ], - "license": "MIT", - "bin": { - "uuid": "dist/esm/bin/uuid" - } - }, "node_modules/vali-date": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/vali-date/-/vali-date-1.0.0.tgz", @@ -5563,299 +2628,12 @@ "node": ">= 0.8" } }, - "node_modules/wait-port": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/wait-port/-/wait-port-1.1.0.tgz", - "integrity": "sha512-3e04qkoN3LxTMLakdqeWth8nih8usyg+sf1Bgdf9wwUkp05iuK1eSY/QpLvscT/+F/gA89+LpUmmgBtesbqI2Q==", - "license": "MIT", - "dependencies": { - "chalk": "^4.1.2", - "commander": "^9.3.0", - "debug": "^4.3.4" - }, - "bin": { - "wait-port": "bin/wait-port.js" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/wait-port/node_modules/chalk": { - "version": "4.1.2", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", - "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", - "license": "MIT", - "dependencies": { - "ansi-styles": "^4.1.0", - "supports-color": "^7.1.0" - }, - "engines": { - 
"node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/chalk?sponsor=1" - } - }, - "node_modules/wait-port/node_modules/commander": { - "version": "9.5.0", - "resolved": "https://registry.npmjs.org/commander/-/commander-9.5.0.tgz", - "integrity": "sha512-KRs7WVDKg86PWiuAqhDrAQnTXZKraVcCc6vFdL14qrZ/DcWwuRo7VoiYXalXO7S5GKpqYiVEwCbgFDfxNHKJBQ==", - "license": "MIT", - "engines": { - "node": "^12.20.0 || >=14" - } - }, - "node_modules/wait-port/node_modules/debug": { - "version": "4.4.3", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", - "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", - "license": "MIT", - "dependencies": { - "ms": "^2.1.3" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } - } - }, - "node_modules/wait-port/node_modules/ms": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "license": "MIT" - }, - "node_modules/webdriver": { - "version": "9.27.0", - "resolved": "https://registry.npmjs.org/webdriver/-/webdriver-9.27.0.tgz", - "integrity": "sha512-w07ThZND48SIr0b4S7eFougYUyclmoUwdmju8yXvEJiXYjDjeYUpl8wZrYPEYRBylxpSx+sBHfEUBrPQkcTTRQ==", - "license": "MIT", - "dependencies": { - "@types/node": "^20.1.0", - "@types/ws": "^8.5.3", - "@wdio/config": "9.27.0", - "@wdio/logger": "9.18.0", - "@wdio/protocols": "9.27.0", - "@wdio/types": "9.27.0", - "@wdio/utils": "9.27.0", - "deepmerge-ts": "^7.0.3", - "https-proxy-agent": "^7.0.6", - "undici": "^6.21.3", - "ws": "^8.8.0" - }, - "engines": { - "node": ">=18.20.0" - } - }, - "node_modules/webdriver/node_modules/undici": { - "version": "6.24.1", - "resolved": "https://registry.npmjs.org/undici/-/undici-6.24.1.tgz", - "integrity": 
"sha512-sC+b0tB1whOCzbtlx20fx3WgCXwkW627p4EA9uM+/tNNPkSS+eSEld6pAs9nDv7WbY1UUljBMYPtu9BCOrCWKA==", - "license": "MIT", - "engines": { - "node": ">=18.17" - } - }, - "node_modules/webdriverio": { - "version": "9.27.0", - "resolved": "https://registry.npmjs.org/webdriverio/-/webdriverio-9.27.0.tgz", - "integrity": "sha512-Y4FbMf4bKBXpPB0lYpglzQ2GfDDe6uojmMZl85uPyrDx18NW7mqN84ZawGoIg/FRvcLaVhcOzc98WOPo725Rag==", - "license": "MIT", - "dependencies": { - "@types/node": "^20.11.30", - "@types/sinonjs__fake-timers": "^8.1.5", - "@wdio/config": "9.27.0", - "@wdio/logger": "9.18.0", - "@wdio/protocols": "9.27.0", - "@wdio/repl": "9.16.2", - "@wdio/types": "9.27.0", - "@wdio/utils": "9.27.0", - "archiver": "^7.0.1", - "aria-query": "^5.3.0", - "cheerio": "^1.0.0-rc.12", - "css-shorthand-properties": "^1.1.1", - "css-value": "^0.0.1", - "grapheme-splitter": "^1.0.4", - "htmlfy": "^0.8.1", - "is-plain-obj": "^4.1.0", - "jszip": "^3.10.1", - "lodash.clonedeep": "^4.5.0", - "lodash.zip": "^4.2.0", - "query-selector-shadow-dom": "^1.0.1", - "resq": "^1.11.0", - "rgb2hex": "0.2.5", - "serialize-error": "^12.0.0", - "urlpattern-polyfill": "^10.0.0", - "webdriver": "9.27.0" - }, - "engines": { - "node": ">=18.20.0" - }, - "peerDependencies": { - "puppeteer-core": ">=22.x || <=24.x" - }, - "peerDependenciesMeta": { - "puppeteer-core": { - "optional": true - } - } - }, - "node_modules/whatwg-encoding": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", - "integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==", - "deprecated": "Use @exodus/bytes instead for a more spec-conformant and faster implementation", - "license": "MIT", - "dependencies": { - "iconv-lite": "0.6.3" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/whatwg-encoding/node_modules/iconv-lite": { - "version": "0.6.3", - "resolved": 
"https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", - "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", - "license": "MIT", - "dependencies": { - "safer-buffer": ">= 2.1.2 < 3.0.0" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/whatwg-mimetype": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz", - "integrity": "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==", - "license": "MIT", - "engines": { - "node": ">=18" - } - }, - "node_modules/which": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/which/-/which-5.0.0.tgz", - "integrity": "sha512-JEdGzHwwkrbWoGOlIHqQ5gtprKGOenpDHpxE9zVR1bWbOtYRyPPHMe9FaP6x61CmNaTThSkb0DAJte5jD+DmzQ==", - "license": "ISC", - "dependencies": { - "isexe": "^3.1.1" - }, - "bin": { - "node-which": "bin/which.js" - }, - "engines": { - "node": "^18.17.0 || >=20.5.0" - } - }, - "node_modules/wrap-ansi": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", - "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", - "license": "MIT", - "dependencies": { - "ansi-styles": "^4.0.0", - "string-width": "^4.1.0", - "strip-ansi": "^6.0.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/wrap-ansi?sponsor=1" - } - }, - "node_modules/wrap-ansi-cjs": { - "name": "wrap-ansi", - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", - "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", - "license": "MIT", - "dependencies": { - "ansi-styles": "^4.0.0", - "string-width": "^4.1.0", - "strip-ansi": "^6.0.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": 
"https://github.com/chalk/wrap-ansi?sponsor=1" - } - }, - "node_modules/wrap-ansi-cjs/node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi-cjs/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi/node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, "node_modules/wrappy": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", "license": "ISC" }, - "node_modules/ws": { - "version": "8.20.0", - "resolved": "https://registry.npmjs.org/ws/-/ws-8.20.0.tgz", - "integrity": "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==", - "license": "MIT", - "engines": { - "node": ">=10.0.0" 
- }, - "peerDependencies": { - "bufferutil": "^4.0.1", - "utf-8-validate": ">=5.0.2" - }, - "peerDependenciesMeta": { - "bufferutil": { - "optional": true - }, - "utf-8-validate": { - "optional": true - } - } - }, "node_modules/xml2js": { "version": "0.6.2", "resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.6.2.tgz", @@ -5877,124 +2655,6 @@ "engines": { "node": ">=4.0" } - }, - "node_modules/y18n": { - "version": "5.0.8", - "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", - "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", - "license": "ISC", - "engines": { - "node": ">=10" - } - }, - "node_modules/yargs": { - "version": "17.7.2", - "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", - "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", - "license": "MIT", - "dependencies": { - "cliui": "^8.0.1", - "escalade": "^3.1.1", - "get-caller-file": "^2.0.5", - "require-directory": "^2.1.1", - "string-width": "^4.2.3", - "y18n": "^5.0.5", - "yargs-parser": "^21.1.1" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/yargs-parser": { - "version": "21.1.1", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", - "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/yauzl": { - "version": "2.10.0", - "resolved": "https://registry.npmjs.org/yauzl/-/yauzl-2.10.0.tgz", - "integrity": "sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==", - "license": "MIT", - "dependencies": { - "buffer-crc32": "~0.2.3", - "fd-slicer": "~1.1.0" - } - }, - "node_modules/yauzl/node_modules/buffer-crc32": { - "version": "0.2.13", - "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz", - "integrity": 
"sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==", - "license": "MIT", - "engines": { - "node": "*" - } - }, - "node_modules/zip-stream": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/zip-stream/-/zip-stream-6.0.1.tgz", - "integrity": "sha512-zK7YHHz4ZXpW89AHXUPbQVGKI7uvkd3hzusTdotCg1UxyaVtg0zFJSTfW/Dq5f7OBBVnq6cZIaC8Ti4hb6dtCA==", - "license": "MIT", - "dependencies": { - "archiver-utils": "^5.0.0", - "compress-commons": "^6.0.2", - "readable-stream": "^4.0.0" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/zip-stream/node_modules/buffer": { - "version": "6.0.3", - "resolved": "https://registry.npmjs.org/buffer/-/buffer-6.0.3.tgz", - "integrity": "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT", - "dependencies": { - "base64-js": "^1.3.1", - "ieee754": "^1.2.1" - } - }, - "node_modules/zip-stream/node_modules/readable-stream": { - "version": "4.7.0", - "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-4.7.0.tgz", - "integrity": "sha512-oIGGmcpTLwPga8Bn6/Z75SVaH1z5dUut2ibSyAMVhmUggWpmDn2dapB0n7f8nwaSiRtepAsfJyfXIO5DCVAODg==", - "license": "MIT", - "dependencies": { - "abort-controller": "^3.0.0", - "buffer": "^6.0.3", - "events": "^3.3.0", - "process": "^0.11.10", - "string_decoder": "^1.3.0" - }, - "engines": { - "node": "^12.22.0 || ^14.17.0 || >=16.0.0" - } - }, - "node_modules/zod": { - "version": "3.25.76", - "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", - "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", - "license": "MIT", - "funding": { - "url": 
"https://github.com/sponsors/colinhacks" - } } } } diff --git a/package.json b/package.json index 458da8044..8fcf5cea6 100644 --- a/package.json +++ b/package.json @@ -16,8 +16,8 @@ }, "homepage": "https://github.com/NousResearch/Hermes-Agent#readme", "dependencies": { - "agent-browser": "^0.13.0", - "@askjo/camofox-browser": "^1.5.2" + "@askjo/camofox-browser": "^1.5.2", + "agent-browser": "^0.26.0" }, "overrides": { "lodash": "4.18.1" diff --git a/plugins/image_gen/openai/__init__.py b/plugins/image_gen/openai/__init__.py new file mode 100644 index 000000000..c1a719f91 --- /dev/null +++ b/plugins/image_gen/openai/__init__.py @@ -0,0 +1,303 @@ +"""OpenAI image generation backend. + +Exposes OpenAI's ``gpt-image-2`` model at three quality tiers as an +:class:`ImageGenProvider` implementation. The tiers are implemented as +three virtual model IDs so the ``hermes tools`` model picker and the +``image_gen.model`` config key behave like any other multi-model backend: + + gpt-image-2-low ~15s fastest, good for iteration + gpt-image-2-medium ~40s default โ€” balanced + gpt-image-2-high ~2min slowest, highest fidelity + +All three hit the same underlying API model (``gpt-image-2``) with a +different ``quality`` parameter. Output is base64 JSON โ†’ saved under +``$HERMES_HOME/cache/images/``. + +Selection precedence (first hit wins): + +1. ``OPENAI_IMAGE_MODEL`` env var (escape hatch for scripts / tests) +2. ``image_gen.openai.model`` in ``config.yaml`` +3. ``image_gen.model`` in ``config.yaml`` (when it's one of our tier IDs) +4. 
:data:`DEFAULT_MODEL` โ€” ``gpt-image-2-medium`` +""" + +from __future__ import annotations + +import logging +import os +from typing import Any, Dict, List, Optional, Tuple + +from agent.image_gen_provider import ( + DEFAULT_ASPECT_RATIO, + ImageGenProvider, + error_response, + resolve_aspect_ratio, + save_b64_image, + success_response, +) + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Model catalog +# --------------------------------------------------------------------------- +# +# All three IDs resolve to the same underlying API model with a different +# ``quality`` setting. ``api_model`` is what gets sent to OpenAI; +# ``quality`` is the knob that changes generation time and output fidelity. + +API_MODEL = "gpt-image-2" + +_MODELS: Dict[str, Dict[str, Any]] = { + "gpt-image-2-low": { + "display": "GPT Image 2 (Low)", + "speed": "~15s", + "strengths": "Fast iteration, lowest cost", + "quality": "low", + }, + "gpt-image-2-medium": { + "display": "GPT Image 2 (Medium)", + "speed": "~40s", + "strengths": "Balanced โ€” default", + "quality": "medium", + }, + "gpt-image-2-high": { + "display": "GPT Image 2 (High)", + "speed": "~2min", + "strengths": "Highest fidelity, strongest prompt adherence", + "quality": "high", + }, +} + +DEFAULT_MODEL = "gpt-image-2-medium" + +_SIZES = { + "landscape": "1536x1024", + "square": "1024x1024", + "portrait": "1024x1536", +} + + +def _load_openai_config() -> Dict[str, Any]: + """Read ``image_gen`` from config.yaml (returns {} on any failure).""" + try: + from hermes_cli.config import load_config + + cfg = load_config() + section = cfg.get("image_gen") if isinstance(cfg, dict) else None + return section if isinstance(section, dict) else {} + except Exception as exc: + logger.debug("Could not load image_gen config: %s", exc) + return {} + + +def _resolve_model() -> Tuple[str, Dict[str, Any]]: + """Decide which tier to use and return ``(model_id, meta)``.""" 
+ env_override = os.environ.get("OPENAI_IMAGE_MODEL") + if env_override and env_override in _MODELS: + return env_override, _MODELS[env_override] + + cfg = _load_openai_config() + openai_cfg = cfg.get("openai") if isinstance(cfg.get("openai"), dict) else {} + candidate: Optional[str] = None + if isinstance(openai_cfg, dict): + value = openai_cfg.get("model") + if isinstance(value, str) and value in _MODELS: + candidate = value + if candidate is None: + top = cfg.get("model") + if isinstance(top, str) and top in _MODELS: + candidate = top + + if candidate is not None: + return candidate, _MODELS[candidate] + + return DEFAULT_MODEL, _MODELS[DEFAULT_MODEL] + + +# --------------------------------------------------------------------------- +# Provider +# --------------------------------------------------------------------------- + + +class OpenAIImageGenProvider(ImageGenProvider): + """OpenAI ``images.generate`` backend โ€” gpt-image-2 at low/medium/high.""" + + @property + def name(self) -> str: + return "openai" + + @property + def display_name(self) -> str: + return "OpenAI" + + def is_available(self) -> bool: + if not os.environ.get("OPENAI_API_KEY"): + return False + try: + import openai # noqa: F401 + except ImportError: + return False + return True + + def list_models(self) -> List[Dict[str, Any]]: + return [ + { + "id": model_id, + "display": meta["display"], + "speed": meta["speed"], + "strengths": meta["strengths"], + "price": "varies", + } + for model_id, meta in _MODELS.items() + ] + + def default_model(self) -> Optional[str]: + return DEFAULT_MODEL + + def get_setup_schema(self) -> Dict[str, Any]: + return { + "name": "OpenAI", + "badge": "paid", + "tag": "gpt-image-2 at low/medium/high quality tiers", + "env_vars": [ + { + "key": "OPENAI_API_KEY", + "prompt": "OpenAI API key", + "url": "https://platform.openai.com/api-keys", + }, + ], + } + + def generate( + self, + prompt: str, + aspect_ratio: str = DEFAULT_ASPECT_RATIO, + **kwargs: Any, + ) -> Dict[str, 
Any]: + prompt = (prompt or "").strip() + aspect = resolve_aspect_ratio(aspect_ratio) + + if not prompt: + return error_response( + error="Prompt is required and must be a non-empty string", + error_type="invalid_argument", + provider="openai", + aspect_ratio=aspect, + ) + + if not os.environ.get("OPENAI_API_KEY"): + return error_response( + error=( + "OPENAI_API_KEY not set. Run `hermes tools` โ†’ Image " + "Generation โ†’ OpenAI to configure, or `hermes setup` " + "to add the key." + ), + error_type="auth_required", + provider="openai", + aspect_ratio=aspect, + ) + + try: + import openai + except ImportError: + return error_response( + error="openai Python package not installed (pip install openai)", + error_type="missing_dependency", + provider="openai", + aspect_ratio=aspect, + ) + + tier_id, meta = _resolve_model() + size = _SIZES.get(aspect, _SIZES["square"]) + + # gpt-image-2 returns b64_json unconditionally and REJECTS + # ``response_format`` as an unknown parameter. Don't send it. 
+ payload: Dict[str, Any] = { + "model": API_MODEL, + "prompt": prompt, + "size": size, + "n": 1, + "quality": meta["quality"], + } + + try: + client = openai.OpenAI() + response = client.images.generate(**payload) + except Exception as exc: + logger.debug("OpenAI image generation failed", exc_info=True) + return error_response( + error=f"OpenAI image generation failed: {exc}", + error_type="api_error", + provider="openai", + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + ) + + data = getattr(response, "data", None) or [] + if not data: + return error_response( + error="OpenAI returned no image data", + error_type="empty_response", + provider="openai", + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + ) + + first = data[0] + b64 = getattr(first, "b64_json", None) + url = getattr(first, "url", None) + revised_prompt = getattr(first, "revised_prompt", None) + + if b64: + try: + saved_path = save_b64_image(b64, prefix=f"openai_{tier_id}") + except Exception as exc: + return error_response( + error=f"Could not save image to cache: {exc}", + error_type="io_error", + provider="openai", + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + ) + image_ref = str(saved_path) + elif url: + # Defensive โ€” gpt-image-2 returns b64 today, but fall back + # gracefully if the API ever changes. 
+ image_ref = url + else: + return error_response( + error="OpenAI response contained neither b64_json nor URL", + error_type="empty_response", + provider="openai", + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + ) + + extra: Dict[str, Any] = {"size": size, "quality": meta["quality"]} + if revised_prompt: + extra["revised_prompt"] = revised_prompt + + return success_response( + image=image_ref, + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + provider="openai", + extra=extra, + ) + + +# --------------------------------------------------------------------------- +# Plugin entry point +# --------------------------------------------------------------------------- + + +def register(ctx) -> None: + """Plugin entry point โ€” wire ``OpenAIImageGenProvider`` into the registry.""" + ctx.register_image_gen_provider(OpenAIImageGenProvider()) diff --git a/plugins/image_gen/openai/plugin.yaml b/plugins/image_gen/openai/plugin.yaml new file mode 100644 index 000000000..18e4d8639 --- /dev/null +++ b/plugins/image_gen/openai/plugin.yaml @@ -0,0 +1,7 @@ +name: openai +version: 1.0.0 +description: "OpenAI image generation backend (gpt-image-2). Saves generated images to $HERMES_HOME/cache/images/." 
+author: NousResearch +kind: backend +requires_env: + - OPENAI_API_KEY diff --git a/plugins/memory/hindsight/README.md b/plugins/memory/hindsight/README.md index 024a99303..3fbdc2aba 100644 --- a/plugins/memory/hindsight/README.md +++ b/plugins/memory/hindsight/README.md @@ -84,7 +84,10 @@ Config file: `~/.hermes/hindsight/config.json` | `retain_async` | `true` | Process retain asynchronously on the Hindsight server | | `retain_every_n_turns` | `1` | Retain every N turns (1 = every turn) | | `retain_context` | `conversation between Hermes Agent and the User` | Context label for retained memories | -| `tags` | โ€” | Tags applied when storing memories | +| `retain_tags` | โ€” | Default tags applied to retained memories; merged with per-call tool tags | +| `retain_source` | โ€” | Optional `metadata.source` attached to retained memories | +| `retain_user_prefix` | `User` | Label used before user turns in auto-retained transcripts | +| `retain_assistant_prefix` | `Assistant` | Label used before assistant turns in auto-retained transcripts | ### Integration @@ -113,7 +116,7 @@ Available in `hybrid` and `tools` memory modes: | Tool | Description | |------|-------------| -| `hindsight_retain` | Store information with auto entity extraction | +| `hindsight_retain` | Store information with auto entity extraction; supports optional per-call `tags` | | `hindsight_recall` | Multi-strategy search (semantic + entity graph) | | `hindsight_reflect` | Cross-memory synthesis (LLM-powered) | diff --git a/plugins/memory/hindsight/__init__.py b/plugins/memory/hindsight/__init__.py index c39679b73..2b233e265 100644 --- a/plugins/memory/hindsight/__init__.py +++ b/plugins/memory/hindsight/__init__.py @@ -6,11 +6,15 @@ retrieval. Supports cloud (API key) and local modes. Original PR #1811 by benfrank241, adapted to MemoryProvider ABC. 
Config via environment variables: - HINDSIGHT_API_KEY โ€” API key for Hindsight Cloud - HINDSIGHT_BANK_ID โ€” memory bank identifier (default: hermes) - HINDSIGHT_BUDGET โ€” recall budget: low/mid/high (default: mid) - HINDSIGHT_API_URL โ€” API endpoint - HINDSIGHT_MODE โ€” cloud or local (default: cloud) + HINDSIGHT_API_KEY โ€” API key for Hindsight Cloud + HINDSIGHT_BANK_ID โ€” memory bank identifier (default: hermes) + HINDSIGHT_BUDGET โ€” recall budget: low/mid/high (default: mid) + HINDSIGHT_API_URL โ€” API endpoint + HINDSIGHT_MODE โ€” cloud or local (default: cloud) + HINDSIGHT_RETAIN_TAGS โ€” comma-separated tags attached to retained memories + HINDSIGHT_RETAIN_SOURCE โ€” metadata source value attached to retained memories + HINDSIGHT_RETAIN_USER_PREFIX โ€” label used before user turns in retained transcripts + HINDSIGHT_RETAIN_ASSISTANT_PREFIX โ€” label used before assistant turns in retained transcripts Or via $HERMES_HOME/hindsight/config.json (profile-scoped), falling back to ~/.hindsight/config.json (legacy, shared) for backward compatibility. @@ -24,7 +28,7 @@ import logging import os import threading -from hermes_constants import get_hermes_home +from datetime import datetime, timezone from typing import Any, Dict, List from agent.memory_provider import MemoryProvider @@ -99,6 +103,11 @@ RETAIN_SCHEMA = { "properties": { "content": {"type": "string", "description": "The information to store."}, "context": {"type": "string", "description": "Short label (e.g. 
'user preference', 'project decision')."}, + "tags": { + "type": "array", + "items": {"type": "string"}, + "description": "Optional per-call tags to merge with configured default retain tags.", + }, }, "required": ["content"], }, @@ -168,6 +177,10 @@ def _load_config() -> dict: return { "mode": os.environ.get("HINDSIGHT_MODE", "cloud"), "apiKey": os.environ.get("HINDSIGHT_API_KEY", ""), + "retain_tags": os.environ.get("HINDSIGHT_RETAIN_TAGS", ""), + "retain_source": os.environ.get("HINDSIGHT_RETAIN_SOURCE", ""), + "retain_user_prefix": os.environ.get("HINDSIGHT_RETAIN_USER_PREFIX", "User"), + "retain_assistant_prefix": os.environ.get("HINDSIGHT_RETAIN_ASSISTANT_PREFIX", "Assistant"), "banks": { "hermes": { "bankId": os.environ.get("HINDSIGHT_BANK_ID", "hermes"), @@ -178,6 +191,48 @@ def _load_config() -> dict: } +def _normalize_retain_tags(value: Any) -> List[str]: + """Normalize tag config/tool values to a deduplicated list of strings.""" + if value is None: + return [] + + raw_items: list[Any] + if isinstance(value, list): + raw_items = value + elif isinstance(value, str): + text = value.strip() + if not text: + return [] + if text.startswith("["): + try: + parsed = json.loads(text) + except Exception: + parsed = None + if isinstance(parsed, list): + raw_items = parsed + else: + raw_items = text.split(",") + else: + raw_items = text.split(",") + else: + raw_items = [value] + + normalized = [] + seen = set() + for item in raw_items: + tag = str(item).strip() + if not tag or tag in seen: + continue + seen.add(tag) + normalized.append(tag) + return normalized + + +def _utc_timestamp() -> str: + """Return current UTC timestamp in ISO-8601 with milliseconds and Z suffix.""" + return datetime.now(timezone.utc).isoformat(timespec="milliseconds").replace("+00:00", "Z") + + # --------------------------------------------------------------------------- # MemoryProvider implementation # --------------------------------------------------------------------------- @@ -195,6 
+250,19 @@ class HindsightMemoryProvider(MemoryProvider): self._llm_base_url = "" self._memory_mode = "hybrid" # "context", "tools", or "hybrid" self._prefetch_method = "recall" # "recall" or "reflect" + self._retain_tags: List[str] = [] + self._retain_source = "" + self._retain_user_prefix = "User" + self._retain_assistant_prefix = "Assistant" + self._platform = "" + self._user_id = "" + self._user_name = "" + self._chat_id = "" + self._chat_name = "" + self._chat_type = "" + self._thread_id = "" + self._agent_identity = "" + self._turn_index = 0 self._client = None self._prefetch_result = "" self._prefetch_lock = threading.Lock() @@ -210,6 +278,7 @@ class HindsightMemoryProvider(MemoryProvider): # Retain controls self._auto_retain = True self._retain_every_n_turns = 1 + self._retain_async = True self._retain_context = "conversation between Hermes Agent and the User" self._turn_counter = 0 self._session_turns: list[str] = [] # accumulates ALL turns for the session @@ -224,7 +293,6 @@ class HindsightMemoryProvider(MemoryProvider): # Bank self._bank_mission = "" self._bank_retain_mission: str | None = None - self._retain_async = True @property def name(self) -> str: @@ -423,7 +491,10 @@ class HindsightMemoryProvider(MemoryProvider): {"key": "recall_budget", "description": "Recall thoroughness", "default": "mid", "choices": ["low", "mid", "high"]}, {"key": "memory_mode", "description": "Memory integration mode", "default": "hybrid", "choices": ["hybrid", "context", "tools"]}, {"key": "recall_prefetch_method", "description": "Auto-recall method", "default": "recall", "choices": ["recall", "reflect"]}, - {"key": "tags", "description": "Tags applied when storing memories (comma-separated)", "default": ""}, + {"key": "retain_tags", "description": "Default tags applied to retained memories (comma-separated)", "default": ""}, + {"key": "retain_source", "description": "Metadata source value attached to retained memories", "default": ""}, + {"key": "retain_user_prefix", 
"description": "Label used before user turns in retained transcripts", "default": "User"}, + {"key": "retain_assistant_prefix", "description": "Label used before assistant turns in retained transcripts", "default": "Assistant"}, {"key": "recall_tags", "description": "Tags to filter when searching memories (comma-separated)", "default": ""}, {"key": "recall_tags_match", "description": "Tag matching mode for recall", "default": "any", "choices": ["any", "all", "any_strict", "all_strict"]}, {"key": "auto_recall", "description": "Automatically recall memories before each turn", "default": True}, @@ -467,7 +538,7 @@ class HindsightMemoryProvider(MemoryProvider): return self._client def initialize(self, session_id: str, **kwargs) -> None: - self._session_id = session_id + self._session_id = str(session_id or "").strip() # Check client version and auto-upgrade if needed try: @@ -496,6 +567,16 @@ class HindsightMemoryProvider(MemoryProvider): pass # packaging not available or other issue โ€” proceed anyway self._config = _load_config() + self._platform = str(kwargs.get("platform") or "").strip() + self._user_id = str(kwargs.get("user_id") or "").strip() + self._user_name = str(kwargs.get("user_name") or "").strip() + self._chat_id = str(kwargs.get("chat_id") or "").strip() + self._chat_name = str(kwargs.get("chat_name") or "").strip() + self._chat_type = str(kwargs.get("chat_type") or "").strip() + self._thread_id = str(kwargs.get("thread_id") or "").strip() + self._agent_identity = str(kwargs.get("agent_identity") or "").strip() + self._turn_index = 0 + self._session_turns = [] self._mode = self._config.get("mode", "cloud") # "local" is a legacy alias for "local_embedded" if self._mode == "local": @@ -513,7 +594,7 @@ class HindsightMemoryProvider(MemoryProvider): memory_mode = self._config.get("memory_mode", "hybrid") self._memory_mode = memory_mode if memory_mode in ("context", "tools", "hybrid") else "hybrid" - prefetch_method = 
self._config.get("recall_prefetch_method", "recall") + prefetch_method = self._config.get("recall_prefetch_method") or self._config.get("prefetch_method", "recall") self._prefetch_method = prefetch_method if prefetch_method in ("recall", "reflect") else "recall" # Bank options @@ -521,9 +602,22 @@ class HindsightMemoryProvider(MemoryProvider): self._bank_retain_mission = self._config.get("bank_retain_mission") or None # Tags - self._tags = self._config.get("tags") or None + self._retain_tags = _normalize_retain_tags( + self._config.get("retain_tags") + or os.environ.get("HINDSIGHT_RETAIN_TAGS", "") + ) + self._tags = self._retain_tags or None self._recall_tags = self._config.get("recall_tags") or None self._recall_tags_match = self._config.get("recall_tags_match", "any") + self._retain_source = str( + self._config.get("retain_source") or os.environ.get("HINDSIGHT_RETAIN_SOURCE", "") + ).strip() + self._retain_user_prefix = str( + self._config.get("retain_user_prefix") or os.environ.get("HINDSIGHT_RETAIN_USER_PREFIX", "User") + ).strip() or "User" + self._retain_assistant_prefix = str( + self._config.get("retain_assistant_prefix") or os.environ.get("HINDSIGHT_RETAIN_ASSISTANT_PREFIX", "Assistant") + ).strip() or "Assistant" # Retain controls self._auto_retain = self._config.get("auto_retain", True) @@ -547,11 +641,9 @@ class HindsightMemoryProvider(MemoryProvider): logger.info("Hindsight initialized: mode=%s, api_url=%s, bank=%s, budget=%s, memory_mode=%s, prefetch_method=%s, client=%s", self._mode, self._api_url, self._bank_id, self._budget, self._memory_mode, self._prefetch_method, _client_version) logger.debug("Hindsight config: auto_retain=%s, auto_recall=%s, retain_every_n=%d, " - "retain_async=%s, retain_context=%s, " - "recall_max_tokens=%d, recall_max_input_chars=%d, tags=%s, recall_tags=%s", + "retain_async=%s, retain_context=%s, recall_max_tokens=%d, recall_max_input_chars=%d, tags=%s, recall_tags=%s", self._auto_retain, self._auto_recall, 
self._retain_every_n_turns, - self._retain_async, self._retain_context, - self._recall_max_tokens, self._recall_max_input_chars, + self._retain_async, self._retain_context, self._recall_max_tokens, self._recall_max_input_chars, self._tags, self._recall_tags) # For local mode, start the embedded daemon in the background so it @@ -712,6 +804,78 @@ class HindsightMemoryProvider(MemoryProvider): self._prefetch_thread = threading.Thread(target=_run, daemon=True, name="hindsight-prefetch") self._prefetch_thread.start() + def _build_turn_messages(self, user_content: str, assistant_content: str) -> List[Dict[str, str]]: + now = datetime.now(timezone.utc).isoformat() + return [ + { + "role": "user", + "content": f"{self._retain_user_prefix}: {user_content}", + "timestamp": now, + }, + { + "role": "assistant", + "content": f"{self._retain_assistant_prefix}: {assistant_content}", + "timestamp": now, + }, + ] + + def _build_metadata(self, *, message_count: int, turn_index: int) -> Dict[str, str]: + metadata: Dict[str, str] = { + "retained_at": _utc_timestamp(), + "message_count": str(message_count), + "turn_index": str(turn_index), + } + if self._retain_source: + metadata["source"] = self._retain_source + if self._session_id: + metadata["session_id"] = self._session_id + if self._platform: + metadata["platform"] = self._platform + if self._user_id: + metadata["user_id"] = self._user_id + if self._user_name: + metadata["user_name"] = self._user_name + if self._chat_id: + metadata["chat_id"] = self._chat_id + if self._chat_name: + metadata["chat_name"] = self._chat_name + if self._chat_type: + metadata["chat_type"] = self._chat_type + if self._thread_id: + metadata["thread_id"] = self._thread_id + if self._agent_identity: + metadata["agent_identity"] = self._agent_identity + return metadata + + def _build_retain_kwargs( + self, + content: str, + *, + context: str | None = None, + document_id: str | None = None, + metadata: Dict[str, str] | None = None, + tags: List[str] | None = 
None, + retain_async: bool | None = None, + ) -> Dict[str, Any]: + kwargs: Dict[str, Any] = { + "bank_id": self._bank_id, + "content": content, + "metadata": metadata or self._build_metadata(message_count=1, turn_index=self._turn_index), + } + if context is not None: + kwargs["context"] = context + if document_id: + kwargs["document_id"] = document_id + if retain_async is not None: + kwargs["retain_async"] = retain_async + merged_tags = _normalize_retain_tags(self._retain_tags) + for tag in _normalize_retain_tags(tags): + if tag not in merged_tags: + merged_tags.append(tag) + if merged_tags: + kwargs["tags"] = merged_tags + return kwargs + def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None: """Retain conversation turn in background (non-blocking). @@ -721,19 +885,14 @@ class HindsightMemoryProvider(MemoryProvider): logger.debug("sync_turn: skipped (auto_retain disabled)") return - from datetime import datetime, timezone - now = datetime.now(timezone.utc).isoformat() + if session_id: + self._session_id = str(session_id).strip() - messages = [ - {"role": "user", "content": user_content, "timestamp": now}, - {"role": "assistant", "content": assistant_content, "timestamp": now}, - ] - - turn = json.dumps(messages) + turn = json.dumps(self._build_turn_messages(user_content, assistant_content)) self._session_turns.append(turn) self._turn_counter += 1 + self._turn_index = self._turn_counter - # Only retain every N turns if self._turn_counter % self._retain_every_n_turns != 0: logger.debug("sync_turn: buffered turn %d (will retain at turn %d)", self._turn_counter, self._turn_counter + (self._retain_every_n_turns - self._turn_counter % self._retain_every_n_turns)) @@ -741,19 +900,21 @@ class HindsightMemoryProvider(MemoryProvider): logger.debug("sync_turn: retaining %d turns, total session content %d chars", len(self._session_turns), sum(len(t) for t in self._session_turns)) - # Send the ENTIRE session as a single JSON array 
(document_id deduplicates). - # Each element in _session_turns is a JSON string of that turn's messages. content = "[" + ",".join(self._session_turns) + "]" def _sync(): try: client = self._get_client() - item: dict = { - "content": content, - "context": self._retain_context, - } - if self._tags: - item["tags"] = self._tags + item = self._build_retain_kwargs( + content, + context=self._retain_context, + metadata=self._build_metadata( + message_count=len(self._session_turns) * 2, + turn_index=self._turn_index, + ), + ) + item.pop("bank_id", None) + item.pop("retain_async", None) logger.debug("Hindsight retain: bank=%s, doc=%s, async=%s, content_len=%d, num_turns=%d", self._bank_id, self._session_id, self._retain_async, len(content), len(self._session_turns)) _run_sync(client.aretain_batch( @@ -789,11 +950,11 @@ class HindsightMemoryProvider(MemoryProvider): return tool_error("Missing required parameter: content") context = args.get("context") try: - retain_kwargs: dict = { - "bank_id": self._bank_id, "content": content, "context": context, - } - if self._tags: - retain_kwargs["tags"] = self._tags + retain_kwargs = self._build_retain_kwargs( + content, + context=context, + tags=args.get("tags"), + ) logger.debug("Tool hindsight_retain: bank=%s, content_len=%d, context=%s", self._bank_id, len(content), context) _run_sync(client.aretain(**retain_kwargs)) diff --git a/pyproject.toml b/pyproject.toml index bd8367365..992e548f9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -126,7 +126,7 @@ py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajector hermes_cli = ["web_dist/**/*"] [tool.setuptools.packages.find] -include = ["agent", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"] +include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"] 
[tool.pytest.ini_options] testpaths = ["tests"] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 96f48e77f..000000000 --- a/requirements.txt +++ /dev/null @@ -1,36 +0,0 @@ -# NOTE: This file is maintained for convenience only. -# The canonical dependency list is in pyproject.toml. -# Preferred install: pip install -e ".[all]" - -# Core dependencies -openai -python-dotenv -fire -httpx -rich -tenacity -prompt_toolkit -pyyaml -requests -jinja2 -pydantic>=2.0 -PyJWT[crypto] -debugpy - -# Web tools -firecrawl-py -parallel-web>=0.4.2 - -# Image generation -fal-client - -# Text-to-speech (Edge TTS is free, no API key needed) -edge-tts - -# Optional: For cron expression parsing (cronjob scheduling) -croniter - -# Optional: For messaging platform integrations (gateway) -python-telegram-bot[webhooks]>=22.6 -discord.py>=2.0 -aiohttp>=3.9.0 diff --git a/run_agent.py b/run_agent.py index e69d30ff2..eaafac5b4 100644 --- a/run_agent.py +++ b/run_agent.py @@ -76,8 +76,6 @@ from tools.interrupt import set_interrupt as _set_interrupt from tools.browser_tool import cleanup_browser -from hermes_constants import OPENROUTER_BASE_URL - # Agent internals extracted to agent/ package for modularity from agent.memory_manager import build_memory_context_block, sanitize_context from agent.retry_utils import jittered_backoff @@ -98,19 +96,11 @@ from agent.model_metadata import ( from agent.context_compressor import ContextCompressor from agent.subdirectory_hints import SubdirectoryHintTracker from agent.prompt_caching import apply_anthropic_cache_control -from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, build_environment_hints, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE +from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, build_environment_hints, load_soul_md, 
TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE from agent.usage_pricing import estimate_usage_cost, normalize_usage from agent.codex_responses_adapter import ( - _chat_content_to_responses_parts, - _chat_messages_to_responses_input as _codex_chat_messages_to_responses_input, _derive_responses_function_call_id as _codex_derive_responses_function_call_id, _deterministic_call_id as _codex_deterministic_call_id, - _extract_responses_message_text as _codex_extract_responses_message_text, - _extract_responses_reasoning_text as _codex_extract_responses_reasoning_text, - _normalize_codex_response as _codex_normalize_codex_response, - _preflight_codex_api_kwargs as _codex_preflight_codex_api_kwargs, - _preflight_codex_input_items as _codex_preflight_codex_input_items, - _responses_tools as _codex_responses_tools, _split_responses_tool_id as _codex_split_responses_tool_id, _summarize_user_message_for_log, ) @@ -124,7 +114,7 @@ from agent.trajectory import ( convert_scratchpad_to_think, has_incomplete_scratchpad, save_trajectory as _save_trajectory_to_file, ) -from utils import atomic_json_write, base_url_host_matches, base_url_hostname, env_var_enabled +from utils import atomic_json_write, base_url_host_matches, base_url_hostname, env_var_enabled, normalize_proxy_url @@ -187,7 +177,7 @@ def _get_proxy_from_env() -> Optional[str]: "https_proxy", "http_proxy", "all_proxy"): value = os.environ.get(key, "").strip() if value: - return value + return normalize_proxy_url(value) return None @@ -385,9 +375,8 @@ def _sanitize_surrogates(text: str) -> str: return text -# _chat_content_to_responses_parts and _summarize_user_message_for_log are -# imported from agent.codex_responses_adapter (see import block above). -# They remain importable from run_agent for backward compatibility. +# _summarize_user_message_for_log is imported from agent.codex_responses_adapter +# (see import block above). 
Remains importable from run_agent for backward compat. def _sanitize_structure_surrogates(payload: Any) -> bool: @@ -751,6 +740,11 @@ class AIAgent: prefill_messages: List[Dict[str, Any]] = None, platform: str = None, user_id: str = None, + user_name: str = None, + chat_id: str = None, + chat_name: str = None, + chat_type: str = None, + thread_id: str = None, gateway_session_key: str = None, skip_context_files: bool = False, skip_memory: bool = False, @@ -820,6 +814,11 @@ class AIAgent: self.ephemeral_system_prompt = ephemeral_system_prompt self.platform = platform # "cli", "telegram", "discord", "whatsapp", etc. self._user_id = user_id # Platform user identifier (gateway sessions) + self._user_name = user_name + self._chat_id = chat_id + self._chat_name = chat_name + self._chat_type = chat_type + self._thread_id = thread_id self._gateway_session_key = gateway_session_key # Stable per-chat key (e.g. agent:main:telegram:dm:123) # Pluggable print function โ€” CLI replaces this with _cprint so that # raw ANSI status lines are routed through prompt_toolkit's renderer @@ -872,6 +871,13 @@ class AIAgent: else: self.api_mode = "chat_completions" + # Eagerly warm the transport cache so import errors surface at init, + # not mid-conversation. Also validates the api_mode is registered. + try: + self._get_transport() + except Exception: + pass # Non-fatal โ€” transport may not exist for all modes yet + try: from hermes_cli.model_normalize import ( _AGGREGATOR_PROVIDERS, @@ -907,6 +913,10 @@ class AIAgent: ) ): self.api_mode = "codex_responses" + # Invalidate the eager-warmed transport cache โ€” api_mode changed + # from chat_completions to codex_responses after the warm at __init__. + if hasattr(self, "_transport_cache"): + self._transport_cache.clear() # Pre-warm OpenRouter model metadata cache in a background thread. 
# fetch_model_metadata() is cached for 1 hour; this avoids a blocking @@ -1088,8 +1098,7 @@ class AIAgent: _is_bedrock_anthropic = self.provider == "bedrock" if _is_bedrock_anthropic: from agent.anthropic_adapter import build_anthropic_bedrock_client - import re as _re - _region_match = _re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "") + _region_match = re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "") _br_region = _region_match.group(1) if _region_match else "us-east-1" self._bedrock_region = _br_region self._anthropic_client = build_anthropic_bedrock_client(_br_region) @@ -1130,8 +1139,7 @@ class AIAgent: elif self.api_mode == "bedrock_converse": # AWS Bedrock โ€” uses boto3 directly, no OpenAI client needed. # Region is extracted from the base_url or defaults to us-east-1. - import re as _re - _region_match = _re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "") + _region_match = re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "") self._bedrock_region = _region_match.group(1) if _region_match else "us-east-1" # Guardrail config โ€” read from config.yaml at init time. 
self._bedrock_guardrail_config = None @@ -1177,7 +1185,7 @@ class AIAgent: client_kwargs["default_headers"] = copilot_default_headers() elif base_url_host_matches(effective_base, "api.kimi.com"): client_kwargs["default_headers"] = { - "User-Agent": "KimiCLI/1.30.0", + "User-Agent": "claude-code/0.1.0", } elif base_url_host_matches(effective_base, "portal.qwen.ai"): client_kwargs["default_headers"] = _qwen_portal_headers() @@ -1455,11 +1463,10 @@ class AIAgent: if _mp and _mp.is_available(): self._memory_manager.add_provider(_mp) if self._memory_manager.providers: - from hermes_constants import get_hermes_home as _ghh _init_kwargs = { "session_id": self.session_id, "platform": platform or "cli", - "hermes_home": str(_ghh()), + "hermes_home": str(get_hermes_home()), "agent_context": "primary", } # Thread session title for memory provider scoping @@ -1474,6 +1481,16 @@ class AIAgent: # Thread gateway user identity for per-user memory scoping if self._user_id: _init_kwargs["user_id"] = self._user_id + if self._user_name: + _init_kwargs["user_name"] = self._user_name + if self._chat_id: + _init_kwargs["chat_id"] = self._chat_id + if self._chat_name: + _init_kwargs["chat_name"] = self._chat_name + if self._chat_type: + _init_kwargs["chat_type"] = self._chat_type + if self._thread_id: + _init_kwargs["thread_id"] = self._thread_id # Thread gateway session key for stable per-chat Honcho session isolation if self._gateway_session_key: _init_kwargs["gateway_session_key"] = self._gateway_session_key @@ -1576,7 +1593,6 @@ class AIAgent: "Falling back to auto-detection.", _config_context_length, ) - import sys print( f"\nโš  Invalid model.context_length in config.yaml: {_config_context_length!r}\n" f" Must be a plain integer (e.g. 
256000, not '256K').\n" @@ -1618,7 +1634,6 @@ class AIAgent: "Falling back to auto-detection.", self.model, _cp_ctx, ) - import sys print( f"\nโš  Invalid context_length for model {self.model!r} in custom_providers: {_cp_ctx!r}\n" f" Must be a plain integer (e.g. 256000, not '256K').\n" @@ -1881,8 +1896,6 @@ class AIAgent: change persists across turns (unlike fallback which is turn-scoped). """ - import logging - import re as _re from hermes_cli.providers import determine_api_mode # โ”€โ”€ Determine api_mode if not provided โ”€โ”€ @@ -1900,7 +1913,7 @@ class AIAgent: and isinstance(base_url, str) and base_url ): - base_url = _re.sub(r"/v1/?$", "", base_url) + base_url = re.sub(r"/v1/?$", "", base_url) old_model = self.model old_provider = self.provider @@ -1910,6 +1923,9 @@ class AIAgent: self.provider = new_provider self.base_url = base_url or self.base_url self.api_mode = api_mode + # Invalidate transport cache โ€” new api_mode may need a different transport + if hasattr(self, "_transport_cache"): + self._transport_cache.clear() if api_key: self.api_key = api_key @@ -2012,6 +2028,22 @@ class AIAgent: self._fallback_activated = False self._fallback_index = 0 + # When the user deliberately swaps primary providers (e.g. openrouter + # โ†’ anthropic), drop any fallback entries that target the OLD primary + # or the NEW one. The chain was seeded from config at agent init for + # the original provider โ€” without pruning, a failed turn on the new + # primary silently re-activates the provider the user just rejected, + # which is exactly what was reported during TUI v2 blitz testing + # ("switched to anthropic, tui keeps trying openrouter"). 
+ old_norm = (old_provider or "").strip().lower() + new_norm = (new_provider or "").strip().lower() + if old_norm and new_norm and old_norm != new_norm: + self._fallback_chain = [ + entry for entry in self._fallback_chain + if (entry.get("provider") or "").strip().lower() not in {old_norm, new_norm} + ] + self._fallback_model = self._fallback_chain[0] if self._fallback_chain else None + logging.info( "Model switched in-place: %s (%s) -> %s (%s)", old_model, old_provider, new_model, new_provider, @@ -2362,6 +2394,13 @@ class AIAgent: cost reduction as direct Anthropic callers, provided their gateway implements the Anthropic cache_control contract (MiniMax, Zhipu GLM, LiteLLM's Anthropic proxy mode all do). + + Qwen / Alibaba-family models on OpenCode, OpenCode Go, and direct + Alibaba (DashScope) also honour Anthropic-style ``cache_control`` + markers on OpenAI-wire chat completions. Upstream pi-mono #3392 / + pi #3393 documented this for opencode-go Qwen. Without markers + these providers serve zero cache hits, re-billing the full prompt + on every turn. """ eff_provider = (provider if provider is not None else self.provider) or "" eff_base_url = base_url if base_url is not None else (self.base_url or "") @@ -2369,7 +2408,9 @@ class AIAgent: eff_model = (model if model is not None else self.model) or "" base_lower = eff_base_url.lower() - is_claude = "claude" in eff_model.lower() + model_lower = eff_model.lower() + provider_lower = eff_provider.lower() + is_claude = "claude" in model_lower is_openrouter = base_url_host_matches(eff_base_url, "openrouter.ai") is_anthropic_wire = eff_api_mode == "anthropic_messages" is_native_anthropic = ( @@ -2384,6 +2425,22 @@ class AIAgent: if is_anthropic_wire and is_claude: # Third-party Anthropic-compatible gateway. return True, True + + # Qwen/Alibaba on OpenCode (Zen/Go) and native DashScope: OpenAI-wire + # transport that accepts Anthropic-style cache_control markers and + # rewards them with real cache hits. 
Without this branch + # qwen3.6-plus on opencode-go reports 0% cached tokens and burns + # through the subscription on every turn. + model_is_qwen = "qwen" in model_lower + provider_is_alibaba_family = provider_lower in { + "opencode", "opencode-zen", "opencode-go", "alibaba", + } + if provider_is_alibaba_family and model_is_qwen: + # Envelope layout (native_anthropic=False): markers on inner + # content parts, not top-level tool messages. Matches + # pi-mono's "alibaba" cacheControlFormat. + return True, False + return False, False @staticmethod @@ -2469,6 +2526,20 @@ class AIAgent: 4. Tag variants: ````, ````, ````, ````, ```` (Gemma 4), all case-insensitive. + + Additionally strips standalone tool-call XML blocks that some open + models (notably Gemma variants on OpenRouter) emit inside assistant + content instead of via the structured ``tool_calls`` field: + * ``โ€ฆ`` + * ``โ€ฆ`` + * ``โ€ฆ`` + * ``โ€ฆ`` + * ``โ€ฆ`` + * ``โ€ฆ`` (Gemma style) + Ported from openclaw/openclaw#67318. The ```` variant is + boundary-gated (only strips when the tag sits at start-of-line or + after punctuation and carries a ``name="..."`` attribute) so prose + mentions like "Use in JavaScript" are preserved. """ if not content: return "" @@ -2480,6 +2551,30 @@ class AIAgent: content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE) content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE) content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE) + # 1b. Tool-call XML blocks (openclaw/openclaw#67318). Handle the + # generic tag names first โ€” they have no attribute gating since + # a literal in prose is already vanishingly rare. + for _tc_name in ("tool_call", "tool_calls", "tool_result", + "function_call", "function_calls"): + content = re.sub( + rf'<{_tc_name}\b[^>]*>.*?', + '', + content, + flags=re.DOTALL | re.IGNORECASE, + ) + # 1c. ... โ€” Gemma-style standalone + # tool call. 
Only strip when the tag sits at a block boundary + # (start of text, after a newline, or after sentence-ending + # punctuation) AND carries a name="..." attribute. This keeps + # prose mentions like "Use to declare" safe. + content = re.sub( + r'(?:(?<=^)|(?<=[\n\r.!?:]))[ \t]*' + r']*\bname\s*=[^>]*>' + r'(?:(?:(?!).)*)', + '', + content, + flags=re.DOTALL | re.IGNORECASE, + ) # 2. Unterminated reasoning block โ€” open tag at a block boundary # (start of text, or after a newline) with no matching close. # Strip from the tag to end of string. Fixes #8878 / #9568 @@ -2497,6 +2592,16 @@ class AIAgent: content, flags=re.IGNORECASE, ) + # 3b. Stray tool-call closers. (We do NOT strip bare or + # unterminated because a truncated tail + # during streaming may still be valuable to the user; matches + # OpenClaw's intentional asymmetry.) + content = re.sub( + r'\s*', + '', + content, + flags=re.IGNORECASE, + ) return content @staticmethod @@ -2783,10 +2888,10 @@ class AIAgent: prompt = self._SKILL_REVIEW_PROMPT def _run_review(): - import contextlib, os as _os + import contextlib review_agent = None try: - with open(_os.devnull, "w") as _devnull, \ + with open(os.devnull, "w") as _devnull, \ contextlib.redirect_stdout(_devnull), \ contextlib.redirect_stderr(_devnull): review_agent = AIAgent( @@ -2916,7 +3021,7 @@ class AIAgent: role = msg.get("role", "unknown") content = msg.get("content") tool_calls_data = None - if hasattr(msg, "tool_calls") and msg.tool_calls: + if hasattr(msg, "tool_calls") and isinstance(msg.tool_calls, list) and msg.tool_calls: tool_calls_data = [ {"name": tc.function.name, "arguments": tc.function.arguments} for tc in msg.tool_calls @@ -2932,6 +3037,7 @@ class AIAgent: tool_call_id=msg.get("tool_call_id"), finish_reason=msg.get("finish_reason"), reasoning=msg.get("reasoning") if role == "assistant" else None, + reasoning_content=msg.get("reasoning_content") if role == "assistant" else None, reasoning_details=msg.get("reasoning_details") if role == 
"assistant" else None, codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None, ) @@ -3182,15 +3288,14 @@ class AIAgent: tag instead of dumping raw HTML. Falls back to a truncated str(error) for everything else. """ - import re as _re raw = str(error) # Cloudflare / proxy HTML pages: grab the <title> for a clean summary if "<!DOCTYPE" in raw or "<html" in raw: - m = _re.search(r"<title[^>]*>([^<]+)", raw, _re.IGNORECASE) + m = re.search(r"]*>([^<]+)", raw, re.IGNORECASE) title = m.group(1).strip() if m else "HTML error page (title not found)" # Also grab Cloudflare Ray ID if present - ray = _re.search(r"Cloudflare Ray ID:\s*]*>([^<]+)", raw) + ray = re.search(r"Cloudflare Ray ID:\s*]*>([^<]+)", raw) ray_id = ray.group(1).strip() if ray else None status_code = getattr(error, "status_code", None) parts = [] @@ -3859,14 +3964,12 @@ class AIAgent: # 2. Clean terminal sandbox environments try: - from tools.terminal_tool import cleanup_vm cleanup_vm(task_id) except Exception: pass # 3. Clean browser daemon sessions try: - from tools.browser_tool import cleanup_browser cleanup_browser(task_id) except Exception: pass @@ -4277,10 +4380,6 @@ class AIAgent: if self._memory_store: self._memory_store.load_from_disk() - def _responses_tools(self, tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]: - """Convert chat-completions tool schemas to Responses function-tool schemas.""" - return _codex_responses_tools(tools if tools is not None else self.tools) - @staticmethod def _deterministic_call_id(fn_name: str, arguments: str, index: int = 0) -> str: """Generate a deterministic call_id from tool call content. 
@@ -4304,33 +4403,6 @@ class AIAgent: """Build a valid Responses `function_call.id` (must start with `fc_`).""" return _codex_derive_responses_function_call_id(call_id, response_item_id) - def _chat_messages_to_responses_input(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """Convert internal chat-style messages to Responses input items.""" - return _codex_chat_messages_to_responses_input(messages) - - def _preflight_codex_input_items(self, raw_items: Any) -> List[Dict[str, Any]]: - return _codex_preflight_codex_input_items(raw_items) - - def _preflight_codex_api_kwargs( - self, - api_kwargs: Any, - *, - allow_stream: bool = False, - ) -> Dict[str, Any]: - return _codex_preflight_codex_api_kwargs(api_kwargs, allow_stream=allow_stream) - - def _extract_responses_message_text(self, item: Any) -> str: - """Extract assistant text from a Responses message output item.""" - return _codex_extract_responses_message_text(item) - - def _extract_responses_reasoning_text(self, item: Any) -> str: - """Extract a compact reasoning text from a Responses reasoning item.""" - return _codex_extract_responses_reasoning_text(item) - - def _normalize_codex_response(self, response: Any) -> tuple[Any, str]: - """Normalize a Responses API object to an assistant_message-like object.""" - return _codex_normalize_codex_response(response) - def _thread_identity(self) -> str: thread = threading.current_thread() return f"{thread.name}:{thread.ident}" @@ -4823,7 +4895,7 @@ class AIAgent: active_client = client or self._ensure_primary_openai_client(reason="codex_create_stream_fallback") fallback_kwargs = dict(api_kwargs) fallback_kwargs["stream"] = True - fallback_kwargs = self._preflight_codex_api_kwargs(fallback_kwargs, allow_stream=True) + fallback_kwargs = self._get_transport().preflight_kwargs(fallback_kwargs, allow_stream=True) stream_or_response = active_client.responses.create(**fallback_kwargs) # Compatibility shim for mocks or providers that still return a concrete 
response. @@ -5018,7 +5090,7 @@ class AIAgent: self._client_kwargs["default_headers"] = copilot_default_headers() elif base_url_host_matches(base_url, "api.kimi.com"): - self._client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} + self._client_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"} elif base_url_host_matches(base_url, "portal.qwen.ai"): self._client_kwargs["default_headers"] = _qwen_portal_headers() elif base_url_host_matches(base_url, "chatgpt.com"): @@ -5178,6 +5250,9 @@ class AIAgent: result["response"] = self._anthropic_messages_create(api_kwargs) elif self.api_mode == "bedrock_converse": # Bedrock uses boto3 directly โ€” no OpenAI client needed. + # normalize_converse_response produces an OpenAI-compatible + # SimpleNamespace so the rest of the agent loop can treat + # bedrock responses like chat_completions responses. from agent.bedrock_adapter import ( _get_bedrock_runtime_client, normalize_converse_response, @@ -5805,16 +5880,6 @@ class AIAgent: result["response"] = _call_chat_completions() return # success except Exception as e: - if deltas_were_sent["yes"]: - # Streaming failed AFTER some tokens were already - # delivered. Don't retry or fall back โ€” partial - # content already reached the user. - logger.warning( - "Streaming failed after partial delivery, not retrying: %s", e - ) - result["error"] = e - return - _is_timeout = isinstance( e, (_httpx.ReadTimeout, _httpx.ConnectTimeout, _httpx.PoolTimeout) ) @@ -5822,6 +5887,123 @@ class AIAgent: e, (_httpx.ConnectError, _httpx.RemoteProtocolError, ConnectionError) ) + # If the stream died AFTER some tokens were delivered: + # normally we don't retry (the user already saw text, + # retrying would duplicate it). BUT: if a tool call + # was in-flight when the stream died, silently aborting + # discards the tool call entirely. 
In that case we + # prefer to retry โ€” the user sees a brief + # "reconnecting" marker + duplicated preamble text, + # which is strictly better than a failed action with + # a "retry manually" message. Limit this to transient + # connection errors (Clawdbot-style narrow gate): no + # tool has executed yet within this API call, so + # silent retry is safe wrt side-effects. + if deltas_were_sent["yes"]: + _partial_tool_in_flight = bool( + result.get("partial_tool_names") + ) + _is_sse_conn_err_preview = False + if not _is_timeout and not _is_conn_err: + from openai import APIError as _APIError + if isinstance(e, _APIError) and not getattr(e, "status_code", None): + _err_lower_preview = str(e).lower() + _SSE_PREVIEW_PHRASES = ( + "connection lost", + "connection reset", + "connection closed", + "connection terminated", + "network error", + "network connection", + "terminated", + "peer closed", + "broken pipe", + "upstream connect error", + ) + _is_sse_conn_err_preview = any( + phrase in _err_lower_preview + for phrase in _SSE_PREVIEW_PHRASES + ) + _is_transient = ( + _is_timeout or _is_conn_err or _is_sse_conn_err_preview + ) + _can_silent_retry = ( + _partial_tool_in_flight + and _is_transient + and _stream_attempt < _max_stream_retries + ) + if not _can_silent_retry: + # Either no tool call was in-flight (so the + # turn was a pure text response โ€” current + # stub-with-recovered-text behaviour is + # correct), or retries are exhausted, or the + # error isn't transient. Fall through to the + # stub path. + logger.warning( + "Streaming failed after partial delivery, not retrying: %s", e + ) + result["error"] = e + return + # Tool call was in-flight AND error is transient: + # retry silently. Clear per-attempt state so the + # next stream starts clean. Fire a "reconnecting" + # marker so the user sees why the preamble is + # about to be re-streamed. 
+ logger.info( + "Streaming attempt %s/%s died mid tool-call " + "(%s: %s) after user-visible text; retrying " + "silently to avoid losing the action. " + "Preamble will re-stream.", + _stream_attempt + 1, + _max_stream_retries + 1, + type(e).__name__, + e, + ) + try: + self._fire_stream_delta( + "\n\nโš  Connection dropped mid tool-call; " + "reconnectingโ€ฆ\n\n" + ) + except Exception: + pass + # Reset the streamed-text buffer so the retry's + # fresh preamble doesn't get double-recorded in + # _current_streamed_assistant_text (which would + # pollute the interim-visible-text comparison). + try: + self._reset_stream_delivery_tracking() + except Exception: + pass + # Reset in-memory accumulators so the next + # attempt's chunks don't concat onto the dead + # stream's partial JSON. + result["partial_tool_names"] = [] + deltas_were_sent["yes"] = False + first_delta_fired["done"] = False + self._emit_status( + f"โš ๏ธ Connection dropped mid tool-call " + f"({type(e).__name__}). Reconnectingโ€ฆ " + f"(attempt {_stream_attempt + 2}/{_max_stream_retries + 1})" + ) + self._touch_activity( + f"stream retry {_stream_attempt + 2}/{_max_stream_retries + 1} " + f"mid tool-call after {type(e).__name__}" + ) + stale = request_client_holder.get("client") + if stale is not None: + self._close_request_openai_client( + stale, reason="stream_mid_tool_retry_cleanup" + ) + request_client_holder["client"] = None + try: + self._replace_primary_openai_client( + reason="stream_mid_tool_retry_pool_cleanup" + ) + except Exception: + pass + self._emit_status("๐Ÿ”„ Reconnected โ€” resumingโ€ฆ") + continue + # SSE error events from proxies (e.g. OpenRouter sends # {"error":{"message":"Network connection lost."}}) are # raised as APIError by the OpenAI SDK. These are @@ -6132,9 +6314,14 @@ class AIAgent: # falling through to OpenRouter defaults. 
fb_base_url_hint = (fb.get("base_url") or "").strip() or None fb_api_key_hint = (fb.get("api_key") or "").strip() or None + if not fb_api_key_hint: + fb_key_env = (fb.get("key_env") or "").strip() + if fb_key_env: + fb_api_key_hint = os.getenv(fb_key_env, "").strip() or None # For Ollama Cloud endpoints, pull OLLAMA_API_KEY from env - # when no explicit key is in the fallback config. - if fb_base_url_hint and "ollama.com" in fb_base_url_hint.lower() and not fb_api_key_hint: + # when no explicit key is in the fallback config. Host match + # (not substring) โ€” see GHSA-76xc-57q6-vm5m. + if fb_base_url_hint and base_url_host_matches(fb_base_url_hint, "ollama.com") and not fb_api_key_hint: fb_api_key_hint = os.getenv("OLLAMA_API_KEY") or None fb_client, _resolved_fb_model = resolve_provider_client( fb_provider, model=fb_model, raw_codex=True, @@ -6180,6 +6367,8 @@ class AIAgent: self.provider = fb_provider self.base_url = fb_base_url self.api_mode = fb_api_mode + if hasattr(self, "_transport_cache"): + self._transport_cache.clear() self._fallback_activated = True # Honor per-provider / per-model request_timeout_seconds for the @@ -6291,6 +6480,8 @@ class AIAgent: self.provider = rt["provider"] self.base_url = rt["base_url"] # setter updates _base_url_lower self.api_mode = rt["api_mode"] + if hasattr(self, "_transport_cache"): + self._transport_cache.clear() self.api_key = rt["api_key"] self._client_kwargs = dict(rt["client_kwargs"]) self._use_prompt_caching = rt["use_prompt_caching"] @@ -6397,6 +6588,8 @@ class AIAgent: self.provider = rt["provider"] self.base_url = rt["base_url"] self.api_mode = rt["api_mode"] + if hasattr(self, "_transport_cache"): + self._transport_cache.clear() self.api_key = rt["api_key"] if self.api_mode == "anthropic_messages": @@ -6555,6 +6748,60 @@ class AIAgent: return suffix return "[A multimodal message was converted to text for Anthropic compatibility.]" + def _get_transport(self, api_mode: str = None): + """Return the cached transport 
for the given (or current) api_mode. + + Lazy-initializes on first call per api_mode. Returns None if no + transport is registered for the mode. + """ + mode = api_mode or self.api_mode + cache = getattr(self, "_transport_cache", None) + if cache is None: + cache = {} + self._transport_cache = cache + t = cache.get(mode) + if t is None: + from agent.transports import get_transport + t = get_transport(mode) + cache[mode] = t + return t + + @staticmethod + def _nr_to_assistant_message(nr): + """Convert a NormalizedResponse to the SimpleNamespace shape downstream expects. + + This is the single back-compat shim between the transport layer + (NormalizedResponse) and the agent loop (SimpleNamespace with + .content, .tool_calls, .reasoning, .reasoning_content, + .reasoning_details, .codex_reasoning_items, and per-tool-call + .call_id / .response_item_id). + + TODO: Remove when downstream code reads NormalizedResponse directly. + """ + tc_list = None + if nr.tool_calls: + tc_list = [] + for tc in nr.tool_calls: + tc_ns = SimpleNamespace( + id=tc.id, + type="function", + function=SimpleNamespace(name=tc.name, arguments=tc.arguments), + ) + if tc.provider_data: + for key in ("call_id", "response_item_id"): + if tc.provider_data.get(key): + setattr(tc_ns, key, tc.provider_data[key]) + tc_list.append(tc_ns) + pd = nr.provider_data or {} + return SimpleNamespace( + content=nr.content, + tool_calls=tc_list or None, + reasoning=nr.reasoning, + reasoning_content=pd.get("reasoning_content"), + reasoning_details=pd.get("reasoning_details"), + codex_reasoning_items=pd.get("codex_reasoning_items"), + ) + def _prepare_anthropic_messages_for_api(self, api_messages: list) -> list: if not any( isinstance(msg, dict) and self._content_has_image_parts(msg.get("content")) @@ -6671,20 +6918,14 @@ class AIAgent: def _build_api_kwargs(self, api_messages: list) -> dict: """Build the keyword arguments dict for the active API mode.""" if self.api_mode == "anthropic_messages": - from 
agent.anthropic_adapter import build_anthropic_kwargs + _transport = self._get_transport() anthropic_messages = self._prepare_anthropic_messages_for_api(api_messages) - # Pass context_length (total input+output window) so the adapter can - # clamp max_tokens (output cap) when the user configured a smaller - # context window than the model's native output limit. ctx_len = getattr(self, "context_compressor", None) ctx_len = ctx_len.context_length if ctx_len else None - # _ephemeral_max_output_tokens is set for one call when the API - # returns "max_tokens too large given prompt" โ€” it caps output to - # the available window space without touching context_length. ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None) if ephemeral_out is not None: self._ephemeral_max_output_tokens = None # consume immediately - return build_anthropic_kwargs( + return _transport.build_kwargs( model=self.model, messages=anthropic_messages, tools=self.tools, @@ -6700,31 +6941,20 @@ class AIAgent: # AWS Bedrock native Converse API โ€” bypasses the OpenAI client entirely. # The adapter handles message/tool conversion and boto3 calls directly. 
if self.api_mode == "bedrock_converse": - from agent.bedrock_adapter import build_converse_kwargs + _bt = self._get_transport() region = getattr(self, "_bedrock_region", None) or "us-east-1" guardrail = getattr(self, "_bedrock_guardrail_config", None) - return { - "__bedrock_converse__": True, - "__bedrock_region__": region, - **build_converse_kwargs( - model=self.model, - messages=api_messages, - tools=self.tools, - max_tokens=self.max_tokens or 4096, - temperature=None, # Let the model use its default - guardrail_config=guardrail, - ), - } + return _bt.build_kwargs( + model=self.model, + messages=api_messages, + tools=self.tools, + max_tokens=self.max_tokens or 4096, + region=region, + guardrail_config=guardrail, + ) if self.api_mode == "codex_responses": - instructions = "" - payload_messages = api_messages - if api_messages and api_messages[0].get("role") == "system": - instructions = str(api_messages[0].get("content") or "").strip() - payload_messages = api_messages[1:] - if not instructions: - instructions = DEFAULT_AGENT_IDENTITY - + _ct = self._get_transport() is_github_responses = ( base_url_host_matches(self.base_url, "models.github.ai") or base_url_host_matches(self.base_url, "api.githubcopilot.com") @@ -6736,274 +6966,118 @@ class AIAgent: and "/backend-api/codex" in self._base_url_lower ) ) - - # Resolve reasoning effort: config > default (medium) - reasoning_effort = "medium" - reasoning_enabled = True - if self.reasoning_config and isinstance(self.reasoning_config, dict): - if self.reasoning_config.get("enabled") is False: - reasoning_enabled = False - elif self.reasoning_config.get("effort"): - reasoning_effort = self.reasoning_config["effort"] - - # Clamp effort levels not supported by the Responses API model. - # GPT-5.4 supports none/low/medium/high/xhigh but not "minimal". - # "minimal" is valid on OpenRouter and GPT-5 but fails on 5.2/5.4. 
- _effort_clamp = {"minimal": "low"} - reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort) - - kwargs = { - "model": self.model, - "instructions": instructions, - "input": self._chat_messages_to_responses_input(payload_messages), - "tools": self._responses_tools(), - "tool_choice": "auto", - "parallel_tool_calls": True, - "store": False, - } - - if not is_github_responses: - kwargs["prompt_cache_key"] = self.session_id - is_xai_responses = self.provider == "xai" or self._base_url_hostname == "api.x.ai" + return _ct.build_kwargs( + model=self.model, + messages=api_messages, + tools=self.tools, + reasoning_config=self.reasoning_config, + session_id=getattr(self, "session_id", None), + max_tokens=self.max_tokens, + request_overrides=self.request_overrides, + is_github_responses=is_github_responses, + is_codex_backend=is_codex_backend, + is_xai_responses=is_xai_responses, + github_reasoning_extra=self._github_models_reasoning_extra_body() if is_github_responses else None, + ) - if reasoning_enabled and is_xai_responses: - # xAI reasons automatically โ€” no effort param, just include encrypted content - kwargs["include"] = ["reasoning.encrypted_content"] - elif reasoning_enabled: - if is_github_responses: - # Copilot's Responses route advertises reasoning-effort support, - # but not OpenAI-specific prompt cache or encrypted reasoning - # fields. Keep the payload to the documented subset. 
- github_reasoning = self._github_models_reasoning_extra_body() - if github_reasoning is not None: - kwargs["reasoning"] = github_reasoning - else: - kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"} - kwargs["include"] = ["reasoning.encrypted_content"] - elif not is_github_responses and not is_xai_responses: - kwargs["include"] = [] + # โ”€โ”€ chat_completions (default) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + _ct = self._get_transport() - if self.request_overrides: - kwargs.update(self.request_overrides) - - if self.max_tokens is not None and not is_codex_backend: - kwargs["max_output_tokens"] = self.max_tokens - - if is_xai_responses and getattr(self, "session_id", None): - kwargs["extra_headers"] = {"x-grok-conv-id": self.session_id} - - return kwargs - - sanitized_messages = api_messages - needs_sanitization = False - for msg in api_messages: - if not isinstance(msg, dict): - continue - if "codex_reasoning_items" in msg: - needs_sanitization = True - break - - tool_calls = msg.get("tool_calls") - if isinstance(tool_calls, list): - for tool_call in tool_calls: - if not isinstance(tool_call, dict): - continue - if "call_id" in tool_call or "response_item_id" in tool_call: - needs_sanitization = True - break - if needs_sanitization: - break - - if needs_sanitization: - sanitized_messages = copy.deepcopy(api_messages) - for msg in sanitized_messages: - if not isinstance(msg, dict): - continue - - # Codex-only replay state must not leak into strict chat-completions APIs. - msg.pop("codex_reasoning_items", None) - - tool_calls = msg.get("tool_calls") - if isinstance(tool_calls, list): - for tool_call in tool_calls: - if isinstance(tool_call, dict): - tool_call.pop("call_id", None) - tool_call.pop("response_item_id", None) - - # Qwen portal: normalize content to list-of-dicts, inject cache_control. - # Must run AFTER codex sanitization so we transform the final messages. 
- # If sanitization already deepcopied, reuse that copy (in-place). - if self._is_qwen_portal(): - if sanitized_messages is api_messages: - # No sanitization was done โ€” we need our own copy. - sanitized_messages = self._qwen_prepare_chat_messages(sanitized_messages) - else: - # Already a deepcopy โ€” transform in place to avoid a second deepcopy. - self._qwen_prepare_chat_messages_inplace(sanitized_messages) - - # GPT-5 and Codex models respond better to 'developer' than 'system' - # for instruction-following. Swap the role at the API boundary so - # internal message representation stays uniform ("system"). - _model_lower = (self.model or "").lower() - if ( - sanitized_messages - and sanitized_messages[0].get("role") == "system" - and any(p in _model_lower for p in DEVELOPER_ROLE_MODELS) - ): - # Shallow-copy the list + first message only โ€” rest stays shared. - sanitized_messages = list(sanitized_messages) - sanitized_messages[0] = {**sanitized_messages[0], "role": "developer"} - - provider_preferences = {} - if self.providers_allowed: - provider_preferences["only"] = self.providers_allowed - if self.providers_ignored: - provider_preferences["ignore"] = self.providers_ignored - if self.providers_order: - provider_preferences["order"] = self.providers_order - if self.provider_sort: - provider_preferences["sort"] = self.provider_sort - if self.provider_require_parameters: - provider_preferences["require_parameters"] = True - if self.provider_data_collection: - provider_preferences["data_collection"] = self.provider_data_collection - - api_kwargs = { - "model": self.model, - "messages": sanitized_messages, - "timeout": self._resolved_api_call_timeout(), - } - try: - from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE - except Exception: - _fixed_temperature_for_model = None - OMIT_TEMPERATURE = None - if _fixed_temperature_for_model is not None: - fixed_temperature = _fixed_temperature_for_model(self.model, self.base_url) - if 
fixed_temperature is OMIT_TEMPERATURE: - api_kwargs.pop("temperature", None) - elif fixed_temperature is not None: - api_kwargs["temperature"] = fixed_temperature - if self._is_qwen_portal(): - api_kwargs["metadata"] = { - "sessionId": self.session_id or "hermes", - "promptId": str(uuid.uuid4()), - } - if self.tools: - api_kwargs["tools"] = self.tools - - # โ”€โ”€ max_tokens for chat_completions โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ - # Priority: ephemeral override (error recovery / length-continuation - # boost) > user-configured max_tokens > provider-specific defaults. - _ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None) - if _ephemeral_out is not None: - self._ephemeral_max_output_tokens = None # consume immediately - api_kwargs.update(self._max_tokens_param(_ephemeral_out)) - elif self.max_tokens is not None: - api_kwargs.update(self._max_tokens_param(self.max_tokens)) - elif "integrate.api.nvidia.com" in self._base_url_lower: - # NVIDIA NIM defaults to a very low max_tokens when omitted, - # causing models like GLM-4.7 to truncate immediately (thinking - # tokens alone exhaust the budget). 16384 provides adequate room. - api_kwargs.update(self._max_tokens_param(16384)) - elif self._is_qwen_portal(): - # Qwen Portal defaults to a very low max_tokens when omitted. - # Reasoning models (qwen3-coder-plus) exhaust that budget on - # thinking tokens alone, causing the portal to return - # finish_reason="stop" with truncated output โ€” the agent sees - # this as an intentional stop and exits the loop. Send 65536 - # (the documented max output for qwen3-coder models) so the - # model has adequate output budget for tool calls. 
- api_kwargs.update(self._max_tokens_param(65536)) - elif (self._is_openrouter_url() or "nousresearch" in self._base_url_lower) and "claude" in (self.model or "").lower(): - # OpenRouter and Nous Portal translate requests to Anthropic's - # Messages API, which requires max_tokens as a mandatory field. - # When we omit it, the proxy picks a default that can be too - # low โ€” the model spends its output budget on thinking and has - # almost nothing left for the actual response (especially large - # tool calls like write_file). Sending the model's real output - # limit ensures full capacity. - try: - from agent.anthropic_adapter import _get_anthropic_max_output - _model_output_limit = _get_anthropic_max_output(self.model) - api_kwargs["max_tokens"] = _model_output_limit - except Exception: - pass # fail open โ€” let the proxy pick its default - - extra_body = {} - - _is_openrouter = self._is_openrouter_url() - _is_github_models = ( + # Provider detection flags + _is_qwen = self._is_qwen_portal() + _is_or = self._is_openrouter_url() + _is_gh = ( base_url_host_matches(self._base_url_lower, "models.github.ai") or base_url_host_matches(self._base_url_lower, "api.githubcopilot.com") ) - - # Provider preferences (only, ignore, order, sort) are OpenRouter- - # specific. Only send to OpenRouter-compatible endpoints. - # TODO: Nous Portal will add transparent proxy support โ€” re-enable - # for _is_nous when their backend is updated. 
- if provider_preferences and _is_openrouter: - extra_body["provider"] = provider_preferences _is_nous = "nousresearch" in self._base_url_lower + _is_nvidia = "integrate.api.nvidia.com" in self._base_url_lower + _is_kimi = ( + base_url_host_matches(self.base_url, "api.kimi.com") + or base_url_host_matches(self.base_url, "moonshot.ai") + or base_url_host_matches(self.base_url, "moonshot.cn") + ) - if self._supports_reasoning_extra_body(): - if _is_github_models: - github_reasoning = self._github_models_reasoning_extra_body() - if github_reasoning is not None: - extra_body["reasoning"] = github_reasoning - else: - if self.reasoning_config is not None: - rc = dict(self.reasoning_config) - # Nous Portal requires reasoning enabled โ€” don't send - # enabled=false to it (would cause 400). - if _is_nous and rc.get("enabled") is False: - pass # omit reasoning entirely for Nous when disabled - else: - extra_body["reasoning"] = rc - else: - extra_body["reasoning"] = { - "enabled": True, - "effort": "medium" - } + # Temperature: _fixed_temperature_for_model may return OMIT_TEMPERATURE + # sentinel (temperature omitted entirely), a numeric override, or None. 
+ try: + from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE + _ft = _fixed_temperature_for_model(self.model, self.base_url) + _omit_temp = _ft is OMIT_TEMPERATURE + _fixed_temp = _ft if not _omit_temp else None + except Exception: + _omit_temp = False + _fixed_temp = None - # Nous Portal product attribution - if _is_nous: - extra_body["tags"] = ["product=hermes-agent"] + # Provider preferences (OpenRouter-specific) + _prefs: Dict[str, Any] = {} + if self.providers_allowed: + _prefs["only"] = self.providers_allowed + if self.providers_ignored: + _prefs["ignore"] = self.providers_ignored + if self.providers_order: + _prefs["order"] = self.providers_order + if self.provider_sort: + _prefs["sort"] = self.provider_sort + if self.provider_require_parameters: + _prefs["require_parameters"] = True + if self.provider_data_collection: + _prefs["data_collection"] = self.provider_data_collection - # Ollama num_ctx: override the 2048 default so the model actually - # uses the context window it was trained for. Passed via the OpenAI - # SDK's extra_body โ†’ options.num_ctx, which Ollama's OpenAI-compat - # endpoint forwards to the runner as --ctx-size. - if self._ollama_num_ctx: - options = extra_body.get("options", {}) - options["num_ctx"] = self._ollama_num_ctx - extra_body["options"] = options + # Anthropic max output for Claude on OpenRouter/Nous + _ant_max = None + if (_is_or or _is_nous) and "claude" in (self.model or "").lower(): + try: + from agent.anthropic_adapter import _get_anthropic_max_output + _ant_max = _get_anthropic_max_output(self.model) + except Exception: + pass # fail open โ€” let the proxy pick its default - # Ollama / custom provider: pass think=false when reasoning is disabled. - # Ollama does not recognise the OpenRouter-style `reasoning` extra_body - # field, so we use its native `think` parameter instead. - # This prevents thinking-capable models (Qwen3, etc.) 
from generating - # blocks and producing empty-response errors when the user has - # set reasoning_effort: none. - if self.provider == "custom" and self.reasoning_config and isinstance(self.reasoning_config, dict): - _effort = (self.reasoning_config.get("effort") or "").strip().lower() - _enabled = self.reasoning_config.get("enabled", True) - if _effort == "none" or _enabled is False: - extra_body["think"] = False + # Qwen session metadata precomputed here (promptId is per-call random) + _qwen_meta = None + if _is_qwen: + _qwen_meta = { + "sessionId": self.session_id or "hermes", + "promptId": str(uuid.uuid4()), + } - if self._is_qwen_portal(): - extra_body["vl_high_resolution_images"] = True + # Ephemeral max output override โ€” consume immediately so the next + # turn doesn't inherit it. + _ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None) + if _ephemeral_out is not None: + self._ephemeral_max_output_tokens = None - if extra_body: - api_kwargs["extra_body"] = extra_body - - # Priority Processing / generic request overrides (e.g. service_tier). - # Applied last so overrides win over any defaults set above. 
- if self.request_overrides: - api_kwargs.update(self.request_overrides) - - return api_kwargs + return _ct.build_kwargs( + model=self.model, + messages=api_messages, + tools=self.tools, + timeout=self._resolved_api_call_timeout(), + max_tokens=self.max_tokens, + ephemeral_max_output_tokens=_ephemeral_out, + max_tokens_param_fn=self._max_tokens_param, + reasoning_config=self.reasoning_config, + request_overrides=self.request_overrides, + session_id=getattr(self, "session_id", None), + model_lower=(self.model or "").lower(), + is_openrouter=_is_or, + is_nous=_is_nous, + is_qwen_portal=_is_qwen, + is_github_models=_is_gh, + is_nvidia_nim=_is_nvidia, + is_kimi=_is_kimi, + is_custom_provider=self.provider == "custom", + ollama_num_ctx=self._ollama_num_ctx, + provider_preferences=_prefs or None, + qwen_prepare_fn=self._qwen_prepare_chat_messages if _is_qwen else None, + qwen_prepare_inplace_fn=self._qwen_prepare_chat_messages_inplace if _is_qwen else None, + qwen_session_metadata=_qwen_meta, + fixed_temperature=_fixed_temp, + omit_temperature=_omit_temp, + supports_reasoning=self._supports_reasoning_extra_body(), + github_reasoning_extra=self._github_models_reasoning_extra_body() if _is_gh else None, + anthropic_max_output=_ant_max, + ) def _supports_reasoning_extra_body(self) -> bool: """Return True when reasoning extra_body is safe to send for this route/model. @@ -7139,6 +7213,11 @@ class AIAgent: "finish_reason": finish_reason, } + if hasattr(assistant_message, "reasoning_content"): + raw_reasoning_content = getattr(assistant_message, "reasoning_content", None) + if raw_reasoning_content is not None: + msg["reasoning_content"] = _sanitize_surrogates(raw_reasoning_content) + if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details: # Pass reasoning_details back unmodified so providers (OpenRouter, # Anthropic, OpenAI) can maintain reasoning continuity across turns. 
@@ -7213,6 +7292,30 @@ class AIAgent: return msg + def _copy_reasoning_content_for_api(self, source_msg: dict, api_msg: dict) -> None: + """Copy provider-facing reasoning fields onto an API replay message.""" + if source_msg.get("role") != "assistant": + return + + explicit_reasoning = source_msg.get("reasoning_content") + if isinstance(explicit_reasoning, str): + api_msg["reasoning_content"] = explicit_reasoning + return + + normalized_reasoning = source_msg.get("reasoning") + if isinstance(normalized_reasoning, str) and normalized_reasoning: + api_msg["reasoning_content"] = normalized_reasoning + return + + kimi_requires_reasoning = ( + self.provider in {"kimi-coding", "kimi-coding-cn"} + or base_url_host_matches(self.base_url, "api.kimi.com") + or base_url_host_matches(self.base_url, "moonshot.ai") + or base_url_host_matches(self.base_url, "moonshot.cn") + ) + if kimi_requires_reasoning and source_msg.get("tool_calls"): + api_msg["reasoning_content"] = "" + @staticmethod def _sanitize_tool_calls_for_strict_api(api_msg: dict) -> dict: """Strip Codex Responses API fields from tool_calls for strict providers. 
@@ -7296,10 +7399,7 @@ class AIAgent: api_messages = [] for msg in messages: api_msg = msg.copy() - if msg.get("role") == "assistant": - reasoning = msg.get("reasoning") - if reasoning: - api_msg["reasoning_content"] = reasoning + self._copy_reasoning_content_for_api(msg, api_msg) api_msg.pop("reasoning", None) api_msg.pop("finish_reason", None) api_msg.pop("_flush_sentinel", None) @@ -7357,7 +7457,7 @@ class AIAgent: if not _aux_available and self.api_mode == "codex_responses": # No auxiliary client -- use the Codex Responses path directly codex_kwargs = self._build_api_kwargs(api_messages) - codex_kwargs["tools"] = self._responses_tools([memory_tool_def]) + codex_kwargs["tools"] = self._get_transport().convert_tools([memory_tool_def]) if _flush_temperature is not None: codex_kwargs["temperature"] = _flush_temperature else: @@ -7366,9 +7466,9 @@ class AIAgent: codex_kwargs["max_output_tokens"] = 5120 response = self._run_codex_stream(codex_kwargs) elif not _aux_available and self.api_mode == "anthropic_messages": - # Native Anthropic โ€” use the Anthropic client directly - from agent.anthropic_adapter import build_anthropic_kwargs as _build_ant_kwargs - ant_kwargs = _build_ant_kwargs( + # Native Anthropic โ€” use the transport for kwargs + _tflush = self._get_transport() + ant_kwargs = _tflush.build_kwargs( model=self.model, messages=api_messages, tools=[memory_tool_def], max_tokens=5120, reasoning_config=None, @@ -7392,18 +7492,31 @@ class AIAgent: # Extract tool calls from the response, handling all API formats tool_calls = [] if self.api_mode == "codex_responses" and not _aux_available: - assistant_msg, _ = self._normalize_codex_response(response) - if assistant_msg and assistant_msg.tool_calls: - tool_calls = assistant_msg.tool_calls + _ct_flush = self._get_transport() + _cnr_flush = _ct_flush.normalize_response(response) + if _cnr_flush and _cnr_flush.tool_calls: + tool_calls = [ + SimpleNamespace( + id=tc.id, type="function", + 
function=SimpleNamespace(name=tc.name, arguments=tc.arguments), + ) for tc in _cnr_flush.tool_calls + ] elif self.api_mode == "anthropic_messages" and not _aux_available: - from agent.anthropic_adapter import normalize_anthropic_response as _nar_flush - _flush_msg, _ = _nar_flush(response, strip_tool_prefix=self._is_anthropic_oauth) - if _flush_msg and _flush_msg.tool_calls: - tool_calls = _flush_msg.tool_calls + _tfn = self._get_transport() + _flush_nr = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth) + if _flush_nr and _flush_nr.tool_calls: + tool_calls = [ + SimpleNamespace( + id=tc.id, type="function", + function=SimpleNamespace(name=tc.name, arguments=tc.arguments), + ) for tc in _flush_nr.tool_calls + ] elif hasattr(response, "choices") and response.choices: - assistant_message = response.choices[0].message - if assistant_message.tool_calls: - tool_calls = assistant_message.tool_calls + # chat_completions / bedrock โ€” normalize through transport + _flush_cc_nr = self._get_transport().normalize_response(response) + _flush_msg = self._nr_to_assistant_message(_flush_cc_nr) + if _flush_msg.tool_calls: + tool_calls = _flush_msg.tool_calls for tc in tool_calls: if tc.function.name == "memory": @@ -7559,8 +7672,27 @@ class AIAgent: finally: self._executing_tools = False + def _dispatch_delegate_task(self, function_args: dict) -> str: + """Single call site for delegate_task dispatch. + + New DELEGATE_TASK_SCHEMA fields only need to be added here to reach all + invocation paths (concurrent, sequential, inline). 
+ """ + from tools.delegate_tool import delegate_task as _delegate_task + return _delegate_task( + goal=function_args.get("goal"), + context=function_args.get("context"), + toolsets=function_args.get("toolsets"), + tasks=function_args.get("tasks"), + max_iterations=function_args.get("max_iterations"), + acp_command=function_args.get("acp_command"), + acp_args=function_args.get("acp_args"), + role=function_args.get("role"), + parent_agent=self, + ) + def _invoke_tool(self, function_name: str, function_args: dict, effective_task_id: str, - tool_call_id: Optional[str] = None) -> str: + tool_call_id: Optional[str] = None, messages: list = None) -> str: """Invoke a single tool and return the result string. No display logic. Handles both agent-level tools (todo, memory, etc.) and registry-dispatched @@ -7628,15 +7760,7 @@ class AIAgent: callback=self.clarify_callback, ) elif function_name == "delegate_task": - from tools.delegate_tool import delegate_task as _delegate_task - return _delegate_task( - goal=function_args.get("goal"), - context=function_args.get("context"), - toolsets=function_args.get("toolsets"), - tasks=function_args.get("tasks"), - max_iterations=function_args.get("max_iterations"), - parent_agent=self, - ) + return self._dispatch_delegate_task(function_args) else: return handle_function_call( function_name, function_args, effective_task_id, @@ -7784,8 +7908,7 @@ class AIAgent: # the tool returns True on the next poll. 
if self._interrupt_requested: try: - from tools.interrupt import set_interrupt as _sif - _sif(True, _worker_tid) + _set_interrupt(True, _worker_tid) except Exception: pass # Set the activity callback on THIS worker thread so @@ -7799,7 +7922,7 @@ class AIAgent: pass start = time.time() try: - result = self._invoke_tool(function_name, function_args, effective_task_id, tool_call.id) + result = self._invoke_tool(function_name, function_args, effective_task_id, tool_call.id, messages=messages) except Exception as tool_error: result = f"Error executing tool '{function_name}': {tool_error}" logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True) @@ -7816,8 +7939,7 @@ class AIAgent: with self._tool_worker_threads_lock: self._tool_worker_threads.discard(_worker_tid) try: - from tools.interrupt import set_interrupt as _sif - _sif(False, _worker_tid) + _set_interrupt(False, _worker_tid) except Exception: pass @@ -8152,7 +8274,6 @@ class AIAgent: if self._should_emit_quiet_tool_messages(): self._vprint(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}") elif function_name == "delegate_task": - from tools.delegate_tool import delegate_task as _delegate_task tasks_arg = function_args.get("tasks") if tasks_arg and isinstance(tasks_arg, list): spinner_label = f"๐Ÿ”€ delegating {len(tasks_arg)} tasks" @@ -8167,14 +8288,7 @@ class AIAgent: self._delegate_spinner = spinner _delegate_result = None try: - function_result = _delegate_task( - goal=function_args.get("goal"), - context=function_args.get("context"), - toolsets=function_args.get("toolsets"), - tasks=tasks_arg, - max_iterations=function_args.get("max_iterations"), - parent_agent=self, - ) + function_result = self._dispatch_delegate_task(function_args) _delegate_result = function_result finally: self._delegate_spinner = None @@ -8432,8 +8546,9 @@ class AIAgent: codex_kwargs = self._build_api_kwargs(api_messages) codex_kwargs.pop("tools", None) 
summary_response = self._run_codex_stream(codex_kwargs) - assistant_message, _ = self._normalize_codex_response(summary_response) - final_response = (assistant_message.content or "").strip() if assistant_message else "" + _ct_sum = self._get_transport() + _cnr_sum = _ct_sum.normalize_response(summary_response) + final_response = (_cnr_sum.content or "").strip() else: summary_kwargs = { "model": self.model, @@ -8461,21 +8576,18 @@ class AIAgent: summary_kwargs["extra_body"] = summary_extra_body if self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import build_anthropic_kwargs as _bak, normalize_anthropic_response as _nar - _ant_kw = _bak(model=self.model, messages=api_messages, tools=None, + _tsum = self._get_transport() + _ant_kw = _tsum.build_kwargs(model=self.model, messages=api_messages, tools=None, max_tokens=self.max_tokens, reasoning_config=self.reasoning_config, is_oauth=self._is_anthropic_oauth, preserve_dots=self._anthropic_preserve_dots()) summary_response = self._anthropic_messages_create(_ant_kw) - _msg, _ = _nar(summary_response, strip_tool_prefix=self._is_anthropic_oauth) - final_response = (_msg.content or "").strip() + _sum_nr = _tsum.normalize_response(summary_response, strip_tool_prefix=self._is_anthropic_oauth) + final_response = (_sum_nr.content or "").strip() else: summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary").chat.completions.create(**summary_kwargs) - - if summary_response.choices and summary_response.choices[0].message.content: - final_response = summary_response.choices[0].message.content - else: - final_response = "" + _sum_cc_nr = self._get_transport().normalize_response(summary_response) + final_response = (_sum_cc_nr.content or "").strip() if final_response: if "" in final_response: @@ -8490,17 +8602,18 @@ class AIAgent: codex_kwargs = self._build_api_kwargs(api_messages) codex_kwargs.pop("tools", None) retry_response = self._run_codex_stream(codex_kwargs) - retry_msg, _ = 
self._normalize_codex_response(retry_response) - final_response = (retry_msg.content or "").strip() if retry_msg else "" + _ct_retry = self._get_transport() + _cnr_retry = _ct_retry.normalize_response(retry_response) + final_response = (_cnr_retry.content or "").strip() elif self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import build_anthropic_kwargs as _bak2, normalize_anthropic_response as _nar2 - _ant_kw2 = _bak2(model=self.model, messages=api_messages, tools=None, + _tretry = self._get_transport() + _ant_kw2 = _tretry.build_kwargs(model=self.model, messages=api_messages, tools=None, is_oauth=self._is_anthropic_oauth, max_tokens=self.max_tokens, reasoning_config=self.reasoning_config, preserve_dots=self._anthropic_preserve_dots()) retry_response = self._anthropic_messages_create(_ant_kw2) - _retry_msg, _ = _nar2(retry_response, strip_tool_prefix=self._is_anthropic_oauth) - final_response = (_retry_msg.content or "").strip() + _retry_nr = _tretry.normalize_response(retry_response, strip_tool_prefix=self._is_anthropic_oauth) + final_response = (_retry_nr.content or "").strip() else: summary_kwargs = { "model": self.model, @@ -8514,11 +8627,8 @@ class AIAgent: summary_kwargs["extra_body"] = summary_extra_body summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary_retry").chat.completions.create(**summary_kwargs) - - if summary_response.choices and summary_response.choices[0].message.content: - final_response = summary_response.choices[0].message.content - else: - final_response = "" + _retry_cc_nr = self._get_transport().normalize_response(summary_response) + final_response = (_retry_cc_nr.content or "").strip() if final_response: if "" in final_response: @@ -8602,6 +8712,11 @@ class AIAgent: self._persist_user_message_override = persist_user_message # Generate unique task_id if not provided to isolate VMs between concurrent tasks effective_task_id = task_id or str(uuid.uuid4()) + # Expose the active task_id so 
tools running mid-turn (e.g. delegate_task + # in delegate_tool.py) can identify this agent for the cross-agent file + # state registry. Set BEFORE any tool dispatch so snapshots taken at + # child-launch time see the parent's real id, not None. + self._current_task_id = effective_task_id # Reset retry counters and iteration budget at the start of each turn # so subagent usage from a previous turn doesn't eat into the next one. @@ -9040,11 +9155,7 @@ class AIAgent: # For ALL assistant messages, pass reasoning back to the API # This ensures multi-turn reasoning context is preserved - if msg.get("role") == "assistant": - reasoning_text = msg.get("reasoning") - if reasoning_text: - # Add reasoning_content for API compatibility (Moonshot AI, Novita, OpenRouter) - api_msg["reasoning_content"] = reasoning_text + self._copy_reasoning_content_for_api(msg, api_msg) # Remove 'reasoning' field - it's for trajectory storage only # We've copied it to 'reasoning_content' for the API above @@ -9248,7 +9359,7 @@ class AIAgent: if self._force_ascii_payload: _sanitize_structure_non_ascii(api_kwargs) if self.api_mode == "codex_responses": - api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False) + api_kwargs = self._get_transport().preflight_kwargs(api_kwargs, allow_stream=False) try: from hermes_cli.plugins import invoke_hook as _invoke_hook @@ -9336,51 +9447,53 @@ class AIAgent: response_invalid = False error_details = [] if self.api_mode == "codex_responses": - output_items = getattr(response, "output", None) if response is not None else None - if response is None: - response_invalid = True - error_details.append("response is None") - elif not isinstance(output_items, list): - response_invalid = True - error_details.append("response.output is not a list") - elif not output_items: - # Stream backfill may have failed, but - # _normalize_codex_response can still recover - # from response.output_text. Only mark invalid - # when that fallback is also absent. 
- _out_text = getattr(response, "output_text", None) - _out_text_stripped = _out_text.strip() if isinstance(_out_text, str) else "" - if _out_text_stripped: - logger.debug( - "Codex response.output is empty but output_text is present " - "(%d chars); deferring to normalization.", - len(_out_text_stripped), - ) - else: - _resp_status = getattr(response, "status", None) - _resp_incomplete = getattr(response, "incomplete_details", None) - logger.warning( - "Codex response.output is empty after stream backfill " - "(status=%s, incomplete_details=%s, model=%s). %s", - _resp_status, _resp_incomplete, - getattr(response, "model", None), - f"api_mode={self.api_mode} provider={self.provider}", - ) + _ct_v = self._get_transport() + if not _ct_v.validate_response(response): + if response is None: response_invalid = True - error_details.append("response.output is empty") + error_details.append("response is None") + else: + # output_text fallback: stream backfill may have failed + # but normalize can still recover from output_text + _out_text = getattr(response, "output_text", None) + _out_text_stripped = _out_text.strip() if isinstance(_out_text, str) else "" + if _out_text_stripped: + logger.debug( + "Codex response.output is empty but output_text is present " + "(%d chars); deferring to normalization.", + len(_out_text_stripped), + ) + else: + _resp_status = getattr(response, "status", None) + _resp_incomplete = getattr(response, "incomplete_details", None) + logger.warning( + "Codex response.output is empty after stream backfill " + "(status=%s, incomplete_details=%s, model=%s). 
%s", + _resp_status, _resp_incomplete, + getattr(response, "model", None), + f"api_mode={self.api_mode} provider={self.provider}", + ) + response_invalid = True + error_details.append("response.output is empty") elif self.api_mode == "anthropic_messages": - content_blocks = getattr(response, "content", None) if response is not None else None - if response is None: + _tv = self._get_transport() + if not _tv.validate_response(response): response_invalid = True - error_details.append("response is None") - elif not isinstance(content_blocks, list): + if response is None: + error_details.append("response is None") + else: + error_details.append("response.content invalid (not a non-empty list)") + elif self.api_mode == "bedrock_converse": + _btv = self._get_transport() + if not _btv.validate_response(response): response_invalid = True - error_details.append("response.content is not a list") - elif not content_blocks: - response_invalid = True - error_details.append("response.content is empty") + if response is None: + error_details.append("response is None") + else: + error_details.append("Bedrock response invalid (no output or choices)") else: - if response is None or not hasattr(response, 'choices') or response.choices is None or not response.choices: + _ctv = self._get_transport() + if not _ctv.validate_response(response): response_invalid = True if response is None: error_details.append("response is None") @@ -9539,11 +9652,18 @@ class AIAgent: else: finish_reason = "stop" elif self.api_mode == "anthropic_messages": - stop_reason_map = {"end_turn": "stop", "tool_use": "tool_calls", "max_tokens": "length", "stop_sequence": "stop"} - finish_reason = stop_reason_map.get(response.stop_reason, "stop") + _tfr = self._get_transport() + finish_reason = _tfr.map_finish_reason(response.stop_reason) + elif self.api_mode == "bedrock_converse": + # Bedrock response already normalized at dispatch โ€” use transport + _bt_fr = self._get_transport() + _bt_fr_nr = 
_bt_fr.normalize_response(response) + finish_reason = _bt_fr_nr.finish_reason else: - finish_reason = response.choices[0].finish_reason - assistant_message = response.choices[0].message + _cc_fr = self._get_transport() + _cc_fr_nr = _cc_fr.normalize_response(response) + finish_reason = _cc_fr_nr.finish_reason + assistant_message = self._nr_to_assistant_message(_cc_fr_nr) if self._should_treat_stop_as_truncated( finish_reason, assistant_message, @@ -9566,13 +9686,14 @@ class AIAgent: # interim assistant message is byte-identical to what # would have been appended in the non-truncated path. _trunc_msg = None - if self.api_mode in ("chat_completions", "bedrock_converse"): - _trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None - elif self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import normalize_anthropic_response - _trunc_msg, _ = normalize_anthropic_response( + _trunc_transport = self._get_transport() + if self.api_mode == "anthropic_messages": + _trunc_nr = _trunc_transport.normalize_response( response, strip_tool_prefix=self._is_anthropic_oauth ) + else: + _trunc_nr = _trunc_transport.normalize_response(response) + _trunc_msg = self._nr_to_assistant_message(_trunc_nr) _trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None _trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False @@ -9821,6 +9942,7 @@ class AIAgent: billing_mode="subscription_included" if cost_result.status == "included" else None, model=self.model, + api_call_count=1, ) except Exception: pass # never block the agent loop @@ -9828,21 +9950,27 @@ class AIAgent: if self.verbose_logging: logging.debug(f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}") - # Log cache hit stats when prompt caching is active - if self._use_prompt_caching: - if self.api_mode == "anthropic_messages": 
- # Anthropic uses cache_read_input_tokens / cache_creation_input_tokens - cached = getattr(response.usage, 'cache_read_input_tokens', 0) or 0 - written = getattr(response.usage, 'cache_creation_input_tokens', 0) or 0 - else: - # OpenRouter uses prompt_tokens_details.cached_tokens - details = getattr(response.usage, 'prompt_tokens_details', None) - cached = getattr(details, 'cached_tokens', 0) or 0 if details else 0 - written = getattr(details, 'cache_write_tokens', 0) or 0 if details else 0 - prompt = usage_dict["prompt_tokens"] + # Surface cache hit stats for any provider that reports + # them โ€” not just those where we inject cache_control + # markers. OpenAI/Kimi/DeepSeek/Qwen all do automatic + # server-side prefix caching and return + # ``prompt_tokens_details.cached_tokens``; users + # previously could not see their cache % because this + # line was gated on ``_use_prompt_caching``, which is + # only True for Anthropic-style marker injection. + # ``canonical_usage`` is already normalised from all + # three API shapes (Anthropic / Codex / OpenAI-chat) + # so we can rely on its values directly. + cached = canonical_usage.cache_read_tokens + written = canonical_usage.cache_write_tokens + prompt = usage_dict["prompt_tokens"] + if (cached or written) and not self.quiet_mode: hit_pct = (cached / prompt * 100) if prompt > 0 else 0 - if not self.quiet_mode: - self._vprint(f"{self.log_prefix} ๐Ÿ’พ Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)") + self._vprint( + f"{self.log_prefix} ๐Ÿ’พ Cache: " + f"{cached:,}/{prompt:,} tokens " + f"({hit_pct:.0f}% hit, {written:,} written)" + ) has_retried_429 = False # Reset on success # Clear Nous rate limit state on successful request โ€” @@ -10091,6 +10219,27 @@ class AIAgent: if self._try_refresh_nous_client_credentials(force=True): print(f"{self.log_prefix}๐Ÿ” Nous agent key refreshed after 401. Retrying request...") continue + # Credential refresh didn't help โ€” show diagnostic info. 
+ # Most common causes: Portal OAuth expired/revoked, + # account out of credits, or agent key blocked. + from hermes_constants import display_hermes_home as _dhh_fn + _dhh = _dhh_fn() + _body_text = "" + try: + _body = getattr(api_error, "body", None) or getattr(api_error, "response", None) + if _body is not None: + _body_text = str(_body)[:200] + except Exception: + pass + print(f"{self.log_prefix}๐Ÿ” Nous 401 โ€” Portal authentication failed.") + if _body_text: + print(f"{self.log_prefix} Response: {_body_text}") + print(f"{self.log_prefix} Most likely: Portal OAuth expired, account out of credits, or agent key revoked.") + print(f"{self.log_prefix} Troubleshooting:") + print(f"{self.log_prefix} โ€ข Re-authenticate: hermes login --provider nous") + print(f"{self.log_prefix} โ€ข Check credits / billing: https://portal.nousresearch.com") + print(f"{self.log_prefix} โ€ข Verify stored credentials: {_dhh}/auth.json") + print(f"{self.log_prefix} โ€ข Switch providers temporarily: /model --provider openrouter") if ( self.api_mode == "anthropic_messages" and status_code == 401 @@ -10775,38 +10924,13 @@ class AIAgent: break try: - if self.api_mode == "codex_responses": - assistant_message, finish_reason = self._normalize_codex_response(response) - elif self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import normalize_anthropic_response_v2 - _nr = normalize_anthropic_response_v2( - response, strip_tool_prefix=self._is_anthropic_oauth - ) - # Back-compat shim: downstream code expects SimpleNamespace with - # .content, .tool_calls, .reasoning, .reasoning_content, - # .reasoning_details attributes. This shim makes the cost of the - # old interface visible โ€” it vanishes when the full transport - # wiring lands (PR 3+). 
- assistant_message = SimpleNamespace( - content=_nr.content, - tool_calls=[ - SimpleNamespace( - id=tc.id, - type="function", - function=SimpleNamespace(name=tc.name, arguments=tc.arguments), - ) - for tc in (_nr.tool_calls or []) - ] or None, - reasoning=_nr.reasoning, - reasoning_content=None, - reasoning_details=( - _nr.provider_data.get("reasoning_details") - if _nr.provider_data else None - ), - ) - finish_reason = _nr.finish_reason - else: - assistant_message = response.choices[0].message + _transport = self._get_transport() + _normalize_kwargs = {} + if self.api_mode == "anthropic_messages": + _normalize_kwargs["strip_tool_prefix"] = self._is_anthropic_oauth + _nr = _transport.normalize_response(response, **_normalize_kwargs) + assistant_message = self._nr_to_assistant_message(_nr) + finish_reason = _nr.finish_reason # Normalize content to string โ€” some OpenAI-compatible servers # (llama-server, etc.) return content as a dict or list instead @@ -11871,7 +11995,7 @@ def main( # Handle tool listing if list_tools: - from model_tools import get_all_tool_names, get_toolset_for_tool, get_available_toolsets + from model_tools import get_all_tool_names, get_available_toolsets from toolsets import get_all_toolsets, get_toolset_info print("๐Ÿ“‹ Available Tools & Toolsets:") diff --git a/scripts/discord-voice-doctor.py b/scripts/discord-voice-doctor.py index 6fc3f7b15..932ab519c 100755 --- a/scripts/discord-voice-doctor.py +++ b/scripts/discord-voice-doctor.py @@ -265,7 +265,7 @@ def check_config(groq_key, eleven_key): if voice_mode_path.exists(): try: import json - modes = json.loads(voice_mode_path.read_text()) + modes = json.loads(voice_mode_path.read_text(encoding="utf-8")) off_count = sum(1 for v in modes.values() if v == "off") all_count = sum(1 for v in modes.values() if v == "all") check("Voice mode state", True, f"{all_count} on, {off_count} off, {len(modes)} total") diff --git a/scripts/release.py b/scripts/release.py index 1a5a1ea8a..5d655775e 100755 --- 
a/scripts/release.py +++ b/scripts/release.py @@ -44,17 +44,24 @@ AUTHOR_MAP = { "teknium@nousresearch.com": "teknium1", "127238744+teknium1@users.noreply.github.com": "teknium1", # contributors (from noreply pattern) + "wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243", "snreynolds2506@gmail.com": "snreynolds", "35742124+0xbyt4@users.noreply.github.com": "0xbyt4", "71184274+MassiveMassimo@users.noreply.github.com": "MassiveMassimo", "massivemassimo@users.noreply.github.com": "MassiveMassimo", "82637225+kshitijk4poor@users.noreply.github.com": "kshitijk4poor", + "keifergu@tencent.com": "keifergu", "kshitijk4poor@users.noreply.github.com": "kshitijk4poor", + "abner.the.foreman@agentmail.to": "Abnertheforeman", + "harryykyle1@gmail.com": "hharry11", "kshitijk4poor@gmail.com": "kshitijk4poor", "16443023+stablegenius49@users.noreply.github.com": "stablegenius49", "185121704+stablegenius49@users.noreply.github.com": "stablegenius49", "101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit", + "255305877+ismell0992-afk@users.noreply.github.com": "ismell0992-afk", "valdi.jorge@gmail.com": "jvcl", + "francip@gmail.com": "francip", + "omni@comelse.com": "omnissiah-comelse", "oussama.redcode@gmail.com": "mavrickdeveloper", "126368201+vilkasdev@users.noreply.github.com": "vilkasdev", "137614867+cutepawss@users.noreply.github.com": "cutepawss", @@ -89,25 +96,33 @@ AUTHOR_MAP = { "135070653+sgaofen@users.noreply.github.com": "sgaofen", "nocoo@users.noreply.github.com": "nocoo", "30841158+n-WN@users.noreply.github.com": "n-WN", + "tsuijinglei@gmail.com": "hiddenpuppy", + "jerome@clawwork.ai": "HiddenPuppy", "leoyuan0099@gmail.com": "keyuyuan", "bxzt2006@163.com": "Only-Code-A", "i@troy-y.org": "TroyMitchell911", "mygamez@163.com": "zhongyueming1121", "hansnow@users.noreply.github.com": "hansnow", + "134848055+UNLINEARITY@users.noreply.github.com": "UNLINEARITY", + "ben.burtenshaw@gmail.com": "burtenshaw", + "roopaknijhara@gmail.com": "rnijhara", # contributors 
(manual mapping from git names) "ahmedsherif95@gmail.com": "asheriif", "liujinkun@bytedance.com": "liujinkun2025", "dmayhem93@gmail.com": "dmahan93", + "fr@tecompanytea.com": "ifrederico", "cdanis@gmail.com": "cdanis", "samherring99@gmail.com": "samherring99", "desaiaum08@gmail.com": "Aum08Desai", "shannon.sands.1979@gmail.com": "shannonsands", "shannon@nousresearch.com": "shannonsands", + "abdi.moya@gmail.com": "AxDSan", "eri@plasticlabs.ai": "Erosika", "hjcpuro@gmail.com": "hjc-puro", "xaydinoktay@gmail.com": "aydnOktay", "abdullahfarukozden@gmail.com": "Farukest", "lovre.pesut@gmail.com": "rovle", + "xjtumj@gmail.com": "mengjian-github", "kevinskysunny@gmail.com": "kevinskysunny", "xiewenxuan462@gmail.com": "yule975", "yiweimeng.dlut@hotmail.com": "meng93", @@ -122,9 +137,11 @@ AUTHOR_MAP = { "brooklyn.bb.nicholson@gmail.com": "brooklynnicholson", "withapurpose37@gmail.com": "StefanIsMe", "4317663+helix4u@users.noreply.github.com": "helix4u", + "ifkellx@users.noreply.github.com": "Ifkellx", "331214+counterposition@users.noreply.github.com": "counterposition", "blspear@gmail.com": "BrennerSpear", "akhater@gmail.com": "akhater", + "Cos_Admin@PTG-COS.lodluvup4uaudnm3ycd14giyug.xx.internal.cloudapp.net": "akhater", "239876380+handsdiff@users.noreply.github.com": "handsdiff", "hesapacicam112@gmail.com": "etherman-os", "mark.ramsell@rivermounts.com": "mark-ramsell", @@ -166,6 +183,7 @@ AUTHOR_MAP = { "adavyasharma@gmail.com": "adavyas", "acaayush1111@gmail.com": "aayushchaudhary", "jason@outland.art": "jasonoutland", + "73175452+Magaav@users.noreply.github.com": "Magaav", "mrflu1918@proton.me": "SPANISHFLU", "morganemoss@gmai.com": "mormio", "kopjop926@gmail.com": "cesareth", @@ -270,6 +288,7 @@ AUTHOR_MAP = { "srhtsrht17@gmail.com": "Sertug17", "stephenschoettler@gmail.com": "stephenschoettler", "tanishq231003@gmail.com": "yyovil", + "taosiyuan163@153.com": "taosiyuan163", "tesseracttars@gmail.com": "tesseracttars-creator", "tianliangjay@gmail.com": "xingkongliang", 
"tranquil_flow@protonmail.com": "Tranquil-Flow", @@ -307,6 +326,7 @@ AUTHOR_MAP = { "anthhub@163.com": "anthhub", "shenuu@gmail.com": "shenuu", "xiayh17@gmail.com": "xiayh0107", + "zhujianxyz@gmail.com": "opriz", "asurla@nvidia.com": "anniesurla", "limkuan24@gmail.com": "WideLee", "aviralarora002@gmail.com": "AviArora02-commits", @@ -322,6 +342,35 @@ AUTHOR_MAP = { "aniruddhaadak80@users.noreply.github.com": "aniruddhaadak80", "zheng.jerilyn@gmail.com": "jerilynzheng", "asslaenn5@gmail.com": "Aslaaen", + "shalompmc0505@naver.com": "pinion05", + "105142614+VTRiot@users.noreply.github.com": "VTRiot", + "vivien000812@gmail.com": "iamagenius00", + "89228157+Feranmi10@users.noreply.github.com": "Feranmi10", + "simon@gtcl.us": "simon-gtcl", + "suzukaze.haduki@gmail.com": "houko", + "cliff@cigii.com": "cgarwood82", + "anna@oa.ke": "anna-oake", + "jaffarkeikei@gmail.com": "jaffarkeikei", + "hxp@hxp.plus": "hxp-plus", + "3580442280@qq.com": "Tianworld", + "wujianxu91@gmail.com": "wujhsu", + "zhrh120@gmail.com": "niyoh120", + "vrinek@hey.com": "vrinek", + "268198004+xandersbell@users.noreply.github.com": "xandersbell", + "somme4096@gmail.com": "Somme4096", + "brian@tiuxo.com": "brianclemens", + "25944632+yudaiyan@users.noreply.github.com": "yudaiyan", + "chayton@sina.com": "ycbai", + "longsizhuo@gmail.com": "longsizhuo", + "chenb19870707@gmail.com": "ms-alan", + "276886827+WuTianyi123@users.noreply.github.com": "WuTianyi123", + "22549957+li0near@users.noreply.github.com": "li0near", + "23434080+sicnuyudidi@users.noreply.github.com": "sicnuyudidi", + "haimu0x0@proton.me": "haimu0x", + "abdelmajidnidnasser1@gmail.com": "NIDNASSER-Abdelmajid", + "projectadmin@wit.id": "projectadmin-dev", + "mrigankamondal10@gmail.com": "Dev-Mriganka", + "132275809+shushuzn@users.noreply.github.com": "shushuzn", } diff --git a/scripts/whatsapp-bridge/bridge.js b/scripts/whatsapp-bridge/bridge.js index 401651c8a..d1aeb7372 100644 --- a/scripts/whatsapp-bridge/bridge.js +++ 
b/scripts/whatsapp-bridge/bridge.js @@ -372,6 +372,37 @@ async function startSocket() { const app = express(); app.use(express.json()); +// Host-header validation โ€” defends against DNS rebinding. +// The bridge binds loopback-only (127.0.0.1) but a victim browser on +// the same machine could be tricked into fetching from an attacker +// hostname that TTL-flips to 127.0.0.1. Reject any request whose Host +// header doesn't resolve to a loopback alias. +// See GHSA-ppp5-vxwm-4cf7. +const _ACCEPTED_HOST_VALUES = new Set([ + 'localhost', + '127.0.0.1', + '[::1]', + '::1', +]); + +app.use((req, res, next) => { + const raw = (req.headers.host || '').trim(); + if (!raw) { + return res.status(400).json({ error: 'Missing Host header' }); + } + // Strip port suffix: "localhost:3000" โ†’ "localhost" + const hostOnly = (raw.includes(':') + ? raw.substring(0, raw.lastIndexOf(':')) + : raw + ).replace(/^\[|\]$/g, '').toLowerCase(); + if (!_ACCEPTED_HOST_VALUES.has(hostOnly)) { + return res.status(400).json({ + error: 'Invalid Host header. 
Bridge accepts loopback hosts only.', + }); + } + next(); +}); + // Poll for new messages (long-poll style) app.get('/messages', (req, res) => { const msgs = messageQueue.splice(0, messageQueue.length); diff --git a/scripts/whatsapp-bridge/package-lock.json b/scripts/whatsapp-bridge/package-lock.json index 570d8a735..2698a2872 100644 --- a/scripts/whatsapp-bridge/package-lock.json +++ b/scripts/whatsapp-bridge/package-lock.json @@ -8,7 +8,7 @@ "name": "hermes-whatsapp-bridge", "version": "1.0.0", "dependencies": { - "@whiskeysockets/baileys": "WhiskeySockets/Baileys#fix/abprops-abt-fetch", + "@whiskeysockets/baileys": "WhiskeySockets/Baileys#01047debd81beb20da7b7779b08edcb06aa03770", "express": "^4.21.0", "pino": "^9.0.0", "qrcode-terminal": "^0.12.0" diff --git a/skills/creative/baoyu-comic/PORT_NOTES.md b/skills/creative/baoyu-comic/PORT_NOTES.md new file mode 100644 index 000000000..637b7befb --- /dev/null +++ b/skills/creative/baoyu-comic/PORT_NOTES.md @@ -0,0 +1,77 @@ +# Port Notes โ€” baoyu-comic + +Ported from [JimLiu/baoyu-skills](https://github.com/JimLiu/baoyu-skills) v1.56.1. 
+ +## Changes from upstream + +### SKILL.md adaptations + +| Change | Upstream | Hermes | +|--------|----------|--------| +| Metadata namespace | `openclaw` | `hermes` (with `tags` + `homepage`) | +| Trigger | Slash commands / CLI flags | Natural language skill matching | +| User config | EXTEND.md file (project/user/XDG paths) | Removed โ€” not part of Hermes infra | +| User prompts | `AskUserQuestion` (batched) | `clarify` tool (one question at a time) | +| Image generation | baoyu-imagine (Bun/TypeScript, supports `--ref`) | `image_generate` โ€” **prompt-only**, returns a URL; no reference image input; agent must download the URL to the output directory | +| PDF assembly | `scripts/merge-to-pdf.ts` (Bun + `pdf-lib`) | Removed โ€” the PDF merge step is out of scope for this port; pages are delivered as PNGs only | +| Platform support | Linux/macOS/Windows/WSL/PowerShell | Linux/macOS only | +| File operations | Generic instructions | Hermes file tools (`write_file`, `read_file`) | + +### Structural removals + +- **`references/config/` directory** (removed entirely): + - `first-time-setup.md` โ€” blocking first-time setup flow for EXTEND.md + - `preferences-schema.md` โ€” EXTEND.md YAML schema + - `watermark-guide.md` โ€” watermark config (tied to EXTEND.md) +- **`scripts/` directory** (removed entirely): upstream's `merge-to-pdf.ts` depended on `pdf-lib`, which is not declared anywhere in the Hermes repo. Rather than add a new dependency, the port drops PDF assembly and delivers per-page PNGs. +- **Workflow Step 8 (Merge to PDF)** removed from `workflow.md`; Step 9 (Completion report) renumbered to Step 8. +- **Workflow Step 1.1** โ€” "Load Preferences (EXTEND.md)" section removed from `workflow.md`; steps 1.2/1.3 renumbered to 1.1/1.2. +- **Generic "User Input Tools" and "Image Generation Tools" preambles** โ€” SKILL.md no longer lists fallback rules for multiple possible tools; it references `clarify` and `image_generate` directly. 
+### Image generation strategy changes + +`image_generate`'s schema accepts only `prompt` and `aspect_ratio` (`landscape` | `portrait` | `square`). Upstream's reference-image flow (`--ref characters.png` for character consistency, plus user-supplied refs for style/palette/scene) does not map to this tool, so the workflow was restructured: + +- **Character sheet PNG** is still generated for multi-page comics, but it is repositioned as a **human-facing review artifact** (for visual verification) and a reference for later regenerations / manual prompt edits. Page prompts themselves are built from the **text descriptions** in `characters/characters.md` (embedded inline during Step 5). `image_generate` never sees the PNG as a visual input. +- **User-supplied reference images** are reduced to `style` / `palette` / `scene` trait extraction — traits are embedded in the prompt body; the image files themselves are kept only for provenance under `refs/`. +- **Page prompts** now mandate that character descriptions are embedded inline (copied from `characters/characters.md`) — this is the only mechanism left to enforce cross-page character consistency. +- **Download step** — after every `image_generate` call, the returned URL is fetched to disk (e.g., `curl -fsSL "<image-url>" -o <page-number>.png`) and verified before the workflow advances. + +### SKILL.md reductions + +- CLI option columns (`--art`, `--tone`, `--layout`, `--aspect`, `--lang`, `--ref`, `--storyboard-only`, `--prompts-only`, `--images-only`, `--regenerate`) converted to plain-English option descriptions. +- Preset files (`presets/*.md`) and `ohmsha-guide.md`: `` `--style X` `` / `` `--art X --tone Y` `` shorthand rewritten to `art=X, tone=Y` + natural-language references. +- `partial-workflows.md`: per-skill slash command invocations rewritten as user-intent cues; PDF-related outputs removed. +- `auto-selection.md`: priority order dropped the EXTEND.md tier. 
+- `analysis-framework.md`: language-priority comment updated (user option โ†’ conversation โ†’ source). + +### File naming convention + +Source content pasted by the user is saved as `source-{slug}.md`, where `{slug}` is the kebab-case topic slug used for the output directory. Backups follow the same pattern with a `-backup-YYYYMMDD-HHMMSS` suffix. SKILL.md and `workflow.md` now agree on this single convention. + +### What was preserved verbatim + +- All 6 art-style definitions (`references/art-styles/`) +- All 7 tone definitions (`references/tones/`) +- All 7 layout definitions (`references/layouts/`) +- Core templates: `character-template.md`, `storyboard-template.md`, `base-prompt.md` +- Preset bodies (only the first few intro lines adapted; special rules unchanged) +- Author, version, homepage attribution + +## Syncing with upstream + +To pull upstream updates: + +```bash +# Compare versions +curl -sL https://raw.githubusercontent.com/JimLiu/baoyu-skills/main/skills/baoyu-comic/SKILL.md | head -5 +# Look for the version: line + +# Diff a reference file +diff <(curl -sL https://raw.githubusercontent.com/JimLiu/baoyu-skills/main/skills/baoyu-comic/references/art-styles/manga.md) \ + references/art-styles/manga.md +``` + +Art-style, tone, and layout reference files can usually be overwritten directly (they're upstream-verbatim). `SKILL.md`, `references/workflow.md`, `references/partial-workflows.md`, `references/auto-selection.md`, `references/analysis-framework.md`, `references/ohmsha-guide.md`, and `references/presets/*.md` must be manually merged since they contain Hermes-specific adaptations. + +If upstream adds a Hermes-compatible PDF merge step (no extra npm deps), restore `scripts/` and reintroduce Step 8 in `workflow.md`. 
diff --git a/skills/creative/baoyu-comic/SKILL.md b/skills/creative/baoyu-comic/SKILL.md new file mode 100644 index 000000000..d3c89ed4c --- /dev/null +++ b/skills/creative/baoyu-comic/SKILL.md @@ -0,0 +1,246 @@ +--- +name: baoyu-comic +description: Knowledge comic creator supporting multiple art styles and tones. Creates original educational comics with detailed panel layouts and sequential image generation. Use when user asks to create "็Ÿฅ่ฏ†ๆผซ็”ป", "ๆ•™่‚ฒๆผซ็”ป", "biography comic", "tutorial comic", or "Logicomix-style comic". +version: 1.56.1 +author: ๅฎ็މ (JimLiu) +license: MIT +metadata: + hermes: + tags: [comic, knowledge-comic, creative, image-generation] + homepage: https://github.com/JimLiu/baoyu-skills#baoyu-comic +--- + +# Knowledge Comic Creator + +Adapted from [baoyu-comic](https://github.com/JimLiu/baoyu-skills) for Hermes Agent's tool ecosystem. + +Create original knowledge comics with flexible art style ร— tone combinations. + +## When to Use + +Trigger this skill when the user asks to create a knowledge/educational comic, biography comic, tutorial comic, or uses terms like "็Ÿฅ่ฏ†ๆผซ็”ป", "ๆ•™่‚ฒๆผซ็”ป", or "Logicomix-style". The user provides content (text, file path, URL, or topic) and optionally specifies art style, tone, layout, aspect ratio, or language. + +## Reference Images + +Hermes' `image_generate` tool is **prompt-only** โ€” it accepts a text prompt and an aspect ratio, and returns an image URL. It does **NOT** accept reference images. When the user supplies a reference image, use it to **extract traits in text** that get embedded in every page prompt: + +**Intake**: Accept file paths when the user provides them (or pastes images in conversation). 
+- File path(s) โ†’ copy to `refs/NN-ref-{slug}.{ext}` alongside the comic output for provenance +- Pasted image with no path โ†’ ask the user for the path via `clarify`, or extract style traits verbally as a text fallback +- No reference โ†’ skip this section + +**Usage modes** (per reference): + +| Usage | Effect | +|-------|--------| +| `style` | Extract style traits (line treatment, texture, mood) and append to every page's prompt body | +| `palette` | Extract hex colors and append to every page's prompt body | +| `scene` | Extract scene composition or subject notes and append to the relevant page(s) | + +**Record in each page's prompt frontmatter** when refs exist: + +```yaml +references: + - ref_id: 01 + filename: 01-ref-scene.png + usage: style + traits: "muted earth tones, soft-edged ink wash, low-contrast backgrounds" +``` + +Character consistency is driven by **text descriptions** in `characters/characters.md` (written in Step 3) that get embedded inline in every page prompt (Step 5). The optional PNG character sheet generated in Step 7.1 is a human-facing review artifact, not an input to `image_generate`. + +## Options + +### Visual Dimensions + +| Option | Values | Description | +|--------|--------|-------------| +| Art | ligne-claire (default), manga, realistic, ink-brush, chalk, minimalist | Art style / rendering technique | +| Tone | neutral (default), warm, dramatic, romantic, energetic, vintage, action | Mood / atmosphere | +| Layout | standard (default), cinematic, dense, splash, mixed, webtoon, four-panel | Panel arrangement | +| Aspect | 3:4 (default, portrait), 4:3 (landscape), 16:9 (widescreen) | Page aspect ratio | +| Language | auto (default), zh, en, ja, etc. | Output language | +| Refs | File paths | Reference images used for style / palette trait extraction (not passed to the image model). See [Reference Images](#reference-images) above. 
| + +### Partial Workflow Options + +| Option | Description | +|--------|-------------| +| Storyboard only | Generate storyboard only, skip prompts and images | +| Prompts only | Generate storyboard + prompts, skip images | +| Images only | Generate images from existing prompts directory | +| Regenerate N | Regenerate specific page(s) only (e.g., `3` or `2,5,8`) | + +Details: [references/partial-workflows.md](references/partial-workflows.md) + +### Art, Tone & Preset Catalogue + +- **Art styles** (6): `ligne-claire`, `manga`, `realistic`, `ink-brush`, `chalk`, `minimalist`. Full definitions at `references/art-styles/