diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py
index 46afe67f3b..18c384a9c2 100644
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -266,6 +266,14 @@ def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
     return True  # Any other endpoint is a third-party proxy
 
 
+def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
+    """Return True for Kimi's /coding endpoint that requires claude-code UA."""
+    normalized = _normalize_base_url_text(base_url)
+    if not normalized:
+        return False
+    return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")
+
+
 def _requires_bearer_auth(base_url: str | None) -> bool:
     """Return True for Anthropic-compatible providers that require Bearer auth.
 
@@ -323,9 +331,18 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: Optional
         kwargs["base_url"] = normalized_base_url
 
     common_betas = _common_betas_for_base_url(normalized_base_url)
-    if _requires_bearer_auth(normalized_base_url):
+    if _is_kimi_coding_endpoint(base_url):
+        # Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
+        # to be recognized as a valid Coding Agent. Without it, the endpoint
+        # returns 403. Check this BEFORE _requires_bearer_auth since both
+        # match api.kimi.com/coding.
+        kwargs["api_key"] = api_key
+        kwargs["default_headers"] = {
+            "User-Agent": "claude-code/0.1.0",
+            **({"anthropic-beta": ",".join(common_betas)} if common_betas else {}),
+        }
+    elif _requires_bearer_auth(normalized_base_url):
         # Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in
         # Authorization: Bearer even for regular API keys. Route those endpoints
         # through auth_token so the SDK sends Bearer auth instead of x-api-key.
         # Check this before OAuth token shape detection because MiniMax secrets do
         # not use Anthropic's sk-ant-api prefix and would otherwise be misread as
@@ -1409,11 +1426,25 @@ def build_anthropic_kwargs(
     # MiniMax Anthropic-compat endpoints support thinking (manual mode only,
     # not adaptive). Haiku does NOT support extended thinking — skip entirely.
     #
+    # Kimi's /coding endpoint speaks the Anthropic Messages protocol but has
+    # its own thinking semantics: when ``thinking.enabled`` is sent, Kimi
+    # validates the message history and requires every prior assistant
+    # tool-call message to carry OpenAI-style ``reasoning_content``. The
+    # Anthropic path never populates that field, and
+    # ``convert_messages_to_anthropic`` strips all Anthropic thinking blocks
+    # on third-party endpoints — so the request fails with HTTP 400
+    # "thinking is enabled but reasoning_content is missing in assistant
+    # tool call message at index N". Kimi's reasoning is driven server-side
+    # on the /coding route, so skip Anthropic's thinking parameter entirely
+    # for that host. (Kimi on chat_completions enables thinking via
+    # extra_body in the ChatCompletionsTransport — see #13503.)
+    #
     # On 4.7+ the `thinking.display` field defaults to "omitted", which
     # silently hides reasoning text that Hermes surfaces in its CLI. We
     # request "summarized" so the reasoning blocks stay populated — matching
     # 4.6 behavior and preserving the activity-feed UX during long tool runs.
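For orientation, a minimal sketch of the client this branch ends up building, using only the public anthropic SDK constructor (api_key, base_url, and default_headers are real SDK kwargs; the URL and UA strings are the ones hard-coded above):

from anthropic import Anthropic

def sketch_kimi_coding_client(api_key: str) -> Anthropic:
    # No /v1 suffix on the base URL: the SDK appends /v1/messages itself,
    # so "/coding" resolves to "/coding/v1/messages" (see KIMI_CODE_BASE_URL).
    return Anthropic(
        api_key=api_key,
        base_url="https://api.kimi.com/coding",
        default_headers={"User-Agent": "claude-code/0.1.0"},  # 403 without it
    )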
- if reasoning_config and isinstance(reasoning_config, dict): + _is_kimi_coding = _is_kimi_coding_endpoint(base_url) + if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding: if reasoning_config.get("enabled") is not False and "haiku" not in model.lower(): effort = str(reasoning_config.get("effort", "medium")).lower() budget = THINKING_BUDGET.get(effort, 8000) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index e4223771fd..b164b54d20 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -185,8 +185,6 @@ auxiliary_is_nous: bool = False # Default auxiliary models per provider _OPENROUTER_MODEL = "google/gemini-3-flash-preview" _NOUS_MODEL = "google/gemini-3-flash-preview" -_NOUS_FREE_TIER_VISION_MODEL = "xiaomi/mimo-v2-omni" -_NOUS_FREE_TIER_AUX_MODEL = "xiaomi/mimo-v2-pro" _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1" _ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com" _AUTH_JSON_PATH = get_hermes_home() / "auth.json" @@ -731,6 +729,33 @@ def _nous_base_url() -> str: return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL) +def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[str, str]]: + """Return fresh Nous runtime credentials when available. + + This mirrors the main agent's 401 recovery path and keeps auxiliary + clients aligned with the singleton auth store + mint flow instead of + relying only on whatever raw tokens happen to be sitting in auth.json + or the credential pool. + """ + try: + from hermes_cli.auth import resolve_nous_runtime_credentials + + creds = resolve_nous_runtime_credentials( + min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))), + timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")), + force_mint=force_refresh, + ) + except Exception as exc: + logger.debug("Auxiliary Nous runtime credential resolution failed: %s", exc) + return None + + api_key = str(creds.get("api_key") or "").strip() + base_url = str(creds.get("base_url") or "").strip().rstrip("/") + if not api_key or not base_url: + return None + return api_key, base_url + + def _read_codex_access_token() -> Optional[str]: """Read a valid, non-expired Codex OAuth access token from Hermes auth store. 
@@ -826,7 +851,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[Union[OpenAI, "GeminiNativeCli return GeminiNativeClient(api_key=api_key, base_url=base_url), model extra = {} if base_url_host_matches(base_url, "api.kimi.com"): - extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} + extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"} elif base_url_host_matches(base_url, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers @@ -852,7 +877,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[Union[OpenAI, "GeminiNativeCli return GeminiNativeClient(api_key=api_key, base_url=base_url), model extra = {} if base_url_host_matches(base_url, "api.kimi.com"): - extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} + extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"} elif base_url_host_matches(base_url, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers @@ -902,29 +927,50 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]: pass nous = _read_nous_auth() - if not nous: + runtime = _resolve_nous_runtime_api(force_refresh=False) + if runtime is None and not nous: return None, None global auxiliary_is_nous auxiliary_is_nous = True logger.debug("Auxiliary client: Nous Portal") - if nous.get("source") == "pool": - model = "gemini-3-flash" - else: - model = _NOUS_MODEL - # Free-tier users can't use paid auxiliary models — use the free - # models instead: mimo-v2-omni for vision, mimo-v2-pro for text tasks. + + # Ask the Portal which model it currently recommends for this task type. + # The /api/nous/recommended-models endpoint is the authoritative source: + # it distinguishes paid vs free tier recommendations, and get_nous_recommended_aux_model + # auto-detects the caller's tier via check_nous_free_tier(). Fall back to + # _NOUS_MODEL (google/gemini-3-flash-preview) when the Portal is unreachable + # or returns a null recommendation for this task type. 
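Condensed, the credential precedence _try_nous now applies (a sketch; helper names are the ones in this file, and the runtime tuple shape matches _resolve_nous_runtime_api above):

def sketch_nous_credentials(runtime, nous_auth):
    # Freshly minted runtime creds win; raw auth.json tokens are the fallback.
    if runtime is not None:
        return runtime  # (api_key, base_url)
    if nous_auth:
        base = str(nous_auth.get("inference_base_url") or _nous_base_url())
        return _nous_api_key(nous_auth), base.rstrip("/")
    return None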
+ model = _NOUS_MODEL try: - from hermes_cli.models import check_nous_free_tier - if check_nous_free_tier(): - model = _NOUS_FREE_TIER_VISION_MODEL if vision else _NOUS_FREE_TIER_AUX_MODEL - logger.debug("Free-tier Nous account — using %s for auxiliary/%s", - model, "vision" if vision else "text") - except Exception: - pass + from hermes_cli.models import get_nous_recommended_aux_model + recommended = get_nous_recommended_aux_model(vision=vision) + if recommended: + model = recommended + logger.debug( + "Auxiliary/%s: using Portal-recommended model %s", + "vision" if vision else "text", model, + ) + else: + logger.debug( + "Auxiliary/%s: no Portal recommendation, falling back to %s", + "vision" if vision else "text", model, + ) + except Exception as exc: + logger.debug( + "Auxiliary/%s: recommended-models lookup failed (%s); " + "falling back to %s", + "vision" if vision else "text", exc, model, + ) + + if runtime is not None: + api_key, base_url = runtime + else: + api_key = _nous_api_key(nous or {}) + base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/") return ( OpenAI( - api_key=_nous_api_key(nous), - base_url=str(nous.get("inference_base_url") or _nous_base_url()).rstrip("/"), + api_key=api_key, + base_url=base_url, ), model, ) @@ -1268,6 +1314,15 @@ def _is_connection_error(exc: Exception) -> bool: return False +def _is_auth_error(exc: Exception) -> bool: + """Detect auth failures that should trigger provider-specific refresh.""" + status = getattr(exc, "status_code", None) + if status == 401: + return True + err_lower = str(exc).lower() + return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower() + + def _try_payment_fallback( failed_provider: str, task: str = None, @@ -1451,7 +1506,7 @@ def _to_async_client(sync_client, model: str): async_kwargs["default_headers"] = copilot_default_headers() elif base_url_host_matches(sync_base_url, "api.kimi.com"): - async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} + async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"} return AsyncOpenAI(**async_kwargs), model @@ -1575,7 +1630,13 @@ def resolve_provider_client( # ── Nous Portal (OAuth) ────────────────────────────────────────── if provider == "nous": - client, default = _try_nous() + # Detect vision tasks: either explicit model override from + # _PROVIDER_VISION_MODELS, or caller passed a known vision model. 
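A quick sketch of the 401 shapes _is_auth_error() is written to catch. FakeAuthError is hypothetical; the OpenAI SDK's AuthenticationError carries a matching class name and status_code == 401:

class FakeAuthError(Exception):
    status_code = 401  # SDK-style attribute

assert _is_auth_error(FakeAuthError("invalid key"))            # status match
assert _is_auth_error(Exception("Error code: 401 - expired"))  # message match
assert not _is_auth_error(Exception("Error code: 429"))        # not an auth error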
+ _is_vision = ( + model in _PROVIDER_VISION_MODELS.values() + or (model or "").strip().lower() == "mimo-v2-omni" + ) + client, default = _try_nous(vision=_is_vision) if client is None: logger.warning("resolve_provider_client: nous requested " "but Nous Portal not configured (run: hermes auth)") @@ -1632,7 +1693,7 @@ def resolve_provider_client( ) extra = {} if base_url_host_matches(custom_base, "api.kimi.com"): - extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} + extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"} elif base_url_host_matches(custom_base, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers extra["default_headers"] = copilot_default_headers() @@ -1739,7 +1800,7 @@ def resolve_provider_client( # Provider-specific headers headers = {} if base_url_host_matches(base_url, "api.kimi.com"): - headers["User-Agent"] = "KimiCLI/1.30.0" + headers["User-Agent"] = "claude-code/0.1.0" elif base_url_host_matches(base_url, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers @@ -1971,24 +2032,35 @@ def resolve_vision_provider_client( # _PROVIDER_VISION_MODELS provides per-provider vision model # overrides when the provider has a dedicated multimodal model # that differs from the chat model (e.g. xiaomi → mimo-v2-omni, - # zai → glm-5v-turbo). + # zai → glm-5v-turbo). Nous is the exception: it has a dedicated + # strict vision backend with tier-aware defaults, so it must not + # fall through to the user's text chat model here. # 2. OpenRouter (vision-capable aggregator fallback) # 3. Nous Portal (vision-capable aggregator fallback) # 4. Stop main_provider = _read_main_provider() main_model = _read_main_model() if main_provider and main_provider not in ("auto", ""): - vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model) - rpc_client, rpc_model = resolve_provider_client( - main_provider, vision_model, - api_mode=resolved_api_mode) - if rpc_client is not None: - logger.info( - "Vision auto-detect: using main provider %s (%s)", - main_provider, rpc_model or vision_model, - ) - return _finalize( - main_provider, rpc_client, rpc_model or vision_model) + if main_provider == "nous": + sync_client, default_model = _resolve_strict_vision_backend(main_provider) + if sync_client is not None: + logger.info( + "Vision auto-detect: using main provider %s (%s)", + main_provider, default_model or resolved_model or main_model, + ) + return _finalize(main_provider, sync_client, default_model) + else: + vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model) + rpc_client, rpc_model = resolve_provider_client( + main_provider, vision_model, + api_mode=resolved_api_mode) + if rpc_client is not None: + logger.info( + "Vision auto-detect: using main provider %s (%s)", + main_provider, rpc_model or vision_model, + ) + return _finalize( + main_provider, rpc_client, rpc_model or vision_model) # Fall back through aggregators (uses their dedicated vision model, # not the user's main model) when main provider has no client. 
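The routing decision above, condensed into a sketch (helper names as used in this file; error handling, logging, and the api_mode plumbing omitted):

def sketch_vision_route(main_provider: str, main_model: str):
    if main_provider == "nous":
        # Dedicated tier-aware vision backend; never the user's text model.
        return _resolve_strict_vision_backend(main_provider)
    vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
    return resolve_provider_client(main_provider, vision_model)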
@@ -2063,6 +2135,76 @@ _client_cache_lock = threading.Lock() _CLIENT_CACHE_MAX_SIZE = 64 # safety belt — evict oldest when exceeded +def _client_cache_key( + provider: str, + *, + async_mode: bool, + base_url: Optional[str] = None, + api_key: Optional[str] = None, + api_mode: Optional[str] = None, + main_runtime: Optional[Dict[str, Any]] = None, +) -> tuple: + runtime = _normalize_main_runtime(main_runtime) + runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else () + return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key) + + +def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None: + with _client_cache_lock: + old_entry = _client_cache.get(cache_key) + if old_entry is not None and old_entry[0] is not client: + _force_close_async_httpx(old_entry[0]) + try: + close_fn = getattr(old_entry[0], "close", None) + if callable(close_fn): + close_fn() + except Exception: + pass + _client_cache[cache_key] = (client, default_model, bound_loop) + + +def _refresh_nous_auxiliary_client( + *, + cache_provider: str, + model: Optional[str], + async_mode: bool, + base_url: Optional[str] = None, + api_key: Optional[str] = None, + api_mode: Optional[str] = None, + main_runtime: Optional[Dict[str, Any]] = None, +) -> Tuple[Optional[Any], Optional[str]]: + """Refresh Nous runtime creds, rebuild the client, and replace the cache entry.""" + runtime = _resolve_nous_runtime_api(force_refresh=True) + if runtime is None: + return None, model + + fresh_key, fresh_base_url = runtime + sync_client = OpenAI(api_key=fresh_key, base_url=fresh_base_url) + final_model = model + + current_loop = None + if async_mode: + try: + import asyncio as _aio + current_loop = _aio.get_event_loop() + except RuntimeError: + pass + client, final_model = _to_async_client(sync_client, final_model or "") + else: + client = sync_client + + cache_key = _client_cache_key( + cache_provider, + async_mode=async_mode, + base_url=base_url, + api_key=api_key, + api_mode=api_mode, + main_runtime=main_runtime, + ) + _store_cached_client(cache_key, client, final_model, bound_loop=current_loop) + return client, final_model + + def neuter_async_httpx_del() -> None: """Monkey-patch ``AsyncHttpxClientWrapper.__del__`` to be a no-op. 
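The cache key _client_cache_key() produces, illustrated (runtime fields participate only for provider="auto", per the code above; the key value is a placeholder):

key = _client_cache_key(
    "nous",
    async_mode=False,
    base_url="https://inference-api.nousresearch.com/v1",
    api_key="sk-example",
    api_mode="chat_completions",
)
# -> ("nous", False, "https://inference-api.nousresearch.com/v1",
#     "sk-example", "chat_completions", ())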
@@ -2216,8 +2358,14 @@ def _get_cached_client(
         except RuntimeError:
             pass
     runtime = _normalize_main_runtime(main_runtime)
-    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
-    cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
+    cache_key = _client_cache_key(
+        provider,
+        async_mode=async_mode,
+        base_url=base_url,
+        api_key=api_key,
+        api_mode=api_mode,
+        main_runtime=main_runtime,
+    )
     with _client_cache_lock:
         if cache_key in _client_cache:
             cached_client, cached_default, cached_loop = _client_cache[cache_key]
@@ -2665,6 +2813,29 @@ def call_llm(
                 raise
             first_err = retry_err
 
+        # ── Nous auth refresh parity with main agent ──────────────────
+        client_is_nous = (
+            resolved_provider == "nous"
+            or base_url_host_matches(_base_info, "inference-api.nousresearch.com")
+        )
+        if _is_auth_error(first_err) and client_is_nous:
+            refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
+                cache_provider=resolved_provider or "nous",
+                model=final_model,
+                async_mode=False,
+                base_url=resolved_base_url,
+                api_key=resolved_api_key,
+                api_mode=resolved_api_mode,
+                main_runtime=main_runtime,
+            )
+            if refreshed_client is not None:
+                logger.info("Auxiliary %s: refreshed Nous runtime credentials after 401, retrying",
+                            task or "call")
+                if refreshed_model and refreshed_model != kwargs.get("model"):
+                    kwargs["model"] = refreshed_model
+                return _validate_llm_response(
+                    refreshed_client.chat.completions.create(**kwargs), task)
+
         # ── Payment / credit exhaustion fallback ──────────────────────
         # When the resolved provider returns 402 or a credit-related error,
         # try alternative providers instead of giving up. This handles the
@@ -2863,6 +3034,28 @@ async def async_call_llm(
                 raise
             first_err = retry_err
 
+        # ── Nous auth refresh parity with main agent ──────────────────
+        client_is_nous = (
+            resolved_provider == "nous"
+            or base_url_host_matches(_client_base, "inference-api.nousresearch.com")
+        )
+        if _is_auth_error(first_err) and client_is_nous:
+            refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
+                cache_provider=resolved_provider or "nous",
+                model=final_model,
+                async_mode=True,
+                base_url=resolved_base_url,
+                api_key=resolved_api_key,
+                api_mode=resolved_api_mode,
+            )
+            if refreshed_client is not None:
+                logger.info("Auxiliary %s (async): refreshed Nous runtime credentials after 401, retrying",
+                            task or "call")
+                if refreshed_model and refreshed_model != kwargs.get("model"):
+                    kwargs["model"] = refreshed_model
+                return _validate_llm_response(
+                    await refreshed_client.chat.completions.create(**kwargs), task)
+
         # ── Payment / connection fallback (mirrors sync call_llm) ─────
         should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
         is_auto = resolved_provider in ("auto", "", None)
diff --git a/agent/image_gen_provider.py b/agent/image_gen_provider.py
new file mode 100644
index 0000000000..47f65c1b34
--- /dev/null
+++ b/agent/image_gen_provider.py
@@ -0,0 +1,242 @@
+"""
+Image Generation Provider ABC
+=============================
+
+Defines the pluggable-backend interface for image generation. Providers register
+instances via ``PluginContext.register_image_gen_provider()``; the active one
+(selected via ``image_gen.provider`` in ``config.yaml``) services every
+``image_generate`` tool call.
+
+Providers live in ``<repo>/plugins/image_gen/<name>/`` (built-in, auto-loaded
+as ``kind: backend``) or ``~/.hermes/plugins/image_gen/<name>/`` (user, opt-in
+via ``plugins.enabled``).
+ +Response shape +-------------- +All providers return a dict that :func:`success_response` / :func:`error_response` +produce. The tool wrapper JSON-serializes it. Keys: + + success bool + image str | None URL or absolute file path + model str provider-specific model identifier + prompt str echoed prompt + aspect_ratio str "landscape" | "square" | "portrait" + provider str provider name (for diagnostics) + error str only when success=False + error_type str only when success=False +""" + +from __future__ import annotations + +import abc +import base64 +import datetime +import logging +import uuid +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + + +VALID_ASPECT_RATIOS: Tuple[str, ...] = ("landscape", "square", "portrait") +DEFAULT_ASPECT_RATIO = "landscape" + + +# --------------------------------------------------------------------------- +# ABC +# --------------------------------------------------------------------------- + + +class ImageGenProvider(abc.ABC): + """Abstract base class for an image generation backend. + + Subclasses must implement :meth:`generate`. Everything else has sane + defaults — override only what your provider needs. + """ + + @property + @abc.abstractmethod + def name(self) -> str: + """Stable short identifier used in ``image_gen.provider`` config. + + Lowercase, no spaces. Examples: ``fal``, ``openai``, ``replicate``. + """ + + @property + def display_name(self) -> str: + """Human-readable label shown in ``hermes tools``. Defaults to ``name.title()``.""" + return self.name.title() + + def is_available(self) -> bool: + """Return True when this provider can service calls. + + Typically checks for a required API key. Default: True + (providers with no external dependencies are always available). + """ + return True + + def list_models(self) -> List[Dict[str, Any]]: + """Return catalog entries for ``hermes tools`` model picker. + + Each entry:: + + { + "id": "gpt-image-1.5", # required + "display": "GPT Image 1.5", # optional; defaults to id + "speed": "~10s", # optional + "strengths": "...", # optional + "price": "$...", # optional + } + + Default: empty list (provider has no user-selectable models). + """ + return [] + + def get_setup_schema(self) -> Dict[str, Any]: + """Return provider metadata for the ``hermes tools`` picker. + + Used by ``tools_config.py`` to inject this provider as a row in + the Image Generation provider list. Shape:: + + { + "name": "OpenAI", # picker label + "badge": "paid", # optional short tag + "tag": "One-line description...", # optional subtitle + "env_vars": [ # keys to prompt for + {"key": "OPENAI_API_KEY", + "prompt": "OpenAI API key", + "url": "https://platform.openai.com/api-keys"}, + ], + } + + Default: minimal entry derived from ``display_name``. Override to + expose API key prompts and custom badges. + """ + return { + "name": self.display_name, + "badge": "", + "tag": "", + "env_vars": [], + } + + def default_model(self) -> Optional[str]: + """Return the default model id, or None if not applicable.""" + models = self.list_models() + if models: + return models[0].get("id") + return None + + @abc.abstractmethod + def generate( + self, + prompt: str, + aspect_ratio: str = DEFAULT_ASPECT_RATIO, + **kwargs: Any, + ) -> Dict[str, Any]: + """Generate an image. + + Implementations should return the dict from :func:`success_response` + or :func:`error_response`. 
``kwargs`` may contain forward-compat
+        parameters future versions of the schema will expose — implementations
+        should ignore unknown keys.
+        """
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def resolve_aspect_ratio(value: Optional[str]) -> str:
+    """Clamp an aspect_ratio value to the valid set, defaulting to landscape.
+
+    Invalid values are coerced rather than rejected so the tool surface is
+    forgiving of agent mistakes.
+    """
+    if not isinstance(value, str):
+        return DEFAULT_ASPECT_RATIO
+    v = value.strip().lower()
+    if v in VALID_ASPECT_RATIOS:
+        return v
+    return DEFAULT_ASPECT_RATIO
+
+
+def _images_cache_dir() -> Path:
+    """Return ``$HERMES_HOME/cache/images/``, creating parents as needed."""
+    from hermes_constants import get_hermes_home
+
+    path = get_hermes_home() / "cache" / "images"
+    path.mkdir(parents=True, exist_ok=True)
+    return path
+
+
+def save_b64_image(
+    b64_data: str,
+    *,
+    prefix: str = "image",
+    extension: str = "png",
+) -> Path:
+    """Decode base64 image data and write it under ``$HERMES_HOME/cache/images/``.
+
+    Returns the absolute :class:`Path` to the saved file.
+
+    Filename format: ``<prefix>_<ts>_<short>.<extension>``.
+    """
+    raw = base64.b64decode(b64_data)
+    ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    short = uuid.uuid4().hex[:8]
+    path = _images_cache_dir() / f"{prefix}_{ts}_{short}.{extension}"
+    path.write_bytes(raw)
+    return path
+
+
+def success_response(
+    *,
+    image: str,
+    model: str,
+    prompt: str,
+    aspect_ratio: str,
+    provider: str,
+    extra: Optional[Dict[str, Any]] = None,
+) -> Dict[str, Any]:
+    """Build a uniform success response dict.
+
+    ``image`` may be an HTTP URL or an absolute filesystem path (for b64
+    providers like OpenAI). Callers that need to pass through additional
+    backend-specific fields can supply ``extra``.
+    """
+    payload: Dict[str, Any] = {
+        "success": True,
+        "image": image,
+        "model": model,
+        "prompt": prompt,
+        "aspect_ratio": aspect_ratio,
+        "provider": provider,
+    }
+    if extra:
+        for k, v in extra.items():
+            payload.setdefault(k, v)
+    return payload
+
+
+def error_response(
+    *,
+    error: str,
+    error_type: str = "provider_error",
+    provider: str = "",
+    model: str = "",
+    prompt: str = "",
+    aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+) -> Dict[str, Any]:
+    """Build a uniform error response dict."""
+    return {
+        "success": False,
+        "image": None,
+        "error": error,
+        "error_type": error_type,
+        "model": model,
+        "prompt": prompt,
+        "aspect_ratio": aspect_ratio,
+        "provider": provider,
+    }
diff --git a/agent/image_gen_registry.py b/agent/image_gen_registry.py
new file mode 100644
index 0000000000..715133231c
--- /dev/null
+++ b/agent/image_gen_registry.py
@@ -0,0 +1,120 @@
+"""
+Image Generation Provider Registry
+==================================
+
+Central map of registered providers. Populated by plugins at import-time via
+``PluginContext.register_image_gen_provider()``; consumed by the
+``image_generate`` tool to dispatch each call to the active backend.
+
+Active selection
+----------------
+The active provider is chosen by ``image_gen.provider`` in ``config.yaml``.
+If unset, :func:`get_active_provider` applies fallback logic:
+
+1. If exactly one provider is registered, use it.
+2. Otherwise if a provider named ``fal`` is registered, use it (legacy
+   default — matches pre-plugin behavior).
+3.
Otherwise return ``None`` (the tool surfaces a helpful error pointing + the user at ``hermes tools``). +""" + +from __future__ import annotations + +import logging +import threading +from typing import Dict, List, Optional + +from agent.image_gen_provider import ImageGenProvider + +logger = logging.getLogger(__name__) + + +_providers: Dict[str, ImageGenProvider] = {} +_lock = threading.Lock() + + +def register_provider(provider: ImageGenProvider) -> None: + """Register an image generation provider. + + Re-registration (same ``name``) overwrites the previous entry and logs + a debug message — this makes hot-reload scenarios (tests, dev loops) + behave predictably. + """ + if not isinstance(provider, ImageGenProvider): + raise TypeError( + f"register_provider() expects an ImageGenProvider instance, " + f"got {type(provider).__name__}" + ) + name = provider.name + if not isinstance(name, str) or not name.strip(): + raise ValueError("Image gen provider .name must be a non-empty string") + with _lock: + existing = _providers.get(name) + _providers[name] = provider + if existing is not None: + logger.debug("Image gen provider '%s' re-registered (was %r)", name, type(existing).__name__) + else: + logger.debug("Registered image gen provider '%s' (%s)", name, type(provider).__name__) + + +def list_providers() -> List[ImageGenProvider]: + """Return all registered providers, sorted by name.""" + with _lock: + items = list(_providers.values()) + return sorted(items, key=lambda p: p.name) + + +def get_provider(name: str) -> Optional[ImageGenProvider]: + """Return the provider registered under *name*, or None.""" + if not isinstance(name, str): + return None + with _lock: + return _providers.get(name.strip()) + + +def get_active_provider() -> Optional[ImageGenProvider]: + """Resolve the currently-active provider. + + Reads ``image_gen.provider`` from config.yaml; falls back per the + module docstring. + """ + configured: Optional[str] = None + try: + from hermes_cli.config import load_config + + cfg = load_config() + section = cfg.get("image_gen") if isinstance(cfg, dict) else None + if isinstance(section, dict): + raw = section.get("provider") + if isinstance(raw, str) and raw.strip(): + configured = raw.strip() + except Exception as exc: + logger.debug("Could not read image_gen.provider from config: %s", exc) + + with _lock: + snapshot = dict(_providers) + + if configured: + provider = snapshot.get(configured) + if provider is not None: + return provider + logger.debug( + "image_gen.provider='%s' configured but not registered; falling back", + configured, + ) + + # Fallback: single-provider case + if len(snapshot) == 1: + return next(iter(snapshot.values())) + + # Fallback: prefer legacy FAL for backward compat + if "fal" in snapshot: + return snapshot["fal"] + + return None + + +def _reset_for_tests() -> None: + """Clear the registry. **Test-only.**""" + with _lock: + _providers.clear() diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index 2a21043494..8e061f831b 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -350,7 +350,13 @@ PLATFORM_HINTS = { ), "cli": ( "You are a CLI AI Agent. Try not to use markdown but simple text " - "renderable inside a terminal." + "renderable inside a terminal. " + "File delivery: there is no attachment channel — the user reads your " + "response directly in their terminal. 
Do NOT emit MEDIA:/path tags " + "(those are only intercepted on messaging platforms like Telegram, " + "Discord, Slack, etc.; on the CLI they render as literal text). " + "When referring to a file you created or changed, just state its " + "absolute path in plain text; the user can open it from there." ), "sms": ( "You are communicating via SMS. Keep responses concise and use plain text " diff --git a/agent/transports/__init__.py b/agent/transports/__init__.py index 6cd3a277a1..5752113325 100644 --- a/agent/transports/__init__.py +++ b/agent/transports/__init__.py @@ -37,3 +37,15 @@ def _discover_transports() -> None: import agent.transports.anthropic # noqa: F401 except ImportError: pass + try: + import agent.transports.codex # noqa: F401 + except ImportError: + pass + try: + import agent.transports.chat_completions # noqa: F401 + except ImportError: + pass + try: + import agent.transports.bedrock # noqa: F401 + except ImportError: + pass diff --git a/agent/transports/bedrock.py b/agent/transports/bedrock.py new file mode 100644 index 0000000000..af549e7eae --- /dev/null +++ b/agent/transports/bedrock.py @@ -0,0 +1,154 @@ +"""AWS Bedrock Converse API transport. + +Delegates to the existing adapter functions in agent/bedrock_adapter.py. +Bedrock uses its own boto3 client (not the OpenAI SDK), so the transport +owns format conversion and normalization, while client construction and +boto3 calls stay on AIAgent. +""" + +from typing import Any, Dict, List, Optional + +from agent.transports.base import ProviderTransport +from agent.transports.types import NormalizedResponse, ToolCall, Usage + + +class BedrockTransport(ProviderTransport): + """Transport for api_mode='bedrock_converse'.""" + + @property + def api_mode(self) -> str: + return "bedrock_converse" + + def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any: + """Convert OpenAI messages to Bedrock Converse format.""" + from agent.bedrock_adapter import convert_messages_to_converse + return convert_messages_to_converse(messages) + + def convert_tools(self, tools: List[Dict[str, Any]]) -> Any: + """Convert OpenAI tool schemas to Bedrock Converse toolConfig.""" + from agent.bedrock_adapter import convert_tools_to_converse + return convert_tools_to_converse(tools) + + def build_kwargs( + self, + model: str, + messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None, + **params, + ) -> Dict[str, Any]: + """Build Bedrock converse() kwargs. + + Calls convert_messages and convert_tools internally. + + params: + max_tokens: int — output token limit (default 4096) + temperature: float | None + guardrail_config: dict | None — Bedrock guardrails + region: str — AWS region (default 'us-east-1') + """ + from agent.bedrock_adapter import build_converse_kwargs + + region = params.get("region", "us-east-1") + guardrail = params.get("guardrail_config") + + kwargs = build_converse_kwargs( + model=model, + messages=messages, + tools=tools, + max_tokens=params.get("max_tokens", 4096), + temperature=params.get("temperature"), + guardrail_config=guardrail, + ) + # Sentinel keys for dispatch — agent pops these before the boto3 call + kwargs["__bedrock_converse__"] = True + kwargs["__bedrock_region__"] = region + return kwargs + + def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: + """Normalize Bedrock response to NormalizedResponse. + + Handles two shapes: + 1. Raw boto3 dict (from direct converse() calls) + 2. 
Already-normalized SimpleNamespace with .choices (from dispatch site) + """ + from agent.bedrock_adapter import normalize_converse_response + + # Normalize to OpenAI-compatible SimpleNamespace + if hasattr(response, "choices") and response.choices: + # Already normalized at dispatch site + ns = response + else: + # Raw boto3 dict + ns = normalize_converse_response(response) + + choice = ns.choices[0] + msg = choice.message + finish_reason = choice.finish_reason or "stop" + + tool_calls = None + if msg.tool_calls: + tool_calls = [ + ToolCall( + id=tc.id, + name=tc.function.name, + arguments=tc.function.arguments, + ) + for tc in msg.tool_calls + ] + + usage = None + if hasattr(ns, "usage") and ns.usage: + u = ns.usage + usage = Usage( + prompt_tokens=getattr(u, "prompt_tokens", 0) or 0, + completion_tokens=getattr(u, "completion_tokens", 0) or 0, + total_tokens=getattr(u, "total_tokens", 0) or 0, + ) + + reasoning = getattr(msg, "reasoning", None) or getattr(msg, "reasoning_content", None) + + return NormalizedResponse( + content=msg.content, + tool_calls=tool_calls, + finish_reason=finish_reason, + reasoning=reasoning, + usage=usage, + ) + + def validate_response(self, response: Any) -> bool: + """Check Bedrock response structure. + + After normalize_converse_response, the response has OpenAI-compatible + .choices — same check as chat_completions. + """ + if response is None: + return False + # Raw Bedrock dict response — check for 'output' key + if isinstance(response, dict): + return "output" in response + # Already-normalized SimpleNamespace + if hasattr(response, "choices"): + return bool(response.choices) + return False + + def map_finish_reason(self, raw_reason: str) -> str: + """Map Bedrock stop reason to OpenAI finish_reason. + + The adapter already does this mapping inside normalize_converse_response, + so this is only used for direct access to raw responses. + """ + _MAP = { + "end_turn": "stop", + "tool_use": "tool_calls", + "max_tokens": "length", + "stop_sequence": "stop", + "guardrail_intervened": "content_filter", + "content_filtered": "content_filter", + } + return _MAP.get(raw_reason, "stop") + + +# Auto-register on import +from agent.transports import register_transport # noqa: E402 + +register_transport("bedrock_converse", BedrockTransport) diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py new file mode 100644 index 0000000000..900f59dcf4 --- /dev/null +++ b/agent/transports/chat_completions.py @@ -0,0 +1,387 @@ +"""OpenAI Chat Completions transport. + +Handles the default api_mode ('chat_completions') used by ~16 OpenAI-compatible +providers (OpenRouter, Nous, NVIDIA, Qwen, Ollama, DeepSeek, xAI, Kimi, etc.). + +Messages and tools are already in OpenAI format — convert_messages and +convert_tools are near-identity. The complexity lives in build_kwargs +which has provider-specific conditionals for max_tokens defaults, +reasoning configuration, temperature handling, and extra_body assembly. +""" + +import copy +from typing import Any, Dict, List, Optional + +from agent.prompt_builder import DEVELOPER_ROLE_MODELS +from agent.transports.base import ProviderTransport +from agent.transports.types import NormalizedResponse, ToolCall, Usage + + +class ChatCompletionsTransport(ProviderTransport): + """Transport for api_mode='chat_completions'. + + The default path for OpenAI-compatible providers. 
+ """ + + @property + def api_mode(self) -> str: + return "chat_completions" + + def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]: + """Messages are already in OpenAI format — sanitize Codex leaks only. + + Strips Codex Responses API fields (``codex_reasoning_items`` on the + message, ``call_id``/``response_item_id`` on tool_calls) that strict + chat-completions providers reject with 400/422. + """ + needs_sanitize = False + for msg in messages: + if not isinstance(msg, dict): + continue + if "codex_reasoning_items" in msg: + needs_sanitize = True + break + tool_calls = msg.get("tool_calls") + if isinstance(tool_calls, list): + for tc in tool_calls: + if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc): + needs_sanitize = True + break + if needs_sanitize: + break + + if not needs_sanitize: + return messages + + sanitized = copy.deepcopy(messages) + for msg in sanitized: + if not isinstance(msg, dict): + continue + msg.pop("codex_reasoning_items", None) + tool_calls = msg.get("tool_calls") + if isinstance(tool_calls, list): + for tc in tool_calls: + if isinstance(tc, dict): + tc.pop("call_id", None) + tc.pop("response_item_id", None) + return sanitized + + def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Tools are already in OpenAI format — identity.""" + return tools + + def build_kwargs( + self, + model: str, + messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None, + **params, + ) -> Dict[str, Any]: + """Build chat.completions.create() kwargs. + + This is the most complex transport method — it handles ~16 providers + via params rather than subclasses. + + params: + timeout: float — API call timeout + max_tokens: int | None — user-configured max tokens + ephemeral_max_output_tokens: int | None — one-shot override (error recovery) + max_tokens_param_fn: callable — returns {max_tokens: N} or {max_completion_tokens: N} + reasoning_config: dict | None + request_overrides: dict | None + session_id: str | None + qwen_session_metadata: dict | None — {sessionId, promptId} precomputed + model_lower: str — lowercase model name for pattern matching + # Provider detection flags (all optional, default False) + is_openrouter: bool + is_nous: bool + is_qwen_portal: bool + is_github_models: bool + is_nvidia_nim: bool + is_kimi: bool + is_custom_provider: bool + ollama_num_ctx: int | None + # Provider routing + provider_preferences: dict | None + # Qwen-specific + qwen_prepare_fn: callable | None — runs AFTER codex sanitization + qwen_prepare_inplace_fn: callable | None — in-place variant for deepcopied lists + # Temperature + fixed_temperature: Any — from _fixed_temperature_for_model() + omit_temperature: bool + # Reasoning + supports_reasoning: bool + github_reasoning_extra: dict | None + # Claude on OpenRouter/Nous max output + anthropic_max_output: int | None + # Extra + extra_body_additions: dict | None — pre-built extra_body entries + """ + # Codex sanitization: drop reasoning_items / call_id / response_item_id + sanitized = self.convert_messages(messages) + + # Qwen portal prep AFTER codex sanitization. If sanitize already + # deepcopied, reuse that copy via the in-place variant to avoid a + # second deepcopy. 
+ is_qwen = params.get("is_qwen_portal", False) + if is_qwen: + qwen_prep = params.get("qwen_prepare_fn") + qwen_prep_inplace = params.get("qwen_prepare_inplace_fn") + if sanitized is messages: + if qwen_prep is not None: + sanitized = qwen_prep(sanitized) + else: + # Already deepcopied — transform in place + if qwen_prep_inplace is not None: + qwen_prep_inplace(sanitized) + elif qwen_prep is not None: + sanitized = qwen_prep(sanitized) + + # Developer role swap for GPT-5/Codex models + model_lower = params.get("model_lower", (model or "").lower()) + if ( + sanitized + and isinstance(sanitized[0], dict) + and sanitized[0].get("role") == "system" + and any(p in model_lower for p in DEVELOPER_ROLE_MODELS) + ): + sanitized = list(sanitized) + sanitized[0] = {**sanitized[0], "role": "developer"} + + api_kwargs: Dict[str, Any] = { + "model": model, + "messages": sanitized, + } + + timeout = params.get("timeout") + if timeout is not None: + api_kwargs["timeout"] = timeout + + # Temperature + fixed_temp = params.get("fixed_temperature") + omit_temp = params.get("omit_temperature", False) + if omit_temp: + api_kwargs.pop("temperature", None) + elif fixed_temp is not None: + api_kwargs["temperature"] = fixed_temp + + # Qwen metadata (caller precomputes {sessionId, promptId}) + qwen_meta = params.get("qwen_session_metadata") + if qwen_meta and is_qwen: + api_kwargs["metadata"] = qwen_meta + + # Tools + if tools: + api_kwargs["tools"] = tools + + # max_tokens resolution — priority: ephemeral > user > provider default + max_tokens_fn = params.get("max_tokens_param_fn") + ephemeral = params.get("ephemeral_max_output_tokens") + max_tokens = params.get("max_tokens") + anthropic_max_out = params.get("anthropic_max_output") + is_nvidia_nim = params.get("is_nvidia_nim", False) + is_kimi = params.get("is_kimi", False) + reasoning_config = params.get("reasoning_config") + + if ephemeral is not None and max_tokens_fn: + api_kwargs.update(max_tokens_fn(ephemeral)) + elif max_tokens is not None and max_tokens_fn: + api_kwargs.update(max_tokens_fn(max_tokens)) + elif is_nvidia_nim and max_tokens_fn: + api_kwargs.update(max_tokens_fn(16384)) + elif is_qwen and max_tokens_fn: + api_kwargs.update(max_tokens_fn(65536)) + elif is_kimi and max_tokens_fn: + # Kimi/Moonshot: 32000 matches Kimi CLI's default + api_kwargs.update(max_tokens_fn(32000)) + elif anthropic_max_out is not None: + api_kwargs["max_tokens"] = anthropic_max_out + + # Kimi: top-level reasoning_effort (unless thinking disabled) + if is_kimi: + _kimi_thinking_off = bool( + reasoning_config + and isinstance(reasoning_config, dict) + and reasoning_config.get("enabled") is False + ) + if not _kimi_thinking_off: + _kimi_effort = "medium" + if reasoning_config and isinstance(reasoning_config, dict): + _e = (reasoning_config.get("effort") or "").strip().lower() + if _e in ("low", "medium", "high"): + _kimi_effort = _e + api_kwargs["reasoning_effort"] = _kimi_effort + + # extra_body assembly + extra_body: Dict[str, Any] = {} + + is_openrouter = params.get("is_openrouter", False) + is_nous = params.get("is_nous", False) + is_github_models = params.get("is_github_models", False) + + provider_prefs = params.get("provider_preferences") + if provider_prefs and is_openrouter: + extra_body["provider"] = provider_prefs + + # Kimi extra_body.thinking + if is_kimi: + _kimi_thinking_enabled = True + if reasoning_config and isinstance(reasoning_config, dict): + if reasoning_config.get("enabled") is False: + _kimi_thinking_enabled = False + extra_body["thinking"] = { + 
"type": "enabled" if _kimi_thinking_enabled else "disabled", + } + + # Reasoning + if params.get("supports_reasoning", False): + if is_github_models: + gh_reasoning = params.get("github_reasoning_extra") + if gh_reasoning is not None: + extra_body["reasoning"] = gh_reasoning + else: + if reasoning_config is not None: + rc = dict(reasoning_config) + if is_nous and rc.get("enabled") is False: + pass # omit for Nous when disabled + else: + extra_body["reasoning"] = rc + else: + extra_body["reasoning"] = {"enabled": True, "effort": "medium"} + + if is_nous: + extra_body["tags"] = ["product=hermes-agent"] + + # Ollama num_ctx + ollama_ctx = params.get("ollama_num_ctx") + if ollama_ctx: + options = extra_body.get("options", {}) + options["num_ctx"] = ollama_ctx + extra_body["options"] = options + + # Ollama/custom think=false + if params.get("is_custom_provider", False): + if reasoning_config and isinstance(reasoning_config, dict): + _effort = (reasoning_config.get("effort") or "").strip().lower() + _enabled = reasoning_config.get("enabled", True) + if _effort == "none" or _enabled is False: + extra_body["think"] = False + + if is_qwen: + extra_body["vl_high_resolution_images"] = True + + # Merge any pre-built extra_body additions + additions = params.get("extra_body_additions") + if additions: + extra_body.update(additions) + + if extra_body: + api_kwargs["extra_body"] = extra_body + + # Request overrides last (service_tier etc.) + overrides = params.get("request_overrides") + if overrides: + api_kwargs.update(overrides) + + return api_kwargs + + def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: + """Normalize OpenAI ChatCompletion to NormalizedResponse. + + For chat_completions, this is near-identity — the response is already + in OpenAI format. extra_content on tool_calls (Gemini thought_signature) + is preserved via ToolCall.provider_data. reasoning_details (OpenRouter + unified format) and reasoning_content (DeepSeek/Moonshot) are also + preserved for downstream replay. + """ + choice = response.choices[0] + msg = choice.message + finish_reason = choice.finish_reason or "stop" + + tool_calls = None + if msg.tool_calls: + tool_calls = [] + for tc in msg.tool_calls: + # Preserve provider-specific extras on the tool call. + # Gemini 3 thinking models attach extra_content with + # thought_signature — without replay on the next turn the API + # rejects the request with 400. + tc_provider_data: Dict[str, Any] = {} + extra = getattr(tc, "extra_content", None) + if extra is None and hasattr(tc, "model_extra"): + extra = (tc.model_extra or {}).get("extra_content") + if extra is not None: + if hasattr(extra, "model_dump"): + try: + extra = extra.model_dump() + except Exception: + pass + tc_provider_data["extra_content"] = extra + tool_calls.append(ToolCall( + id=tc.id, + name=tc.function.name, + arguments=tc.function.arguments, + provider_data=tc_provider_data or None, + )) + + usage = None + if hasattr(response, "usage") and response.usage: + u = response.usage + usage = Usage( + prompt_tokens=getattr(u, "prompt_tokens", 0) or 0, + completion_tokens=getattr(u, "completion_tokens", 0) or 0, + total_tokens=getattr(u, "total_tokens", 0) or 0, + ) + + # Preserve reasoning fields separately. DeepSeek/Moonshot use + # ``reasoning_content``; others use ``reasoning``. Downstream code + # (_extract_reasoning, thinking-prefill retry) reads both distinctly, + # so keep them apart in provider_data rather than merging. 
+ reasoning = getattr(msg, "reasoning", None) + reasoning_content = getattr(msg, "reasoning_content", None) + + provider_data: Dict[str, Any] = {} + if reasoning_content: + provider_data["reasoning_content"] = reasoning_content + rd = getattr(msg, "reasoning_details", None) + if rd: + provider_data["reasoning_details"] = rd + + return NormalizedResponse( + content=msg.content, + tool_calls=tool_calls, + finish_reason=finish_reason, + reasoning=reasoning, + usage=usage, + provider_data=provider_data or None, + ) + + def validate_response(self, response: Any) -> bool: + """Check that response has valid choices.""" + if response is None: + return False + if not hasattr(response, "choices") or response.choices is None: + return False + if not response.choices: + return False + return True + + def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]: + """Extract OpenRouter/OpenAI cache stats from prompt_tokens_details.""" + usage = getattr(response, "usage", None) + if usage is None: + return None + details = getattr(usage, "prompt_tokens_details", None) + if details is None: + return None + cached = getattr(details, "cached_tokens", 0) or 0 + written = getattr(details, "cache_write_tokens", 0) or 0 + if cached or written: + return {"cached_tokens": cached, "creation_tokens": written} + return None + + +# Auto-register on import +from agent.transports import register_transport # noqa: E402 + +register_transport("chat_completions", ChatCompletionsTransport) diff --git a/agent/transports/codex.py b/agent/transports/codex.py new file mode 100644 index 0000000000..ec48352193 --- /dev/null +++ b/agent/transports/codex.py @@ -0,0 +1,217 @@ +"""OpenAI Responses API (Codex) transport. + +Delegates to the existing adapter functions in agent/codex_responses_adapter.py. +This transport owns format conversion and normalization — NOT client lifecycle, +streaming, or the _run_codex_stream() call path. +""" + +from typing import Any, Dict, List, Optional + +from agent.transports.base import ProviderTransport +from agent.transports.types import NormalizedResponse, ToolCall, Usage + + +class ResponsesApiTransport(ProviderTransport): + """Transport for api_mode='codex_responses'. + + Wraps the functions extracted into codex_responses_adapter.py (PR 1). + """ + + @property + def api_mode(self) -> str: + return "codex_responses" + + def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any: + """Convert OpenAI chat messages to Responses API input items.""" + from agent.codex_responses_adapter import _chat_messages_to_responses_input + return _chat_messages_to_responses_input(messages) + + def convert_tools(self, tools: List[Dict[str, Any]]) -> Any: + """Convert OpenAI tool schemas to Responses API function definitions.""" + from agent.codex_responses_adapter import _responses_tools + return _responses_tools(tools) + + def build_kwargs( + self, + model: str, + messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None, + **params, + ) -> Dict[str, Any]: + """Build Responses API kwargs. + + Calls convert_messages and convert_tools internally. 
+ + params: + instructions: str — system prompt (extracted from messages[0] if not given) + reasoning_config: dict | None — {effort, enabled} + session_id: str | None — used for prompt_cache_key + xAI conv header + max_tokens: int | None — max_output_tokens + request_overrides: dict | None — extra kwargs merged in + provider: str | None — provider name for backend-specific logic + base_url: str | None — endpoint URL + base_url_hostname: str | None — hostname for backend detection + is_github_responses: bool — Copilot/GitHub models backend + is_codex_backend: bool — chatgpt.com/backend-api/codex + is_xai_responses: bool — xAI/Grok backend + github_reasoning_extra: dict | None — Copilot reasoning params + """ + from agent.codex_responses_adapter import ( + _chat_messages_to_responses_input, + _responses_tools, + ) + + from run_agent import DEFAULT_AGENT_IDENTITY + + instructions = params.get("instructions", "") + payload_messages = messages + if not instructions: + if messages and messages[0].get("role") == "system": + instructions = str(messages[0].get("content") or "").strip() + payload_messages = messages[1:] + if not instructions: + instructions = DEFAULT_AGENT_IDENTITY + + is_github_responses = params.get("is_github_responses", False) + is_codex_backend = params.get("is_codex_backend", False) + is_xai_responses = params.get("is_xai_responses", False) + + # Resolve reasoning effort + reasoning_effort = "medium" + reasoning_enabled = True + reasoning_config = params.get("reasoning_config") + if reasoning_config and isinstance(reasoning_config, dict): + if reasoning_config.get("enabled") is False: + reasoning_enabled = False + elif reasoning_config.get("effort"): + reasoning_effort = reasoning_config["effort"] + + _effort_clamp = {"minimal": "low"} + reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort) + + kwargs = { + "model": model, + "instructions": instructions, + "input": _chat_messages_to_responses_input(payload_messages), + "tools": _responses_tools(tools), + "tool_choice": "auto", + "parallel_tool_calls": True, + "store": False, + } + + session_id = params.get("session_id") + if not is_github_responses and session_id: + kwargs["prompt_cache_key"] = session_id + + if reasoning_enabled and is_xai_responses: + kwargs["include"] = ["reasoning.encrypted_content"] + elif reasoning_enabled: + if is_github_responses: + github_reasoning = params.get("github_reasoning_extra") + if github_reasoning is not None: + kwargs["reasoning"] = github_reasoning + else: + kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"} + kwargs["include"] = ["reasoning.encrypted_content"] + elif not is_github_responses and not is_xai_responses: + kwargs["include"] = [] + + request_overrides = params.get("request_overrides") + if request_overrides: + kwargs.update(request_overrides) + + max_tokens = params.get("max_tokens") + if max_tokens is not None and not is_codex_backend: + kwargs["max_output_tokens"] = max_tokens + + if is_xai_responses and session_id: + kwargs["extra_headers"] = {"x-grok-conv-id": session_id} + + return kwargs + + def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: + """Normalize Codex Responses API response to NormalizedResponse.""" + from agent.codex_responses_adapter import ( + _normalize_codex_response, + _extract_responses_message_text, + _extract_responses_reasoning_text, + ) + + # _normalize_codex_response returns (SimpleNamespace, finish_reason_str) + msg, finish_reason = _normalize_codex_response(response) + + tool_calls = 
None + if msg and msg.tool_calls: + tool_calls = [] + for tc in msg.tool_calls: + provider_data = {} + if hasattr(tc, "call_id") and tc.call_id: + provider_data["call_id"] = tc.call_id + if hasattr(tc, "response_item_id") and tc.response_item_id: + provider_data["response_item_id"] = tc.response_item_id + tool_calls.append(ToolCall( + id=tc.id if hasattr(tc, "id") else (tc.function.name if hasattr(tc, "function") else None), + name=tc.function.name if hasattr(tc, "function") else getattr(tc, "name", ""), + arguments=tc.function.arguments if hasattr(tc, "function") else getattr(tc, "arguments", "{}"), + provider_data=provider_data or None, + )) + + # Extract reasoning items for provider_data + provider_data = {} + if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items: + provider_data["codex_reasoning_items"] = msg.codex_reasoning_items + if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details: + provider_data["reasoning_details"] = msg.reasoning_details + + return NormalizedResponse( + content=msg.content if msg else None, + tool_calls=tool_calls, + finish_reason=finish_reason or "stop", + reasoning=msg.reasoning if msg and hasattr(msg, "reasoning") else None, + usage=None, # Codex usage is extracted separately in normalize_usage() + provider_data=provider_data or None, + ) + + def validate_response(self, response: Any) -> bool: + """Check Codex Responses API response has valid output structure. + + Returns True only if response.output is a non-empty list. + Does NOT check output_text fallback — the caller handles that + with diagnostic logging for stream backfill recovery. + """ + if response is None: + return False + output = getattr(response, "output", None) + if not isinstance(output, list) or not output: + return False + return True + + def preflight_kwargs(self, api_kwargs: Any, *, allow_stream: bool = False) -> dict: + """Validate and sanitize Codex API kwargs before the call. + + Normalizes input items, strips unsupported fields, validates structure. + """ + from agent.codex_responses_adapter import _preflight_codex_api_kwargs + return _preflight_codex_api_kwargs(api_kwargs, allow_stream=allow_stream) + + def map_finish_reason(self, raw_reason: str) -> str: + """Map Codex response.status to OpenAI finish_reason. + + Codex uses response.status ('completed', 'incomplete') + + response.incomplete_details.reason for granular mapping. + This method handles the simple status string; the caller + should check incomplete_details separately for 'max_output_tokens'. + """ + _MAP = { + "completed": "stop", + "incomplete": "length", + "failed": "stop", + "cancelled": "stop", + } + return _MAP.get(raw_reason, "stop") + + +# Auto-register on import +from agent.transports import register_transport # noqa: E402 + +register_transport("codex_responses", ResponsesApiTransport) diff --git a/cli-config.yaml.example b/cli-config.yaml.example index a4a5ffda76..e8e3d30af6 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -770,10 +770,12 @@ code_execution: # Subagent Delegation # ============================================================================= # The delegate_task tool spawns child agents with isolated context. -# Supports single tasks and batch mode (up to 3 parallel). +# Supports single tasks and batch mode (default 3 parallel, configurable). 
 delegation:
   max_iterations: 50  # Max tool-calling turns per child (default: 50)
-  default_toolsets: ["terminal", "file", "web"]  # Default toolsets for subagents
+  # max_concurrent_children: 3  # Max parallel child agents (default: 3)
+  # max_spawn_depth: 1  # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers.
+  # orchestrator_enabled: true  # Kill switch for role="orchestrator" children (default: true).
   # model: "google/gemini-3-flash-preview"  # Override model for subagents (empty = inherit parent)
   # provider: "openrouter"  # Override provider for subagents (empty = inherit parent)
   #   # Resolves full credentials (base_url, api_key) automatically.
diff --git a/cli.py b/cli.py
index 3851aea295..19bae13fdb 100644
--- a/cli.py
+++ b/cli.py
@@ -26,6 +26,7 @@ import tempfile
 import time
 import uuid
 import textwrap
+from urllib.parse import unquote, urlparse
 from contextlib import contextmanager
 from pathlib import Path
 from datetime import datetime
@@ -398,7 +399,6 @@ def load_cli_config() -> Dict[str, Any]:
         },
         "delegation": {
             "max_iterations": 45,  # Max tool-calling turns per child agent
-            "default_toolsets": ["terminal", "file", "web"],  # Default toolsets for subagents
             "model": "",  # Subagent model override (empty = inherit parent model)
             "provider": "",  # Subagent provider override (empty = inherit parent provider)
             "base_url": "",  # Direct OpenAI-compatible endpoint for subagents
@@ -1182,11 +1182,11 @@ def _strip_markdown_syntax(text: str) -> str:
     plain = re.sub(r"!\[([^\]]*)\]\([^\)]*\)", r"\1", plain)
     plain = re.sub(r"\[([^\]]+)\]\([^\)]*\)", r"\1", plain)
     plain = re.sub(r"\*\*\*([^*]+)\*\*\*", r"\1", plain)
-    plain = re.sub(r"___([^_]+)___", r"\1", plain)
+    plain = re.sub(r"(?<!\w)___([^_]+)___(?!\w)", r"\1", plain)
@@ ... @@ def _resolve_attachment_path(token: str) -> Path | None:
     if (token.startswith('"') and token.endswith('"')) or (token.startswith("'") and token.endswith("'")):
         token = token[1:-1].strip()
+    token = token.replace('\\ ', ' ')
     if not token:
         return None
-    expanded = os.path.expandvars(os.path.expanduser(token))
+    expanded = token
+    if token.startswith("file://"):
+        try:
+            parsed = urlparse(token)
+            if parsed.scheme == "file":
+                expanded = unquote(parsed.path or "")
+                if parsed.netloc and os.name == "nt":
+                    expanded = f"//{parsed.netloc}{expanded}"
+        except Exception:
+            expanded = token
+    expanded = os.path.expandvars(os.path.expanduser(expanded))
     if os.name != "nt":
         normalized = expanded.replace("\\", "/")
         if len(normalized) >= 3 and normalized[1] == ":" and normalized[2] == "/" and normalized[0].isalpha():
@@ -1389,6 +1400,7 @@ def _detect_file_drop(user_input: str) -> "dict | None":
         or stripped.startswith("~")
         or stripped.startswith("./")
         or stripped.startswith("../")
+        or stripped.startswith("file://")
         or (len(stripped) >= 3 and stripped[1] == ":" and stripped[2] in ("\\", "/") and stripped[0].isalpha())
         or stripped.startswith('"/')
         or stripped.startswith('"~')
@@ -1399,8 +1411,25 @@
     if not starts_like_path:
         return None
 
+    direct_path = _resolve_attachment_path(stripped)
+    if direct_path is not None:
+        return {
+            "path": direct_path,
+            "is_image": direct_path.suffix.lower() in _IMAGE_EXTENSIONS,
+            "remainder": "",
+        }
+
     first_token, remainder = _split_path_input(stripped)
     drop_path = _resolve_attachment_path(first_token)
+    if drop_path is None and " " in stripped and stripped[0] not in {"'", '"'}:
+        space_positions = [idx for idx, ch in enumerate(stripped) if ch == " "]
+        for pos in reversed(space_positions):
+            candidate = stripped[:pos].rstrip()
+ resolved = _resolve_attachment_path(candidate) + if resolved is not None: + drop_path = resolved + remainder = stripped[pos + 1 :].strip() + break if drop_path is None: return None @@ -8369,6 +8398,17 @@ class HermesCLI: def run_agent(): nonlocal result + # Set callbacks inside the agent thread so thread-local storage + # in terminal_tool is populated for this thread. The main thread + # registration (run() line ~9046) is invisible here because + # _callback_tls is threading.local(). Matches the pattern used + # by acp_adapter/server.py for ACP sessions. + set_sudo_password_callback(self._sudo_password_callback) + set_approval_callback(self._approval_callback) + try: + set_secret_capture_callback(self._secret_capture_callback) + except Exception: + pass agent_message = _voice_prefix + message if _voice_prefix else message # Prepend pending model switch note so the model knows about the switch _msn = getattr(self, '_pending_model_switch_note', None) @@ -8394,6 +8434,15 @@ class HermesCLI: "failed": True, "error": _summary, } + finally: + # Clear thread-local callbacks so a reused thread doesn't + # hold stale references to a disposed CLI instance. + try: + set_sudo_password_callback(None) + set_approval_callback(None) + set_secret_capture_callback(None) + except Exception: + pass # Start agent in background thread (daemon so it cannot keep the # process alive when the user closes the terminal tab — SIGHUP diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 8caa07e1cf..da619a6afe 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -1329,7 +1329,7 @@ class BasePlatformAdapter(ABC): # Extract MEDIA: tags, allowing optional whitespace after the colon # and quoted/backticked paths for LLM-formatted outputs. media_pattern = re.compile( - r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?''' + r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|pdf)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?''' ) for match in media_pattern.finditer(content): path = match.group("path").strip() diff --git a/gateway/run.py b/gateway/run.py index eaabdcd7e6..c088dbe902 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -3889,14 +3889,14 @@ class GatewayRunner: message_text = f"{context_note}\n\n{message_text}" if getattr(event, "reply_to_text", None) and event.reply_to_message_id: + # Always inject the reply-to pointer — even when the quoted text + # already appears in history. The prefix isn't deduplication, it's + # disambiguation: it tells the agent *which* prior message the user + # is referencing. History can contain the same or similar text + # multiple times, and without an explicit pointer the agent has to + # guess (or answer for both subjects). Token overhead is minimal.
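+ # Hypothetical illustration: replying "why did this fail?" to an earlier + # "Deploy finished with errors" message makes the outbound text: + # [Replying to: "Deploy finished with errors"] + # + # why did this fail?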
reply_snippet = str(event.reply_to_text)[:500] - found_in_history = any( - reply_snippet[:200] in (msg.get("content") or "") - for msg in history - if msg.get("role") in ("assistant", "user", "tool") - ) - if not found_in_history: - message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}' + message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}' if "@" in message_text: try: diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 6de56af5a3..8eec141201 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -168,8 +168,11 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { id="kimi-coding", name="Kimi / Moonshot", auth_type="api_key", + # Legacy platform.moonshot.ai keys use this endpoint (OpenAI-compat). + # sk-kimi- (Kimi Code) keys are auto-redirected to api.kimi.com/coding + # by _resolve_kimi_base_url() below. inference_base_url="https://api.moonshot.ai/v1", - api_key_env_vars=("KIMI_API_KEY",), + api_key_env_vars=("KIMI_API_KEY", "KIMI_CODING_API_KEY"), base_url_env_var="KIMI_BASE_URL", ), "kimi-coding-cn": ProviderConfig( @@ -340,10 +343,16 @@ def get_anthropic_key() -> str: # ============================================================================= # Kimi Code (kimi.com/code) issues keys prefixed "sk-kimi-" that only work -# on api.kimi.com/coding/v1. Legacy keys from platform.moonshot.ai work on -# api.moonshot.ai/v1 (the default). Auto-detect when user hasn't set +# on api.kimi.com/coding. Legacy keys from platform.moonshot.ai work on +# api.moonshot.ai/v1 (the old default). Auto-detect when user hasn't set # KIMI_BASE_URL explicitly. -KIMI_CODE_BASE_URL = "https://api.kimi.com/coding/v1" +# +# Note: the base URL intentionally has NO /v1 suffix. The /coding endpoint +# speaks the Anthropic Messages protocol, and the anthropic SDK appends +# "/v1/messages" internally — so "/coding" + SDK suffix → "/coding/v1/messages" +# (the correct target). Using "/coding/v1" here would produce +# "/coding/v1/v1/messages" (a 404). +KIMI_CODE_BASE_URL = "https://api.kimi.com/coding" def _resolve_kimi_base_url(api_key: str, default_url: str, env_override: str) -> str: @@ -3379,7 +3388,7 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: ) from hermes_cli.models import ( - _PROVIDER_MODELS, get_pricing_for_provider, filter_nous_free_models, + _PROVIDER_MODELS, get_pricing_for_provider, check_nous_free_tier, partition_nous_models_by_tier, ) model_ids = _PROVIDER_MODELS.get("nous", []) @@ -3388,7 +3397,6 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: unavailable_models: list = [] if model_ids: pricing = get_pricing_for_provider("nous") - model_ids = filter_nous_free_models(model_ids, pricing) free_tier = check_nous_free_tier() if free_tier: model_ids, unavailable_models = partition_nous_models_by_tier( diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 797acab5e9..8b43a351fb 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -924,12 +924,22 @@ class SlashCommandCompleter(Completer): display_meta=meta, ) - # If the user typed @file: or @folder:, delegate to path completions + # If the user typed @file: / @folder: (or just @file / @folder with + # no colon yet), delegate to path completions. Accepting the bare + # form lets the picker surface directories as soon as the user has + # typed `@folder`, without requiring them to first accept the static + # `@folder:` hint and re-trigger completion. for prefix in ("@file:", "@folder:"): - if word.startswith(prefix): - path_part = word[len(prefix):] or "." 
+ bare = prefix[:-1] + + if word == bare or word.startswith(prefix): + want_dir = prefix == "@folder:" + path_part = '' if word == bare else word[len(prefix):] expanded = os.path.expanduser(path_part) - if expanded.endswith("/"): + + if not expanded or expanded == ".": + search_dir, match_prefix = ".", "" + elif expanded.endswith("/"): search_dir, match_prefix = expanded, "" else: search_dir = os.path.dirname(expanded) or "." @@ -945,15 +955,21 @@ for entry in sorted(entries): if match_prefix and not entry.lower().startswith(prefix_lower): continue - if count >= limit: - break full_path = os.path.join(search_dir, entry) is_dir = os.path.isdir(full_path) + # `@folder:` must only surface directories; `@file:` only + # regular files. Without this filter `@folder:` listed + # every .env / .gitignore in the cwd, defeating the + # explicit prefix and confusing users expecting a + # directory picker. + if want_dir != is_dir: + continue + if count >= limit: + break display_path = os.path.relpath(full_path) suffix = "/" if is_dir else "" - kind = "folder" if is_dir else "file" meta = "dir" if is_dir else _file_size_label(full_path) - completion = f"@{kind}:{display_path}{suffix}" + completion = f"{prefix}{display_path}{suffix}" yield Completion( completion, start_position=-len(word), diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 74c27bca94..366b672b56 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -964,6 +964,10 @@ DEFAULT_CONFIG: _DefaultConfig = { }, # Text-to-speech configuration + # Each provider supports an optional `max_text_length:` override for the + # per-request input-character cap. Omit it to use the provider's documented + # limit (OpenAI 4096, xAI 15000, MiniMax 10000, ElevenLabs 5k-40k model-aware, + # Gemini 5000, Edge 5000, Mistral 4000, NeuTTS/KittenTTS 2000). "tts": { "provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local) "edge": { @@ -1063,6 +1067,12 @@ DEFAULT_CONFIG: _DefaultConfig = { # independent of the parent's max_iterations) "reasoning_effort": "", # reasoning effort for subagents: "xhigh", "high", "medium", # "low", "minimal", "none" (empty = inherit parent's level) + "max_concurrent_children": 3, # max parallel children per batch; floor of 1 enforced, no ceiling + # Orchestrator role controls (see tools/delegate_tool.py:_get_max_spawn_depth + # and _get_orchestrator_enabled). max_spawn_depth is clamped to [1, 3] with a + # warning log if out of range. + "max_spawn_depth": 1, # depth cap (1 = flat [default], 2 = orchestrator→leaf, 3 = three-level) + "orchestrator_enabled": True, # kill switch for role="orchestrator" }, # Ephemeral prefill messages file — JSON list of {role, content} dicts diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index e16f0bf5e6..2fc50321f6 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -943,18 +943,22 @@ def run_doctor(args): try: import httpx _base = os.getenv(_base_env, "") if _base_env else "" - # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com + # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com/coding/v1 + # (OpenAI-compat surface, which exposes /models for health check). if not _base and _key.startswith("sk-kimi-"): _base = "https://api.kimi.com/coding/v1" - # Anthropic-compat endpoints (/anthropic) don't support /models. - # Rewrite to the OpenAI-compat /v1 surface for health checks.
+ # Anthropic-compat endpoints (/anthropic, api.kimi.com/coding + # with no /v1) don't support /models. Rewrite to the OpenAI-compat + # /v1 surface for health checks. if _base and _base.rstrip("/").endswith("/anthropic"): from agent.auxiliary_client import _to_openai_base_url _base = _to_openai_base_url(_base) + if base_url_host_matches(_base, "api.kimi.com") and _base.rstrip("/").endswith("/coding"): + _base = _base.rstrip("/") + "/v1" _url = (_base.rstrip("/") + "/models") if _base else _default_url _headers = {"Authorization": f"Bearer {_key}"} if base_url_host_matches(_base, "api.kimi.com"): - _headers["User-Agent"] = "KimiCLI/1.30.0" + _headers["User-Agent"] = "claude-code/0.1.0" _resp = httpx.get( _url, headers=_headers, diff --git a/hermes_cli/main.py b/hermes_cli/main.py index e4c52cb8aa..a5cb11392a 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -2165,7 +2165,6 @@ def _model_flow_nous(config, current_model="", args=None): from hermes_cli.models import ( _PROVIDER_MODELS, get_pricing_for_provider, - filter_nous_free_models, check_nous_free_tier, partition_nous_models_by_tier, ) @@ -2208,10 +2207,8 @@ def _model_flow_nous(config, current_model="", args=None): # Check if user is on free tier free_tier = check_nous_free_tier() - # For both tiers: apply the allowlist filter first (removes non-allowlisted - # free models and allowlist models that aren't actually free). - # Then for free users: partition remaining models into selectable/unavailable. - model_ids = filter_nous_free_models(model_ids, pricing) + # For free users: partition models into selectable/unavailable based on + # whether they are free per the Portal-reported pricing. unavailable_models: list[str] = [] if free_tier: model_ids, unavailable_models = partition_nous_models_by_tier( diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py index 22721f9a42..5b26f5b8b5 100644 --- a/hermes_cli/model_switch.py +++ b/hermes_cli/model_switch.py @@ -678,6 +678,7 @@ def switch_model( _da = DIRECT_ALIASES.get(resolved_alias) if _da is not None and _da.base_url: base_url = _da.base_url + api_mode = "" # clear so determine_api_mode re-detects from URL if not api_key: api_key = "no-key-required" diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 33614d4263..186119b24d 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -53,6 +53,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [ ("stepfun/step-3.5-flash", ""), ("minimax/minimax-m2.7", ""), ("minimax/minimax-m2.5", ""), + ("minimax/minimax-m2.5:free", "free"), ("z-ai/glm-5.1", ""), ("z-ai/glm-5v-turbo", ""), ("z-ai/glm-5-turbo", ""), @@ -125,17 +126,15 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "stepfun/step-3.5-flash", "minimax/minimax-m2.7", "minimax/minimax-m2.5", + "minimax/minimax-m2.5:free", "z-ai/glm-5.1", "z-ai/glm-5v-turbo", "z-ai/glm-5-turbo", "x-ai/grok-4.20-beta", "nvidia/nemotron-3-super-120b-a12b", - "nvidia/nemotron-3-super-120b-a12b:free", - "arcee-ai/trinity-large-preview:free", "arcee-ai/trinity-large-thinking", "openai/gpt-5.4-pro", "openai/gpt-5.4-nano", - "openrouter/elephant-alpha", ], "openai-codex": _codex_curated_models(), "copilot-acp": [ @@ -362,17 +361,11 @@ _PROVIDER_MODELS: dict[str, list[str]] = { _PROVIDER_MODELS["ai-gateway"] = [mid for mid, _ in VERCEL_AI_GATEWAY_MODELS] # --------------------------------------------------------------------------- -# Nous Portal free-model filtering +# Nous Portal free-model helper # --------------------------------------------------------------------------- -# Models 
that are ALLOWED to appear when priced as free on Nous Portal. -# Any other free model is hidden — prevents promotional/temporary free models -# from cluttering the selection when users are paying subscribers. -# Models in this list are ALSO filtered out if they are NOT free (i.e. they -# should only appear in the menu when they are genuinely free). -_NOUS_ALLOWED_FREE_MODELS: frozenset[str] = frozenset({ - "xiaomi/mimo-v2-pro", - "xiaomi/mimo-v2-omni", -}) +# The Nous Portal models endpoint is the source of truth for which models +# are currently offered (free or paid). We trust whatever it returns and +# surface it to users as-is — no local allowlist filtering. def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool: @@ -386,35 +379,6 @@ def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool: return False -def filter_nous_free_models( - model_ids: list[str], - pricing: dict[str, dict[str, str]], -) -> list[str]: - """Filter the Nous Portal model list according to free-model policy. - - Rules: - • Paid models that are NOT in the allowlist → keep (normal case). - • Free models that are NOT in the allowlist → drop. - • Allowlist models that ARE free → keep. - • Allowlist models that are NOT free → drop. - """ - if not pricing: - return model_ids # no pricing data — can't filter, show everything - - result: list[str] = [] - for mid in model_ids: - free = _is_model_free(mid, pricing) - if mid in _NOUS_ALLOWED_FREE_MODELS: - # Allowlist model: only show when it's actually free - if free: - result.append(mid) - else: - # Regular model: keep only when it's NOT free - if not free: - result.append(mid) - return result - - # --------------------------------------------------------------------------- # Nous Portal account tier detection # --------------------------------------------------------------------------- @@ -478,8 +442,7 @@ def partition_nous_models_by_tier( ) -> tuple[list[str], list[str]]: """Split Nous models into (selectable, unavailable) based on user tier. - For paid-tier users: all models are selectable, none unavailable - (free-model filtering is handled separately by ``filter_nous_free_models``). + For paid-tier users: all models are selectable, none unavailable. For free-tier users: only free models are selectable; paid models are returned as unavailable (shown grayed out in the menu). @@ -549,6 +512,157 @@ def check_nous_free_tier() -> bool: return False # default to paid on error — don't block users +# --------------------------------------------------------------------------- +# Nous Portal recommended models +# +# The Portal publishes a curated list of suggested models (separated into +# paid and free tiers) plus dedicated recommendations for compaction (text +# summarisation / auxiliary) and vision tasks. We fetch it once per process +# with a TTL cache so callers can ask "what's the best aux model right now?" +# without hitting the network on every lookup. +# +# Shape of the response (fields we care about): +# { +# "paidRecommendedModels": [ {modelName, ...}, ... ], +# "freeRecommendedModels": [ {modelName, ...}, ... 
], +# "paidRecommendedCompactionModel": {modelName, ...} | null, +# "paidRecommendedVisionModel": {modelName, ...} | null, +# "freeRecommendedCompactionModel": {modelName, ...} | null, +# "freeRecommendedVisionModel": {modelName, ...} | null, +# } +# --------------------------------------------------------------------------- + +NOUS_RECOMMENDED_MODELS_PATH = "/api/nous/recommended-models" +_NOUS_RECOMMENDED_CACHE_TTL: int = 600 # seconds (10 minutes) +# (result_dict, timestamp) keyed by portal_base_url so staging vs prod don't collide. +_nous_recommended_cache: dict[str, tuple[dict[str, Any], float]] = {} + + +def fetch_nous_recommended_models( + portal_base_url: str = "", + timeout: float = 5.0, + *, + force_refresh: bool = False, +) -> dict[str, Any]: + """Fetch the Nous Portal's curated recommended-models payload. + + Hits ``/api/nous/recommended-models``. The endpoint is public — + no auth is required. Results are cached per portal URL for + ``_NOUS_RECOMMENDED_CACHE_TTL`` seconds; pass ``force_refresh=True`` to + bypass the cache. + + Returns the parsed JSON dict on success, or ``{}`` on any failure + (network, parse, non-2xx). Callers must treat missing/null fields as + "no recommendation" and fall back to their own default. + """ + base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/") + now = time.monotonic() + cached = _nous_recommended_cache.get(base) + if not force_refresh and cached is not None: + payload, cached_at = cached + if now - cached_at < _NOUS_RECOMMENDED_CACHE_TTL: + return payload + + url = f"{base}{NOUS_RECOMMENDED_MODELS_PATH}" + try: + req = urllib.request.Request( + url, + headers={"Accept": "application/json"}, + ) + with urllib.request.urlopen(req, timeout=timeout) as resp: + data = json.loads(resp.read().decode()) + if not isinstance(data, dict): + data = {} + except Exception: + data = {} + + _nous_recommended_cache[base] = (data, now) + return data + + +def _resolve_nous_portal_url() -> str: + """Best-effort lookup of the Portal base URL the user is authed against.""" + try: + from hermes_cli.auth import ( + DEFAULT_NOUS_PORTAL_URL, + get_provider_auth_state, + ) + state = get_provider_auth_state("nous") or {} + portal = str(state.get("portal_base_url") or "").strip() + if portal: + return portal.rstrip("/") + return str(DEFAULT_NOUS_PORTAL_URL).rstrip("/") + except Exception: + return "https://portal.nousresearch.com" + + +def _extract_model_name(entry: Any) -> Optional[str]: + """Pull the ``modelName`` field from a recommended-model entry, else None.""" + if not isinstance(entry, dict): + return None + model_name = entry.get("modelName") + if isinstance(model_name, str) and model_name.strip(): + return model_name.strip() + return None + + +def get_nous_recommended_aux_model( + *, + vision: bool = False, + free_tier: Optional[bool] = None, + portal_base_url: str = "", + force_refresh: bool = False, +) -> Optional[str]: + """Return the Portal's recommended model name for an auxiliary task. + + Picks the best field from the Portal's recommended-models payload: + + * ``vision=True`` → ``paidRecommendedVisionModel`` (paid tier) or + ``freeRecommendedVisionModel`` (free tier) + * ``vision=False`` → ``paidRecommendedCompactionModel`` or + ``freeRecommendedCompactionModel`` + + When ``free_tier`` is ``None`` (default) the user's tier is auto-detected + via :func:`check_nous_free_tier`. Pass an explicit bool to bypass the + detection — useful for tests or when the caller already knows the tier. 
+ + For paid-tier users we prefer the paid recommendation but gracefully fall + back to the free recommendation if the Portal returned ``null`` for the + paid field (common during the staged rollout of new paid models). + + Returns ``None`` when every candidate is missing, null, or the fetch + fails — callers should fall back to their own default (currently + ``google/gemini-3-flash-preview``). + """ + base = portal_base_url or _resolve_nous_portal_url() + payload = fetch_nous_recommended_models(base, force_refresh=force_refresh) + if not payload: + return None + + if free_tier is None: + try: + free_tier = check_nous_free_tier() + except Exception: + # On any detection error, assume paid — paid users see both fields + # anyway so this is a safe default that maximises model quality. + free_tier = False + + if vision: + paid_key, free_key = "paidRecommendedVisionModel", "freeRecommendedVisionModel" + else: + paid_key, free_key = "paidRecommendedCompactionModel", "freeRecommendedCompactionModel" + + # Preference order: + # free tier → free only + # paid tier → paid, then free (if paid field is null) + candidates = [free_key] if free_tier else [paid_key, free_key] + for key in candidates: + name = _extract_model_name(payload.get(key)) + if name: + return name + return None + + # --------------------------------------------------------------------------- # Canonical provider list — single source of truth for provider identity. # Every code path that lists, displays, or iterates providers derives from diff --git a/hermes_cli/plugins.py b/hermes_cli/plugins.py index a593782e61..11f18f0716 100644 --- a/hermes_cli/plugins.py +++ b/hermes_cli/plugins.py @@ -133,6 +133,9 @@ def _get_enabled_plugins() -> Optional[set]: # Data classes # --------------------------------------------------------------------------- +_VALID_PLUGIN_KINDS: Set[str] = {"standalone", "backend", "exclusive"} + + @dataclass class PluginManifest: """Parsed representation of a plugin.yaml manifest.""" @@ -146,6 +149,23 @@ class PluginManifest: provides_hooks: List[str] = field(default_factory=list) source: str = "" # "user", "project", or "entrypoint" path: Optional[str] = None + # Plugin kind — see plugins.py module docstring for semantics. + # ``standalone`` (default): hooks/tools of its own; opt-in via + # ``plugins.enabled``. + # ``backend``: pluggable backend for an existing core tool (e.g. + # image_gen). Built-in (bundled) backends auto-load; + # user-installed still gated by ``plugins.enabled``. + # ``exclusive``: category with exactly one active provider (memory). + # Selection via ``<category>.provider`` config key; the + # category's own discovery system handles loading and the + # general scanner skips these. + kind: str = "standalone" + # Registry key — path-derived, used by ``plugins.enabled``/``disabled`` + # lookups and by ``hermes plugins list``. For a flat plugin at + # ``plugins/disk-cleanup/`` the key is ``disk-cleanup``; for a nested + # category plugin at ``plugins/image_gen/openai/`` the key is + # ``image_gen/openai``. When empty, falls back to ``name``. + key: str = "" @dataclass @@ -366,6 +386,33 @@ class PluginContext: self.manifest.name, engine.name, ) + # -- image gen provider registration ------------------------------------ + + def register_image_gen_provider(self, provider) -> None: + """Register an image generation backend. + + ``provider`` must be an instance of + :class:`agent.image_gen_provider.ImageGenProvider`.
The + ``provider.name`` attribute is what ``image_gen.provider`` in + ``config.yaml`` matches against when routing ``image_generate`` + tool calls. + """ + from agent.image_gen_provider import ImageGenProvider + from agent.image_gen_registry import register_provider + + if not isinstance(provider, ImageGenProvider): + logger.warning( + "Plugin '%s' tried to register an image_gen provider that does " + "not inherit from ImageGenProvider. Ignoring.", + self.manifest.name, + ) + return + register_provider(provider) + logger.info( + "Plugin '%s' registered image_gen provider: %s", + self.manifest.name, provider.name, + ) + # -- hook registration -------------------------------------------------- def register_hook(self, hook_name: str, callback: Callable) -> None: @@ -465,11 +512,16 @@ class PluginManager: manifests: List[PluginManifest] = [] # 1. Bundled plugins (<repo>/plugins/<name>/) - # Repo-shipped generic plugins live next to hermes_cli/. Memory and - # context_engine subdirs are handled by their own discovery paths, so - # skip those names here. Bundled plugins are discovered (so they - # show up in `hermes plugins`) but only loaded when added to - # `plugins.enabled` in config.yaml — opt-in like any other plugin. + # + # Repo-shipped plugins live next to hermes_cli/. Two layouts are + # supported (see ``_scan_directory`` for details): + # + # - flat: ``plugins/disk-cleanup/plugin.yaml`` (standalone) + # - category: ``plugins/image_gen/openai/plugin.yaml`` (backend) + # + # ``memory/`` and ``context_engine/`` are skipped at the top level — + # they have their own discovery systems. Porting those to the + # category-namespace ``kind: exclusive`` model is a future PR. repo_plugins = Path(__file__).resolve().parent.parent / "plugins" manifests.extend( self._scan_directory( @@ -492,36 +544,69 @@ class PluginManager: manifests.extend(self._scan_entry_points()) # Load each manifest (skip user-disabled plugins). - # Later sources override earlier ones on name collision — user plugins - # take precedence over bundled, project plugins take precedence over - # user. Dedup here so we only load the final winner. + # Later sources override earlier ones on key collision — user + # plugins take precedence over bundled, project plugins take + # precedence over user. Dedup here so we only load the final + # winner. Keys are path-derived (``image_gen/openai``, + # ``disk-cleanup``) so ``tts/openai`` and ``image_gen/openai`` + # don't collide even when both manifests say ``name: openai``. disabled = _get_disabled_plugins() enabled = _get_enabled_plugins() # None = opt-in default (nothing enabled) winners: Dict[str, PluginManifest] = {} for manifest in manifests: - winners[manifest.name] = manifest + winners[manifest.key or manifest.name] = manifest for manifest in winners.values(): - # Explicit disable always wins. - if manifest.name in disabled: + lookup_key = manifest.key or manifest.name + + # Explicit disable always wins (matches on key or on legacy + # bare name for back-compat with existing user configs). + if lookup_key in disabled or manifest.name in disabled: loaded = LoadedPlugin(manifest=manifest, enabled=False) loaded.error = "disabled via config" - self._plugins[manifest.name] = loaded - logger.debug("Skipping disabled plugin '%s'", manifest.name) + self._plugins[lookup_key] = loaded + logger.debug("Skipping disabled plugin '%s'", lookup_key) continue - # Opt-in gate: plugins must be in the enabled allow-list.
- # If the allow-list is missing (None), treat as "nothing enabled" - # — users have to explicitly enable plugins to load them. - # Memory and context_engine providers are excluded from this gate - # since they have their own single-select config (memory.provider - # / context.engine), not the enabled list. - if enabled is None or manifest.name not in enabled: + + # Exclusive plugins (memory providers) have their own + # discovery/activation path. The general loader records the + # manifest for introspection but does not load the module. + if manifest.kind == "exclusive": loaded = LoadedPlugin(manifest=manifest, enabled=False) - loaded.error = "not enabled in config (run `hermes plugins enable {}` to activate)".format( - manifest.name + loaded.error = ( + "exclusive plugin — activate via <category>.provider config" ) - self._plugins[manifest.name] = loaded + self._plugins[lookup_key] = loaded logger.debug( - "Skipping '%s' (not in plugins.enabled)", manifest.name + "Skipping '%s' (exclusive, handled by category discovery)", + lookup_key, + ) + continue + + # Built-in backends auto-load — they ship with hermes and must + # just work. Selection among them (e.g. which image_gen backend + # services tool calls) is driven by ``<category>.provider`` config, + # enforced by the tool wrapper. + if manifest.kind == "backend" and manifest.source == "bundled": + self._load_plugin(manifest) + continue + + # Everything else (standalone, user-installed backends, + # entry-point plugins) is opt-in via plugins.enabled. + # Accept both the path-derived key and the legacy bare name + # so existing configs keep working. + is_enabled = ( + enabled is not None + and (lookup_key in enabled or manifest.name in enabled) + ) + if not is_enabled: + loaded = LoadedPlugin(manifest=manifest, enabled=False) + loaded.error = ( + "not enabled in config (run `hermes plugins enable {}` to activate)" + .format(lookup_key) + ) + self._plugins[lookup_key] = loaded + logger.debug( + "Skipping '%s' (not in plugins.enabled)", lookup_key ) continue self._load_plugin(manifest) @@ -545,9 +630,37 @@ ) -> List[PluginManifest]: """Read ``plugin.yaml`` manifests from subdirectories of *path*. - *skip_names* is an optional allow-list of names to ignore (used - for the bundled scan to exclude ``memory`` / ``context_engine`` - subdirs that have their own discovery path). + Supports two layouts, mixed freely: + + * **Flat** — ``<plugins_dir>/<name>/plugin.yaml``. Key is + ``<name>`` (e.g. ``disk-cleanup``). + * **Category** — ``<plugins_dir>/<category>/<name>/plugin.yaml``, + where the ``<category>`` directory itself has no ``plugin.yaml``. + Key is ``<category>/<name>`` (e.g. ``image_gen/openai``). + Depth is capped at two segments. + + *skip_names* is an optional allow-list of names to ignore at the + top level (kept for back-compat; the current call sites no longer + pass it now that categories are first-class). + """ + return self._scan_directory_level( + path, source, skip_names=skip_names, prefix="", depth=0 + ) + + def _scan_directory_level( + self, + path: Path, + source: str, + *, + skip_names: Optional[Set[str]], + prefix: str, + depth: int, + ) -> List[PluginManifest]: + """Recursive implementation of :meth:`_scan_directory`. + + ``prefix`` is the category path already accumulated ("" at root, + "image_gen" one level in). ``depth`` is the recursion depth; we + cap at 2 so ``<plugins_dir>/a/b/c/`` is ignored.
""" manifests: List[PluginManifest] = [] if not path.is_dir(): @@ -556,37 +669,88 @@ class PluginManager: for child in sorted(path.iterdir()): if not child.is_dir(): continue - if skip_names and child.name in skip_names: + if depth == 0 and skip_names and child.name in skip_names: continue manifest_file = child / "plugin.yaml" if not manifest_file.exists(): manifest_file = child / "plugin.yml" - if not manifest_file.exists(): - logger.debug("Skipping %s (no plugin.yaml)", child) + + if manifest_file.exists(): + manifest = self._parse_manifest( + manifest_file, child, source, prefix + ) + if manifest is not None: + manifests.append(manifest) continue - try: - if yaml is None: - logger.warning("PyYAML not installed – cannot load %s", manifest_file) - continue - data = yaml.safe_load(manifest_file.read_text()) or {} - manifest = PluginManifest( - name=data.get("name", child.name), - version=str(data.get("version", "")), - description=data.get("description", ""), - author=data.get("author", ""), - requires_env=data.get("requires_env", []), - provides_tools=data.get("provides_tools", []), - provides_hooks=data.get("provides_hooks", []), - source=source, - path=str(child), + # No manifest at this level. If we're still within the depth + # cap, treat this directory as a category namespace and recurse + # one level in looking for children with manifests. + if depth >= 1: + logger.debug("Skipping %s (no plugin.yaml, depth cap reached)", child) + continue + + sub_prefix = f"{prefix}/{child.name}" if prefix else child.name + manifests.extend( + self._scan_directory_level( + child, + source, + skip_names=None, + prefix=sub_prefix, + depth=depth + 1, ) - manifests.append(manifest) - except Exception as exc: - logger.warning("Failed to parse %s: %s", manifest_file, exc) + ) return manifests + def _parse_manifest( + self, + manifest_file: Path, + plugin_dir: Path, + source: str, + prefix: str, + ) -> Optional[PluginManifest]: + """Parse a single ``plugin.yaml`` into a :class:`PluginManifest`. + + Returns ``None`` on parse failure (logs a warning). 
+ """ + try: + if yaml is None: + logger.warning("PyYAML not installed – cannot load %s", manifest_file) + return None + data = yaml.safe_load(manifest_file.read_text()) or {} + + name = data.get("name", plugin_dir.name) + key = f"{prefix}/{plugin_dir.name}" if prefix else name + + raw_kind = data.get("kind", "standalone") + if not isinstance(raw_kind, str): + raw_kind = "standalone" + kind = raw_kind.strip().lower() + if kind not in _VALID_PLUGIN_KINDS: + logger.warning( + "Plugin %s: unknown kind '%s' (valid: %s); treating as 'standalone'", + key, raw_kind, ", ".join(sorted(_VALID_PLUGIN_KINDS)), + ) + kind = "standalone" + + return PluginManifest( + name=name, + version=str(data.get("version", "")), + description=data.get("description", ""), + author=data.get("author", ""), + requires_env=data.get("requires_env", []), + provides_tools=data.get("provides_tools", []), + provides_hooks=data.get("provides_hooks", []), + source=source, + path=str(plugin_dir), + kind=kind, + key=key, + ) + except Exception as exc: + logger.warning("Failed to parse %s: %s", manifest_file, exc) + return None + # ----------------------------------------------------------------------- # Entry-point scanning # ----------------------------------------------------------------------- @@ -609,6 +773,7 @@ class PluginManager: name=ep.name, source="entrypoint", path=ep.value, + key=ep.name, ) manifests.append(manifest) except Exception as exc: @@ -670,10 +835,16 @@ class PluginManager: loaded.error = str(exc) logger.warning("Failed to load plugin '%s': %s", manifest.name, exc) - self._plugins[manifest.name] = loaded + self._plugins[manifest.key or manifest.name] = loaded def _load_directory_module(self, manifest: PluginManifest) -> types.ModuleType: - """Import a directory-based plugin as ``hermes_plugins.``.""" + """Import a directory-based plugin as ``hermes_plugins.``. + + The module slug is derived from ``manifest.key`` so category-namespaced + plugins (``image_gen/openai``) import as + ``hermes_plugins.image_gen__openai`` without colliding with any + future ``tts/openai``. + """ plugin_dir = Path(manifest.path) # type: ignore[arg-type] init_file = plugin_dir / "__init__.py" if not init_file.exists(): @@ -686,7 +857,9 @@ class PluginManager: ns_pkg.__package__ = _NS_PARENT sys.modules[_NS_PARENT] = ns_pkg - module_name = f"{_NS_PARENT}.{manifest.name.replace('-', '_')}" + key = manifest.key or manifest.name + slug = key.replace("/", "__").replace("-", "_") + module_name = f"{_NS_PARENT}.{slug}" spec = importlib.util.spec_from_file_location( module_name, init_file, @@ -767,10 +940,12 @@ class PluginManager: def list_plugins(self) -> List[Dict[str, Any]]: """Return a list of info dicts for all discovered plugins.""" result: List[Dict[str, Any]] = [] - for name, loaded in sorted(self._plugins.items()): + for key, loaded in sorted(self._plugins.items()): result.append( { - "name": name, + "name": loaded.manifest.name, + "key": loaded.manifest.key or loaded.manifest.name, + "kind": loaded.manifest.kind, "version": loaded.manifest.version, "description": loaded.manifest.description, "source": loaded.manifest.source, diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py index 1764474aa9..00c3f64bcf 100644 --- a/hermes_cli/providers.py +++ b/hermes_cli/providers.py @@ -427,6 +427,16 @@ def determine_api_mode(provider: str, base_url: str = "") -> str: """ pdef = get_provider(provider) if pdef is not None: + # Even for known providers, check URL heuristics for special endpoints + # (e.g. 
kimi /coding endpoint needs anthropic_messages even on 'custom') + if base_url: + url_lower = base_url.rstrip("/").lower() + if "api.kimi.com/coding" in url_lower: + return "anthropic_messages" + if url_lower.endswith("/anthropic") or "api.anthropic.com" in url_lower: + return "anthropic_messages" + if "api.openai.com" in url_lower: + return "codex_responses" return TRANSPORT_TO_API_MODE.get(pdef.transport, "chat_completions") # Direct provider checks for providers not in HERMES_OVERLAYS @@ -439,6 +449,8 @@ def determine_api_mode(provider: str, base_url: str = "") -> str: hostname = base_url_hostname(base_url) if url_lower.endswith("/anthropic") or hostname == "api.anthropic.com": return "anthropic_messages" + if hostname == "api.kimi.com" and "/coding" in url_lower: + return "anthropic_messages" if hostname == "api.openai.com": return "codex_responses" if hostname.startswith("bedrock-runtime.") and base_url_host_matches(base_url, "amazonaws.com"): diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 62f1407cc7..922946e2ad 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -46,6 +46,9 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]: protocol under a ``/anthropic`` suffix — treat those as ``anthropic_messages`` transport instead of the default ``chat_completions``. + - Kimi Code's ``api.kimi.com/coding`` endpoint also speaks the + Anthropic Messages protocol (the /coding route accepts Claude + Code's native request shape). """ normalized = (base_url or "").strip().lower().rstrip("/") hostname = base_url_hostname(base_url) @@ -55,6 +58,8 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]: return "codex_responses" if normalized.endswith("/anthropic"): return "anthropic_messages" + if hostname == "api.kimi.com" and "/coding" in normalized: + return "anthropic_messages" return None @@ -205,7 +210,8 @@ def _resolve_runtime_from_pool_entry( api_mode = opencode_model_api_mode(provider, model_cfg.get("default", "")) else: # Auto-detect Anthropic-compatible endpoints (/anthropic suffix, - # api.openai.com → codex_responses, api.x.ai → codex_responses). + # Kimi /coding, api.openai.com → codex_responses, api.x.ai → + # codex_responses). detected = _detect_api_mode_for_url(base_url) if detected: api_mode = detected @@ -660,7 +666,8 @@ def _resolve_explicit_runtime( if configured_mode: api_mode = configured_mode else: - # Auto-detect Anthropic-compatible endpoints (/anthropic suffix). + # Auto-detect from URL (Anthropic /anthropic suffix, + # api.openai.com → Responses, Kimi /coding, etc.). detected = _detect_api_mode_for_url(base_url) if detected: api_mode = detected diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index d7eb7b734a..1a620d62b3 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -408,13 +408,36 @@ def _print_setup_summary(config: dict, hermes_home): ("Browser Automation", False, missing_browser_hint) ) - # FAL (image generation) + # Image generation — FAL (direct or via Nous), or any plugin-registered + # provider (OpenAI, etc.) if subscription_features.image_gen.managed_by_nous: tool_status.append(("Image Generation (Nous subscription)", True, None)) elif subscription_features.image_gen.available: tool_status.append(("Image Generation", True, None)) else: - tool_status.append(("Image Generation", False, "FAL_KEY")) + # Fall back to probing plugin-registered providers so OpenAI-only + # setups don't show as "missing FAL_KEY". 
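+ # Each registered provider self-reports readiness via is_available() + # (for the OpenAI backend: OPENAI_API_KEY present and the SDK importable), + # so the probe below simply takes the first non-FAL backend that reports ready.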
+ _img_backend = None + try: + from agent.image_gen_registry import list_providers + from hermes_cli.plugins import _ensure_plugins_discovered + + _ensure_plugins_discovered() + for _p in list_providers(): + if _p.name == "fal": + continue + try: + if _p.is_available(): + _img_backend = _p.display_name + break + except Exception: + continue + except Exception: + pass + if _img_backend: + tool_status.append((f"Image Generation ({_img_backend})", True, None)) + else: + tool_status.append(("Image Generation", False, "FAL_KEY or OPENAI_API_KEY")) # TTS — show configured provider tts_provider = config.get("tts", {}).get("provider", "edge") diff --git a/hermes_cli/tips.py b/hermes_cli/tips.py index 71bace524a..24acc15f53 100644 --- a/hermes_cli/tips.py +++ b/hermes_cli/tips.py @@ -127,7 +127,7 @@ TIPS = [ # --- Tools & Capabilities --- "execute_code runs Python scripts that call Hermes tools programmatically — results stay out of context.", - "delegate_task spawns up to 3 concurrent sub-agents with isolated contexts for parallel work.", + "delegate_task spawns up to 3 concurrent sub-agents by default (configurable via delegation.max_concurrent_children) with isolated contexts for parallel work.", "web_extract works on PDF URLs — pass any PDF link and it converts to markdown.", "search_files is ripgrep-backed and faster than grep — use it instead of terminal grep.", "patch uses 9 fuzzy matching strategies so minor whitespace differences won't break edits.", diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index f91a0e037b..afc99e56c5 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -842,6 +842,51 @@ def _configure_toolset(ts_key: str, config: dict): _configure_simple_requirements(ts_key) +def _plugin_image_gen_providers() -> list[dict]: + """Build picker-row dicts from plugin-registered image gen providers. + + Each returned dict looks like a regular ``TOOL_CATEGORIES`` provider + row but carries an ``image_gen_plugin_name`` marker so downstream + code (config writing, model picker) knows to route through the + plugin registry instead of the in-tree FAL backend. + + FAL is skipped — it's already exposed by the hardcoded + ``TOOL_CATEGORIES["image_gen"]`` entries. When FAL gets ported to + a plugin in a follow-up PR, the hardcoded entries go away and this + function surfaces it alongside OpenAI automatically. + """ + try: + from agent.image_gen_registry import list_providers + from hermes_cli.plugins import _ensure_plugins_discovered + + _ensure_plugins_discovered() + providers = list_providers() + except Exception: + return [] + + rows: list[dict] = [] + for provider in providers: + if getattr(provider, "name", None) == "fal": + # FAL has its own hardcoded rows today. 
+ continue + try: + schema = provider.get_setup_schema() + except Exception: + continue + if not isinstance(schema, dict): + continue + rows.append( + { + "name": schema.get("name", provider.display_name), + "badge": schema.get("badge", ""), + "tag": schema.get("tag", ""), + "env_vars": schema.get("env_vars", []), + "image_gen_plugin_name": provider.name, + } + ) + return rows + + def _visible_providers(cat: dict, config: dict) -> list[dict]: """Return provider entries visible for the current auth/config state.""" features = get_nous_subscription_features(config) @@ -852,6 +897,12 @@ def _visible_providers(cat: dict, config: dict) -> list[dict]: if provider.get("requires_nous_auth") and not features.nous_auth_present: continue visible.append(provider) + + # Inject plugin-registered image_gen backends (OpenAI today, more + # later) so the picker lists them alongside FAL / Nous Subscription. + if cat.get("name") == "Image Generation": + visible.extend(_plugin_image_gen_providers()) + return visible @@ -871,7 +922,24 @@ def _toolset_needs_configuration_prompt(ts_key: str, config: dict) -> bool: browser_cfg = config.get("browser", {}) return not isinstance(browser_cfg, dict) or "cloud_provider" not in browser_cfg if ts_key == "image_gen": - return not fal_key_is_configured() + # Satisfied when the in-tree FAL backend is configured OR any + # plugin-registered image gen provider is available. + if fal_key_is_configured(): + return False + try: + from agent.image_gen_registry import list_providers + from hermes_cli.plugins import _ensure_plugins_discovered + + _ensure_plugins_discovered() + for provider in list_providers(): + try: + if provider.is_available(): + return False + except Exception: + continue + except Exception: + pass + return True return not _toolset_has_keys(ts_key, config) @@ -1096,6 +1164,88 @@ def _configure_imagegen_model(backend_name: str, config: dict) -> None: _print_success(f" Model set to: {chosen}") +def _plugin_image_gen_catalog(plugin_name: str): + """Return ``(catalog_dict, default_model_id)`` for a plugin provider. + + ``catalog_dict`` is shaped like the legacy ``FAL_MODELS`` table — + ``{model_id: {"display", "speed", "strengths", "price", ...}}`` — + so the existing picker code paths work without change. Returns + ``({}, None)`` if the provider isn't registered or has no models. + """ + try: + from agent.image_gen_registry import get_provider + from hermes_cli.plugins import _ensure_plugins_discovered + + _ensure_plugins_discovered() + provider = get_provider(plugin_name) + except Exception: + return {}, None + if provider is None: + return {}, None + try: + models = provider.list_models() or [] + default = provider.default_model() + except Exception: + return {}, None + catalog = {m["id"]: m for m in models if isinstance(m, dict) and "id" in m} + return catalog, default + + +def _configure_imagegen_model_for_plugin(plugin_name: str, config: dict) -> None: + """Prompt the user to pick a model for a plugin-registered backend. + + Writes selection to ``image_gen.model``. Mirrors + :func:`_configure_imagegen_model` but sources its catalog from the + plugin registry instead of :data:`IMAGEGEN_BACKENDS`. 
+ """ + catalog, default_model = _plugin_image_gen_catalog(plugin_name) + if not catalog: + return + + cur_cfg = config.setdefault("image_gen", {}) + if not isinstance(cur_cfg, dict): + cur_cfg = {} + config["image_gen"] = cur_cfg + current_model = cur_cfg.get("model") or default_model + if current_model not in catalog: + current_model = default_model + + model_ids = list(catalog.keys()) + ordered = [current_model] + [m for m in model_ids if m != current_model] + + widths = { + "model": max(len(m) for m in model_ids), + "speed": max((len(catalog[m].get("speed", "")) for m in model_ids), default=6), + "strengths": max((len(catalog[m].get("strengths", "")) for m in model_ids), default=0), + } + + print() + header = ( + f" {'Model':<{widths['model']}} " + f"{'Speed':<{widths['speed']}} " + f"{'Strengths':<{widths['strengths']}} " + f"Price" + ) + print(color(header, Colors.CYAN)) + + rows = [] + for mid in ordered: + row = _format_imagegen_model_row(mid, catalog[mid], widths) + if mid == current_model: + row += " ← currently in use" + rows.append(row) + + idx = _prompt_choice( + f" Choose {plugin_name} model:", + rows, + default=0, + ) + + chosen = ordered[idx] + cur_cfg["model"] = chosen + _print_success(f" Model set to: {chosen}") + + def _configure_provider(provider: dict, config: dict): """Configure a single provider - prompt for API keys and set config.""" env_vars = provider.get("env_vars", []) @@ -1152,10 +1302,28 @@ def _configure_provider(provider: dict, config: dict): _print_success(f" {provider['name']} - no configuration needed!") if managed_feature: _print_info(" Requests for this tool will be billed to your Nous subscription.") + # Plugin-registered image_gen provider: write image_gen.provider + # and route model selection to the plugin's own catalog. + plugin_name = provider.get("image_gen_plugin_name") + if plugin_name: + img_cfg = config.setdefault("image_gen", {}) + if not isinstance(img_cfg, dict): + img_cfg = {} + config["image_gen"] = img_cfg + img_cfg["provider"] = plugin_name + _print_success(f" image_gen.provider set to: {plugin_name}") + _configure_imagegen_model_for_plugin(plugin_name, config) + return # Imagegen backends prompt for model selection after backend pick. backend = provider.get("imagegen_backend") if backend: _configure_imagegen_model(backend, config) + # In-tree FAL is the only non-plugin backend today. Keep + # image_gen.provider clear so the dispatch shim falls through + # to the legacy FAL path. + img_cfg = config.setdefault("image_gen", {}) + if isinstance(img_cfg, dict) and img_cfg.get("provider") not in (None, "", "fal"): + img_cfg["provider"] = "fal" return # Prompt for each required env var @@ -1190,10 +1358,23 @@ def _configure_provider(provider: dict, config: dict): if all_configured: _print_success(f" {provider['name']} configured!") + plugin_name = provider.get("image_gen_plugin_name") + if plugin_name: + img_cfg = config.setdefault("image_gen", {}) + if not isinstance(img_cfg, dict): + img_cfg = {} + config["image_gen"] = img_cfg + img_cfg["provider"] = plugin_name + _print_success(f" image_gen.provider set to: {plugin_name}") + _configure_imagegen_model_for_plugin(plugin_name, config) + return # Imagegen backends prompt for model selection after env vars are in. 
backend = provider.get("imagegen_backend") if backend: _configure_imagegen_model(backend, config) + img_cfg = config.setdefault("image_gen", {}) + if isinstance(img_cfg, dict) and img_cfg.get("provider") not in (None, "", "fal"): + img_cfg["provider"] = "fal" def _configure_simple_requirements(ts_key: str): diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index 6cf1199253..784dc4834d 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -16,6 +16,7 @@ import json import logging import os import secrets +import subprocess import sys import threading import time @@ -561,6 +562,138 @@ async def get_status(): } + +# --------------------------------------------------------------------------- +# Gateway + update actions (invoked from the Status page). +# +# Both commands are spawned as detached subprocesses so the HTTP request +# returns immediately. stdin is closed (``DEVNULL``) so any stray ``input()`` +# calls fail fast with EOF rather than hanging forever. stdout/stderr are +# streamed to a per-action log file under ``~/.hermes/logs/<name>.log`` so +# the dashboard can tail them back to the user. +# --------------------------------------------------------------------------- + +_ACTION_LOG_DIR: Path = get_hermes_home() / "logs" + +# Short ``name`` (from the URL) → log file name, resolved under ``_ACTION_LOG_DIR``. +_ACTION_LOG_FILES: Dict[str, str] = { + "gateway-restart": "gateway-restart.log", + "hermes-update": "hermes-update.log", +} + +# ``name`` → most recently spawned Popen handle. Used so ``status`` can +# report liveness and exit code without shelling out to ``ps``. +_ACTION_PROCS: Dict[str, subprocess.Popen] = {} + + +def _spawn_hermes_action(subcommand: List[str], name: str) -> subprocess.Popen: + """Spawn ``hermes <subcommand>`` detached and record the Popen handle. + + Uses the running interpreter's ``hermes_cli.main`` module so the action + inherits the same venv/PYTHONPATH the web server is using. + """ + log_file_name = _ACTION_LOG_FILES[name] + _ACTION_LOG_DIR.mkdir(parents=True, exist_ok=True) + log_path = _ACTION_LOG_DIR / log_file_name + log_file = open(log_path, "ab", buffering=0) + log_file.write( + f"\n=== {name} started {time.strftime('%Y-%m-%d %H:%M:%S')} ===\n".encode() + ) + + cmd = [sys.executable, "-m", "hermes_cli.main", *subcommand] + + popen_kwargs: Dict[str, Any] = { + "cwd": str(PROJECT_ROOT), + "stdin": subprocess.DEVNULL, + "stdout": log_file, + "stderr": subprocess.STDOUT, + "env": {**os.environ, "HERMES_NONINTERACTIVE": "1"}, + } + if sys.platform == "win32": + popen_kwargs["creationflags"] = ( + subprocess.CREATE_NEW_PROCESS_GROUP # type: ignore[attr-defined] + | getattr(subprocess, "DETACHED_PROCESS", 0) + ) + else: + popen_kwargs["start_new_session"] = True + + proc = subprocess.Popen(cmd, **popen_kwargs) + _ACTION_PROCS[name] = proc + return proc + + +def _tail_lines(path: Path, n: int) -> List[str]: + """Return the last ``n`` lines of ``path``. Reads the whole file — fine + for our small per-action logs.
Binary-decoded with ``errors='replace'`` + so log corruption doesn't 500 the endpoint.""" + if not path.exists(): + return [] + try: + text = path.read_text(errors="replace") + except OSError: + return [] + lines = text.splitlines() + return lines[-n:] if n > 0 else lines + + +@app.post("/api/gateway/restart") +async def restart_gateway(): + """Kick off a ``hermes gateway restart`` in the background.""" + try: + proc = _spawn_hermes_action(["gateway", "restart"], "gateway-restart") + except Exception as exc: + _log.exception("Failed to spawn gateway restart") + raise HTTPException(status_code=500, detail=f"Failed to restart gateway: {exc}") + return { + "ok": True, + "pid": proc.pid, + "name": "gateway-restart", + } + + +@app.post("/api/hermes/update") +async def update_hermes(): + """Kick off ``hermes update`` in the background.""" + try: + proc = _spawn_hermes_action(["update"], "hermes-update") + except Exception as exc: + _log.exception("Failed to spawn hermes update") + raise HTTPException(status_code=500, detail=f"Failed to start update: {exc}") + return { + "ok": True, + "pid": proc.pid, + "name": "hermes-update", + } + + +@app.get("/api/actions/{name}/status") +async def get_action_status(name: str, lines: int = 200): + """Tail an action log and report whether the process is still running.""" + log_file_name = _ACTION_LOG_FILES.get(name) + if log_file_name is None: + raise HTTPException(status_code=404, detail=f"Unknown action: {name}") + + log_path = _ACTION_LOG_DIR / log_file_name + tail = _tail_lines(log_path, min(max(lines, 1), 2000)) + + proc = _ACTION_PROCS.get(name) + if proc is None: + running = False + exit_code: Optional[int] = None + pid: Optional[int] = None + else: + exit_code = proc.poll() + running = exit_code is None + pid = proc.pid + + return { + "name": name, + "running": running, + "exit_code": exit_code, + "pid": pid, + "lines": tail, + } + + @app.get("/api/sessions") async def get_sessions(limit: int = 20, offset: int = 0): try: diff --git a/package-lock.json b/package-lock.json index 9d0ae80cdc..728429e51b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1069,6 +1069,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "baseline-browser-mapping": "^2.10.12", "caniuse-lite": "^1.0.30001782", @@ -3911,6 +3912,7 @@ "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.59.1.tgz", "integrity": "sha512-C8oWjPR3F81yljW9o5OxcWzfh6avkVwDD2VYdwIGqTkl+OGFISgypqzfu7dOe4QNLL2aqcWBmI3PMtLIK233lw==", "license": "Apache-2.0", + "peer": true, "dependencies": { "playwright-core": "1.59.1" }, @@ -3929,6 +3931,7 @@ "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.59.1.tgz", "integrity": "sha512-HBV/RJg81z5BiiZ9yPzIiClYV/QMsDCKUyogwH9p3MCP6IYjUFu/MActgYAvK0oWyV9NlwM3GLBjADyWgydVyg==", "license": "Apache-2.0", + "peer": true, "bin": { "playwright-core": "cli.js" }, diff --git a/plugins/image_gen/openai/__init__.py b/plugins/image_gen/openai/__init__.py new file mode 100644 index 0000000000..c1a719f910 --- /dev/null +++ b/plugins/image_gen/openai/__init__.py @@ -0,0 +1,303 @@ +"""OpenAI image generation backend. + +Exposes OpenAI's ``gpt-image-2`` model at three quality tiers as an +:class:`ImageGenProvider` implementation. 
The tiers are implemented as +three virtual model IDs so the ``hermes tools`` model picker and the +``image_gen.model`` config key behave like any other multi-model backend: + + gpt-image-2-low ~15s fastest, good for iteration + gpt-image-2-medium ~40s default — balanced + gpt-image-2-high ~2min slowest, highest fidelity + +All three hit the same underlying API model (``gpt-image-2``) with a +different ``quality`` parameter. Output is base64 JSON → saved under +``$HERMES_HOME/cache/images/``. + +Selection precedence (first hit wins): + +1. ``OPENAI_IMAGE_MODEL`` env var (escape hatch for scripts / tests) +2. ``image_gen.openai.model`` in ``config.yaml`` +3. ``image_gen.model`` in ``config.yaml`` (when it's one of our tier IDs) +4. :data:`DEFAULT_MODEL` — ``gpt-image-2-medium`` +""" + +from __future__ import annotations + +import logging +import os +from typing import Any, Dict, List, Optional, Tuple + +from agent.image_gen_provider import ( + DEFAULT_ASPECT_RATIO, + ImageGenProvider, + error_response, + resolve_aspect_ratio, + save_b64_image, + success_response, +) + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Model catalog +# --------------------------------------------------------------------------- +# +# All three IDs resolve to the same underlying API model with a different +# ``quality`` setting. ``api_model`` is what gets sent to OpenAI; +# ``quality`` is the knob that changes generation time and output fidelity. + +API_MODEL = "gpt-image-2" + +_MODELS: Dict[str, Dict[str, Any]] = { + "gpt-image-2-low": { + "display": "GPT Image 2 (Low)", + "speed": "~15s", + "strengths": "Fast iteration, lowest cost", + "quality": "low", + }, + "gpt-image-2-medium": { + "display": "GPT Image 2 (Medium)", + "speed": "~40s", + "strengths": "Balanced — default", + "quality": "medium", + }, + "gpt-image-2-high": { + "display": "GPT Image 2 (High)", + "speed": "~2min", + "strengths": "Highest fidelity, strongest prompt adherence", + "quality": "high", + }, +} + +DEFAULT_MODEL = "gpt-image-2-medium" + +_SIZES = { + "landscape": "1536x1024", + "square": "1024x1024", + "portrait": "1024x1536", +} + + +def _load_openai_config() -> Dict[str, Any]: + """Read ``image_gen`` from config.yaml (returns {} on any failure).""" + try: + from hermes_cli.config import load_config + + cfg = load_config() + section = cfg.get("image_gen") if isinstance(cfg, dict) else None + return section if isinstance(section, dict) else {} + except Exception as exc: + logger.debug("Could not load image_gen config: %s", exc) + return {} + + +def _resolve_model() -> Tuple[str, Dict[str, Any]]: + """Decide which tier to use and return ``(model_id, meta)``.""" + env_override = os.environ.get("OPENAI_IMAGE_MODEL") + if env_override and env_override in _MODELS: + return env_override, _MODELS[env_override] + + cfg = _load_openai_config() + openai_cfg = cfg.get("openai") if isinstance(cfg.get("openai"), dict) else {} + candidate: Optional[str] = None + if isinstance(openai_cfg, dict): + value = openai_cfg.get("model") + if isinstance(value, str) and value in _MODELS: + candidate = value + if candidate is None: + top = cfg.get("model") + if isinstance(top, str) and top in _MODELS: + candidate = top + + if candidate is not None: + return candidate, _MODELS[candidate] + + return DEFAULT_MODEL, _MODELS[DEFAULT_MODEL] + + +# --------------------------------------------------------------------------- +# Provider +# 
--------------------------------------------------------------------------- + + +class OpenAIImageGenProvider(ImageGenProvider): + """OpenAI ``images.generate`` backend — gpt-image-2 at low/medium/high.""" + + @property + def name(self) -> str: + return "openai" + + @property + def display_name(self) -> str: + return "OpenAI" + + def is_available(self) -> bool: + if not os.environ.get("OPENAI_API_KEY"): + return False + try: + import openai # noqa: F401 + except ImportError: + return False + return True + + def list_models(self) -> List[Dict[str, Any]]: + return [ + { + "id": model_id, + "display": meta["display"], + "speed": meta["speed"], + "strengths": meta["strengths"], + "price": "varies", + } + for model_id, meta in _MODELS.items() + ] + + def default_model(self) -> Optional[str]: + return DEFAULT_MODEL + + def get_setup_schema(self) -> Dict[str, Any]: + return { + "name": "OpenAI", + "badge": "paid", + "tag": "gpt-image-2 at low/medium/high quality tiers", + "env_vars": [ + { + "key": "OPENAI_API_KEY", + "prompt": "OpenAI API key", + "url": "https://platform.openai.com/api-keys", + }, + ], + } + + def generate( + self, + prompt: str, + aspect_ratio: str = DEFAULT_ASPECT_RATIO, + **kwargs: Any, + ) -> Dict[str, Any]: + prompt = (prompt or "").strip() + aspect = resolve_aspect_ratio(aspect_ratio) + + if not prompt: + return error_response( + error="Prompt is required and must be a non-empty string", + error_type="invalid_argument", + provider="openai", + aspect_ratio=aspect, + ) + + if not os.environ.get("OPENAI_API_KEY"): + return error_response( + error=( + "OPENAI_API_KEY not set. Run `hermes tools` → Image " + "Generation → OpenAI to configure, or `hermes setup` " + "to add the key." + ), + error_type="auth_required", + provider="openai", + aspect_ratio=aspect, + ) + + try: + import openai + except ImportError: + return error_response( + error="openai Python package not installed (pip install openai)", + error_type="missing_dependency", + provider="openai", + aspect_ratio=aspect, + ) + + tier_id, meta = _resolve_model() + size = _SIZES.get(aspect, _SIZES["square"]) + + # gpt-image-2 returns b64_json unconditionally and REJECTS + # ``response_format`` as an unknown parameter. Don't send it. 
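+        # A minimal sketch of the call this payload drives (illustrative
+        # values only — the real quality comes from meta["quality"] and the
+        # size from the aspect mapping above):
+        #
+        #     client = openai.OpenAI()
+        #     rsp = client.images.generate(model="gpt-image-2", prompt="a red fox",
+        #                                  size="1024x1024", n=1, quality="medium")
+        #     b64 = rsp.data[0].b64_json  # base64 today; the url branch below is defensive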
+ payload: Dict[str, Any] = { + "model": API_MODEL, + "prompt": prompt, + "size": size, + "n": 1, + "quality": meta["quality"], + } + + try: + client = openai.OpenAI() + response = client.images.generate(**payload) + except Exception as exc: + logger.debug("OpenAI image generation failed", exc_info=True) + return error_response( + error=f"OpenAI image generation failed: {exc}", + error_type="api_error", + provider="openai", + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + ) + + data = getattr(response, "data", None) or [] + if not data: + return error_response( + error="OpenAI returned no image data", + error_type="empty_response", + provider="openai", + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + ) + + first = data[0] + b64 = getattr(first, "b64_json", None) + url = getattr(first, "url", None) + revised_prompt = getattr(first, "revised_prompt", None) + + if b64: + try: + saved_path = save_b64_image(b64, prefix=f"openai_{tier_id}") + except Exception as exc: + return error_response( + error=f"Could not save image to cache: {exc}", + error_type="io_error", + provider="openai", + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + ) + image_ref = str(saved_path) + elif url: + # Defensive — gpt-image-2 returns b64 today, but fall back + # gracefully if the API ever changes. + image_ref = url + else: + return error_response( + error="OpenAI response contained neither b64_json nor URL", + error_type="empty_response", + provider="openai", + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + ) + + extra: Dict[str, Any] = {"size": size, "quality": meta["quality"]} + if revised_prompt: + extra["revised_prompt"] = revised_prompt + + return success_response( + image=image_ref, + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + provider="openai", + extra=extra, + ) + + +# --------------------------------------------------------------------------- +# Plugin entry point +# --------------------------------------------------------------------------- + + +def register(ctx) -> None: + """Plugin entry point — wire ``OpenAIImageGenProvider`` into the registry.""" + ctx.register_image_gen_provider(OpenAIImageGenProvider()) diff --git a/plugins/image_gen/openai/plugin.yaml b/plugins/image_gen/openai/plugin.yaml new file mode 100644 index 0000000000..18e4d86390 --- /dev/null +++ b/plugins/image_gen/openai/plugin.yaml @@ -0,0 +1,7 @@ +name: openai +version: 1.0.0 +description: "OpenAI image generation backend (gpt-image-2). Saves generated images to $HERMES_HOME/cache/images/." +author: NousResearch +kind: backend +requires_env: + - OPENAI_API_KEY diff --git a/run_agent.py b/run_agent.py index a5d22f75c4..8179a71546 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1178,7 +1178,7 @@ class AIAgent: client_kwargs["default_headers"] = copilot_default_headers() elif base_url_host_matches(effective_base, "api.kimi.com"): client_kwargs["default_headers"] = { - "User-Agent": "KimiCLI/1.30.0", + "User-Agent": "claude-code/0.1.0", } elif base_url_host_matches(effective_base, "portal.qwen.ai"): client_kwargs["default_headers"] = _qwen_portal_headers() @@ -2008,6 +2008,22 @@ class AIAgent: self._fallback_activated = False self._fallback_index = 0 + # When the user deliberately swaps primary providers (e.g. openrouter + # → anthropic), drop any fallback entries that target the OLD primary + # or the NEW one. 
The chain was seeded from config at agent init for + # the original provider — without pruning, a failed turn on the new + # primary silently re-activates the provider the user just rejected, + # which is exactly what was reported during TUI v2 blitz testing + # ("switched to anthropic, tui keeps trying openrouter"). + old_norm = (old_provider or "").strip().lower() + new_norm = (new_provider or "").strip().lower() + if old_norm and new_norm and old_norm != new_norm: + self._fallback_chain = [ + entry for entry in self._fallback_chain + if (entry.get("provider") or "").strip().lower() not in {old_norm, new_norm} + ] + self._fallback_model = self._fallback_chain[0] if self._fallback_chain else None + logging.info( "Model switched in-place: %s (%s) -> %s (%s)", old_model, old_provider, new_model, new_provider, @@ -4295,10 +4311,6 @@ class AIAgent: if self._memory_store: self._memory_store.load_from_disk() - def _responses_tools(self, tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]: - """Convert chat-completions tool schemas to Responses function-tool schemas.""" - return _codex_responses_tools(tools if tools is not None else self.tools) - @staticmethod def _deterministic_call_id(fn_name: str, arguments: str, index: int = 0) -> str: """Generate a deterministic call_id from tool call content. @@ -4322,33 +4334,6 @@ class AIAgent: """Build a valid Responses `function_call.id` (must start with `fc_`).""" return _codex_derive_responses_function_call_id(call_id, response_item_id) - def _chat_messages_to_responses_input(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """Convert internal chat-style messages to Responses input items.""" - return _codex_chat_messages_to_responses_input(messages) - - def _preflight_codex_input_items(self, raw_items: Any) -> List[Dict[str, Any]]: - return _codex_preflight_codex_input_items(raw_items) - - def _preflight_codex_api_kwargs( - self, - api_kwargs: Any, - *, - allow_stream: bool = False, - ) -> Dict[str, Any]: - return _codex_preflight_codex_api_kwargs(api_kwargs, allow_stream=allow_stream) - - def _extract_responses_message_text(self, item: Any) -> str: - """Extract assistant text from a Responses message output item.""" - return _codex_extract_responses_message_text(item) - - def _extract_responses_reasoning_text(self, item: Any) -> str: - """Extract a compact reasoning text from a Responses reasoning item.""" - return _codex_extract_responses_reasoning_text(item) - - def _normalize_codex_response(self, response: Any) -> tuple[Any, str]: - """Normalize a Responses API object to an assistant_message-like object.""" - return _codex_normalize_codex_response(response) - def _thread_identity(self) -> str: thread = threading.current_thread() return f"{thread.name}:{thread.ident}" @@ -4841,7 +4826,7 @@ class AIAgent: active_client = client or self._ensure_primary_openai_client(reason="codex_create_stream_fallback") fallback_kwargs = dict(api_kwargs) fallback_kwargs["stream"] = True - fallback_kwargs = self._preflight_codex_api_kwargs(fallback_kwargs, allow_stream=True) + fallback_kwargs = self._get_codex_transport().preflight_kwargs(fallback_kwargs, allow_stream=True) stream_or_response = active_client.responses.create(**fallback_kwargs) # Compatibility shim for mocks or providers that still return a concrete response. 
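As a sanity check on the fallback-chain pruning above, here is a minimal self-contained sketch (provider names are hypothetical; the filter mirrors the comprehension the hunk adds):

```python
chain = [
    {"provider": "openrouter", "model": "gpt-5"},
    {"provider": "anthropic", "model": "claude-sonnet"},
    {"provider": "nous", "model": "hermes-4"},
]
old_norm, new_norm = "openrouter", "anthropic"  # user swapped openrouter -> anthropic

# Drop entries targeting either the old or the new primary.
pruned = [
    entry for entry in chain
    if (entry.get("provider") or "").strip().lower() not in {old_norm, new_norm}
]

assert pruned == [{"provider": "nous", "model": "hermes-4"}]  # only the third party survives
```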
@@ -5036,7 +5021,7 @@ class AIAgent: self._client_kwargs["default_headers"] = copilot_default_headers() elif base_url_host_matches(base_url, "api.kimi.com"): - self._client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} + self._client_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"} elif base_url_host_matches(base_url, "portal.qwen.ai"): self._client_kwargs["default_headers"] = _qwen_portal_headers() elif base_url_host_matches(base_url, "chatgpt.com"): @@ -6583,6 +6568,33 @@ class AIAgent: self._anthropic_transport = t return t + def _get_codex_transport(self): + """Return the cached ResponsesApiTransport instance (lazy singleton).""" + t = getattr(self, "_codex_transport", None) + if t is None: + from agent.transports import get_transport + t = get_transport("codex_responses") + self._codex_transport = t + return t + + def _get_chat_completions_transport(self): + """Return the cached ChatCompletionsTransport instance (lazy singleton).""" + t = getattr(self, "_chat_completions_transport", None) + if t is None: + from agent.transports import get_transport + t = get_transport("chat_completions") + self._chat_completions_transport = t + return t + + def _get_bedrock_transport(self): + """Return the cached BedrockTransport instance (lazy singleton).""" + t = getattr(self, "_bedrock_transport", None) + if t is None: + from agent.transports import get_transport + t = get_transport("bedrock_converse") + self._bedrock_transport = t + return t + def _prepare_anthropic_messages_for_api(self, api_messages: list) -> list: if not any( isinstance(msg, dict) and self._content_has_image_parts(msg.get("content")) @@ -6722,31 +6734,20 @@ class AIAgent: # AWS Bedrock native Converse API — bypasses the OpenAI client entirely. # The adapter handles message/tool conversion and boto3 calls directly. 
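        # Rough shape of what the transport returns (the two dunder keys are
        # taken from the inline construction this hunk replaces; the Converse
        # field names are assumptions about the adapter's output, not verified
        # here):
        #   {"__bedrock_converse__": True, "__bedrock_region__": "us-east-1",
        #    "modelId": ..., "messages": [...], "toolConfig": {...}}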
if self.api_mode == "bedrock_converse": - from agent.bedrock_adapter import build_converse_kwargs + _bt = self._get_bedrock_transport() region = getattr(self, "_bedrock_region", None) or "us-east-1" guardrail = getattr(self, "_bedrock_guardrail_config", None) - return { - "__bedrock_converse__": True, - "__bedrock_region__": region, - **build_converse_kwargs( - model=self.model, - messages=api_messages, - tools=self.tools, - max_tokens=self.max_tokens or 4096, - temperature=None, # Let the model use its default - guardrail_config=guardrail, - ), - } + return _bt.build_kwargs( + model=self.model, + messages=api_messages, + tools=self.tools, + max_tokens=self.max_tokens or 4096, + region=region, + guardrail_config=guardrail, + ) if self.api_mode == "codex_responses": - instructions = "" - payload_messages = api_messages - if api_messages and api_messages[0].get("role") == "system": - instructions = str(api_messages[0].get("content") or "").strip() - payload_messages = api_messages[1:] - if not instructions: - instructions = DEFAULT_AGENT_IDENTITY - + _ct = self._get_codex_transport() is_github_responses = ( base_url_host_matches(self.base_url, "models.github.ai") or base_url_host_matches(self.base_url, "api.githubcopilot.com") @@ -6758,320 +6759,118 @@ class AIAgent: and "/backend-api/codex" in self._base_url_lower ) ) - - # Resolve reasoning effort: config > default (medium) - reasoning_effort = "medium" - reasoning_enabled = True - if self.reasoning_config and isinstance(self.reasoning_config, dict): - if self.reasoning_config.get("enabled") is False: - reasoning_enabled = False - elif self.reasoning_config.get("effort"): - reasoning_effort = self.reasoning_config["effort"] - - # Clamp effort levels not supported by the Responses API model. - # GPT-5.4 supports none/low/medium/high/xhigh but not "minimal". - # "minimal" is valid on OpenRouter and GPT-5 but fails on 5.2/5.4. - _effort_clamp = {"minimal": "low"} - reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort) - - kwargs = { - "model": self.model, - "instructions": instructions, - "input": self._chat_messages_to_responses_input(payload_messages), - "tools": self._responses_tools(), - "tool_choice": "auto", - "parallel_tool_calls": True, - "store": False, - } - - if not is_github_responses: - kwargs["prompt_cache_key"] = self.session_id - is_xai_responses = self.provider == "xai" or self._base_url_hostname == "api.x.ai" - - if reasoning_enabled and is_xai_responses: - # xAI reasons automatically — no effort param, just include encrypted content - kwargs["include"] = ["reasoning.encrypted_content"] - elif reasoning_enabled: - if is_github_responses: - # Copilot's Responses route advertises reasoning-effort support, - # but not OpenAI-specific prompt cache or encrypted reasoning - # fields. Keep the payload to the documented subset. 
- github_reasoning = self._github_models_reasoning_extra_body() - if github_reasoning is not None: - kwargs["reasoning"] = github_reasoning - else: - kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"} - kwargs["include"] = ["reasoning.encrypted_content"] - elif not is_github_responses and not is_xai_responses: - kwargs["include"] = [] - - if self.request_overrides: - kwargs.update(self.request_overrides) - - if self.max_tokens is not None and not is_codex_backend: - kwargs["max_output_tokens"] = self.max_tokens - - if is_xai_responses and getattr(self, "session_id", None): - kwargs["extra_headers"] = {"x-grok-conv-id": self.session_id} - - return kwargs - - sanitized_messages = api_messages - needs_sanitization = False - for msg in api_messages: - if not isinstance(msg, dict): - continue - if "codex_reasoning_items" in msg: - needs_sanitization = True - break - - tool_calls = msg.get("tool_calls") - if isinstance(tool_calls, list): - for tool_call in tool_calls: - if not isinstance(tool_call, dict): - continue - if "call_id" in tool_call or "response_item_id" in tool_call: - needs_sanitization = True - break - if needs_sanitization: - break - - if needs_sanitization: - sanitized_messages = copy.deepcopy(api_messages) - for msg in sanitized_messages: - if not isinstance(msg, dict): - continue - - # Codex-only replay state must not leak into strict chat-completions APIs. - msg.pop("codex_reasoning_items", None) - - tool_calls = msg.get("tool_calls") - if isinstance(tool_calls, list): - for tool_call in tool_calls: - if isinstance(tool_call, dict): - tool_call.pop("call_id", None) - tool_call.pop("response_item_id", None) - - # Qwen portal: normalize content to list-of-dicts, inject cache_control. - # Must run AFTER codex sanitization so we transform the final messages. - # If sanitization already deepcopied, reuse that copy (in-place). - if self._is_qwen_portal(): - if sanitized_messages is api_messages: - # No sanitization was done — we need our own copy. - sanitized_messages = self._qwen_prepare_chat_messages(sanitized_messages) - else: - # Already a deepcopy — transform in place to avoid a second deepcopy. - self._qwen_prepare_chat_messages_inplace(sanitized_messages) - - # GPT-5 and Codex models respond better to 'developer' than 'system' - # for instruction-following. Swap the role at the API boundary so - # internal message representation stays uniform ("system"). - _model_lower = (self.model or "").lower() - if ( - sanitized_messages - and sanitized_messages[0].get("role") == "system" - and any(p in _model_lower for p in DEVELOPER_ROLE_MODELS) - ): - # Shallow-copy the list + first message only — rest stays shared. 
- sanitized_messages = list(sanitized_messages) - sanitized_messages[0] = {**sanitized_messages[0], "role": "developer"} - - provider_preferences = {} - if self.providers_allowed: - provider_preferences["only"] = self.providers_allowed - if self.providers_ignored: - provider_preferences["ignore"] = self.providers_ignored - if self.providers_order: - provider_preferences["order"] = self.providers_order - if self.provider_sort: - provider_preferences["sort"] = self.provider_sort - if self.provider_require_parameters: - provider_preferences["require_parameters"] = True - if self.provider_data_collection: - provider_preferences["data_collection"] = self.provider_data_collection - - api_kwargs = { - "model": self.model, - "messages": sanitized_messages, - "timeout": self._resolved_api_call_timeout(), - } - try: - from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE - except Exception: - _fixed_temperature_for_model = None - OMIT_TEMPERATURE = None - if _fixed_temperature_for_model is not None: - fixed_temperature = _fixed_temperature_for_model(self.model, self.base_url) - if fixed_temperature is OMIT_TEMPERATURE: - api_kwargs.pop("temperature", None) - elif fixed_temperature is not None: - api_kwargs["temperature"] = fixed_temperature - if self._is_qwen_portal(): - api_kwargs["metadata"] = { - "sessionId": self.session_id or "hermes", - "promptId": str(uuid.uuid4()), - } - if self.tools: - api_kwargs["tools"] = self.tools - - # ── max_tokens for chat_completions ────────────────────────────── - # Priority: ephemeral override (error recovery / length-continuation - # boost) > user-configured max_tokens > provider-specific defaults. - _ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None) - if _ephemeral_out is not None: - self._ephemeral_max_output_tokens = None # consume immediately - api_kwargs.update(self._max_tokens_param(_ephemeral_out)) - elif self.max_tokens is not None: - api_kwargs.update(self._max_tokens_param(self.max_tokens)) - elif "integrate.api.nvidia.com" in self._base_url_lower: - # NVIDIA NIM defaults to a very low max_tokens when omitted, - # causing models like GLM-4.7 to truncate immediately (thinking - # tokens alone exhaust the budget). 16384 provides adequate room. - api_kwargs.update(self._max_tokens_param(16384)) - elif self._is_qwen_portal(): - # Qwen Portal defaults to a very low max_tokens when omitted. - # Reasoning models (qwen3-coder-plus) exhaust that budget on - # thinking tokens alone, causing the portal to return - # finish_reason="stop" with truncated output — the agent sees - # this as an intentional stop and exits the loop. Send 65536 - # (the documented max output for qwen3-coder models) so the - # model has adequate output budget for tool calls. - api_kwargs.update(self._max_tokens_param(65536)) - elif ( - base_url_host_matches(self.base_url, "api.kimi.com") - or base_url_host_matches(self.base_url, "moonshot.ai") - or base_url_host_matches(self.base_url, "moonshot.cn") - ): - # Kimi/Moonshot defaults to a low max_tokens when omitted. - # Reasoning tokens share the output budget — without an explicit - # value the model can exhaust it on thinking alone, causing - # "Response truncated due to output length limit". 32000 matches - # Kimi CLI's default (see MoonshotAI/kimi-cli kimi.py generate()). - api_kwargs.update(self._max_tokens_param(32000)) - # Kimi requires reasoning_effort as a top-level chat completions - # parameter (not inside extra_body). 
Mirror Kimi CLI's - # with_generation_kwargs(reasoning_effort=...) / with_thinking(): - # when thinking is disabled, Kimi CLI omits reasoning_effort - # entirely (maps to None). - _kimi_thinking_off = bool( - self.reasoning_config - and isinstance(self.reasoning_config, dict) - and self.reasoning_config.get("enabled") is False + return _ct.build_kwargs( + model=self.model, + messages=api_messages, + tools=self.tools, + reasoning_config=self.reasoning_config, + session_id=getattr(self, "session_id", None), + max_tokens=self.max_tokens, + request_overrides=self.request_overrides, + is_github_responses=is_github_responses, + is_codex_backend=is_codex_backend, + is_xai_responses=is_xai_responses, + github_reasoning_extra=self._github_models_reasoning_extra_body() if is_github_responses else None, ) - if not _kimi_thinking_off: - _kimi_effort = "medium" - if self.reasoning_config and isinstance(self.reasoning_config, dict): - _e = (self.reasoning_config.get("effort") or "").strip().lower() - if _e in ("low", "medium", "high"): - _kimi_effort = _e - api_kwargs["reasoning_effort"] = _kimi_effort - elif (self._is_openrouter_url() or "nousresearch" in self._base_url_lower) and "claude" in (self.model or "").lower(): - # OpenRouter and Nous Portal translate requests to Anthropic's - # Messages API, which requires max_tokens as a mandatory field. - # When we omit it, the proxy picks a default that can be too - # low — the model spends its output budget on thinking and has - # almost nothing left for the actual response (especially large - # tool calls like write_file). Sending the model's real output - # limit ensures full capacity. - try: - from agent.anthropic_adapter import _get_anthropic_max_output - _model_output_limit = _get_anthropic_max_output(self.model) - api_kwargs["max_tokens"] = _model_output_limit - except Exception: - pass # fail open — let the proxy pick its default - extra_body = {} + # ── chat_completions (default) ───────────────────────────────────── + _ct = self._get_chat_completions_transport() - _is_openrouter = self._is_openrouter_url() - _is_github_models = ( + # Provider detection flags + _is_qwen = self._is_qwen_portal() + _is_or = self._is_openrouter_url() + _is_gh = ( base_url_host_matches(self._base_url_lower, "models.github.ai") or base_url_host_matches(self._base_url_lower, "api.githubcopilot.com") ) - - # Provider preferences (only, ignore, order, sort) are OpenRouter- - # specific. Only send to OpenRouter-compatible endpoints. - # TODO: Nous Portal will add transparent proxy support — re-enable - # for _is_nous when their backend is updated. - if provider_preferences and _is_openrouter: - extra_body["provider"] = provider_preferences _is_nous = "nousresearch" in self._base_url_lower - - # Kimi/Moonshot API uses extra_body.thinking (separate from the - # top-level reasoning_effort) to enable/disable reasoning mode. 
- # Mirror Kimi CLI's with_thinking() behavior exactly — see - # MoonshotAI/kimi-cli packages/kosong/src/kosong/chat_provider/kimi.py + _is_nvidia = "integrate.api.nvidia.com" in self._base_url_lower _is_kimi = ( base_url_host_matches(self.base_url, "api.kimi.com") or base_url_host_matches(self.base_url, "moonshot.ai") or base_url_host_matches(self.base_url, "moonshot.cn") ) - if _is_kimi: - _kimi_thinking_enabled = True - if self.reasoning_config and isinstance(self.reasoning_config, dict): - if self.reasoning_config.get("enabled") is False: - _kimi_thinking_enabled = False - extra_body["thinking"] = { - "type": "enabled" if _kimi_thinking_enabled else "disabled", + + # Temperature: _fixed_temperature_for_model may return OMIT_TEMPERATURE + # sentinel (temperature omitted entirely), a numeric override, or None. + try: + from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE + _ft = _fixed_temperature_for_model(self.model, self.base_url) + _omit_temp = _ft is OMIT_TEMPERATURE + _fixed_temp = _ft if not _omit_temp else None + except Exception: + _omit_temp = False + _fixed_temp = None + + # Provider preferences (OpenRouter-specific) + _prefs: Dict[str, Any] = {} + if self.providers_allowed: + _prefs["only"] = self.providers_allowed + if self.providers_ignored: + _prefs["ignore"] = self.providers_ignored + if self.providers_order: + _prefs["order"] = self.providers_order + if self.provider_sort: + _prefs["sort"] = self.provider_sort + if self.provider_require_parameters: + _prefs["require_parameters"] = True + if self.provider_data_collection: + _prefs["data_collection"] = self.provider_data_collection + + # Anthropic max output for Claude on OpenRouter/Nous + _ant_max = None + if (_is_or or _is_nous) and "claude" in (self.model or "").lower(): + try: + from agent.anthropic_adapter import _get_anthropic_max_output + _ant_max = _get_anthropic_max_output(self.model) + except Exception: + pass # fail open — let the proxy pick its default + + # Qwen session metadata precomputed here (promptId is per-call random) + _qwen_meta = None + if _is_qwen: + _qwen_meta = { + "sessionId": self.session_id or "hermes", + "promptId": str(uuid.uuid4()), } - if self._supports_reasoning_extra_body(): - if _is_github_models: - github_reasoning = self._github_models_reasoning_extra_body() - if github_reasoning is not None: - extra_body["reasoning"] = github_reasoning - else: - if self.reasoning_config is not None: - rc = dict(self.reasoning_config) - # Nous Portal requires reasoning enabled — don't send - # enabled=false to it (would cause 400). - if _is_nous and rc.get("enabled") is False: - pass # omit reasoning entirely for Nous when disabled - else: - extra_body["reasoning"] = rc - else: - extra_body["reasoning"] = { - "enabled": True, - "effort": "medium" - } + # Ephemeral max output override — consume immediately so the next + # turn doesn't inherit it. + _ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None) + if _ephemeral_out is not None: + self._ephemeral_max_output_tokens = None - # Nous Portal product attribution - if _is_nous: - extra_body["tags"] = ["product=hermes-agent"] - - # Ollama num_ctx: override the 2048 default so the model actually - # uses the context window it was trained for. Passed via the OpenAI - # SDK's extra_body → options.num_ctx, which Ollama's OpenAI-compat - # endpoint forwards to the runner as --ctx-size. 
- if self._ollama_num_ctx: - options = extra_body.get("options", {}) - options["num_ctx"] = self._ollama_num_ctx - extra_body["options"] = options - - # Ollama / custom provider: pass think=false when reasoning is disabled. - # Ollama does not recognise the OpenRouter-style `reasoning` extra_body - # field, so we use its native `think` parameter instead. - # This prevents thinking-capable models (Qwen3, etc.) from generating - # blocks and producing empty-response errors when the user has - # set reasoning_effort: none. - if self.provider == "custom" and self.reasoning_config and isinstance(self.reasoning_config, dict): - _effort = (self.reasoning_config.get("effort") or "").strip().lower() - _enabled = self.reasoning_config.get("enabled", True) - if _effort == "none" or _enabled is False: - extra_body["think"] = False - - if self._is_qwen_portal(): - extra_body["vl_high_resolution_images"] = True - - if extra_body: - api_kwargs["extra_body"] = extra_body - - # Priority Processing / generic request overrides (e.g. service_tier). - # Applied last so overrides win over any defaults set above. - if self.request_overrides: - api_kwargs.update(self.request_overrides) - - return api_kwargs + return _ct.build_kwargs( + model=self.model, + messages=api_messages, + tools=self.tools, + timeout=self._resolved_api_call_timeout(), + max_tokens=self.max_tokens, + ephemeral_max_output_tokens=_ephemeral_out, + max_tokens_param_fn=self._max_tokens_param, + reasoning_config=self.reasoning_config, + request_overrides=self.request_overrides, + session_id=getattr(self, "session_id", None), + model_lower=(self.model or "").lower(), + is_openrouter=_is_or, + is_nous=_is_nous, + is_qwen_portal=_is_qwen, + is_github_models=_is_gh, + is_nvidia_nim=_is_nvidia, + is_kimi=_is_kimi, + is_custom_provider=self.provider == "custom", + ollama_num_ctx=self._ollama_num_ctx, + provider_preferences=_prefs or None, + qwen_prepare_fn=self._qwen_prepare_chat_messages if _is_qwen else None, + qwen_prepare_inplace_fn=self._qwen_prepare_chat_messages_inplace if _is_qwen else None, + qwen_session_metadata=_qwen_meta, + fixed_temperature=_fixed_temp, + omit_temperature=_omit_temp, + supports_reasoning=self._supports_reasoning_extra_body(), + github_reasoning_extra=self._github_models_reasoning_extra_body() if _is_gh else None, + anthropic_max_output=_ant_max, + ) def _supports_reasoning_extra_body(self) -> bool: """Return True when reasoning extra_body is safe to send for this route/model. 
@@ -7428,7 +7227,7 @@ class AIAgent: if not _aux_available and self.api_mode == "codex_responses": # No auxiliary client -- use the Codex Responses path directly codex_kwargs = self._build_api_kwargs(api_messages) - codex_kwargs["tools"] = self._responses_tools([memory_tool_def]) + codex_kwargs["tools"] = self._get_codex_transport().convert_tools([memory_tool_def]) if _flush_temperature is not None: codex_kwargs["temperature"] = _flush_temperature else: @@ -7463,9 +7262,15 @@ class AIAgent: # Extract tool calls from the response, handling all API formats tool_calls = [] if self.api_mode == "codex_responses" and not _aux_available: - assistant_msg, _ = self._normalize_codex_response(response) - if assistant_msg and assistant_msg.tool_calls: - tool_calls = assistant_msg.tool_calls + _ct_flush = self._get_codex_transport() + _cnr_flush = _ct_flush.normalize_response(response) + if _cnr_flush and _cnr_flush.tool_calls: + tool_calls = [ + SimpleNamespace( + id=tc.id, type="function", + function=SimpleNamespace(name=tc.name, arguments=tc.arguments), + ) for tc in _cnr_flush.tool_calls + ] elif self.api_mode == "anthropic_messages" and not _aux_available: _tfn = self._get_anthropic_transport() _flush_nr = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth) @@ -7635,8 +7440,27 @@ class AIAgent: finally: self._executing_tools = False + def _dispatch_delegate_task(self, function_args: dict) -> str: + """Single call site for delegate_task dispatch. + + New DELEGATE_TASK_SCHEMA fields only need to be added here to reach all + invocation paths (concurrent, sequential, inline). + """ + from tools.delegate_tool import delegate_task as _delegate_task + return _delegate_task( + goal=function_args.get("goal"), + context=function_args.get("context"), + toolsets=function_args.get("toolsets"), + tasks=function_args.get("tasks"), + max_iterations=function_args.get("max_iterations"), + acp_command=function_args.get("acp_command"), + acp_args=function_args.get("acp_args"), + role=function_args.get("role"), + parent_agent=self, + ) + def _invoke_tool(self, function_name: str, function_args: dict, effective_task_id: str, - tool_call_id: Optional[str] = None) -> str: + tool_call_id: Optional[str] = None, messages: list = None) -> str: """Invoke a single tool and return the result string. No display logic. Handles both agent-level tools (todo, memory, etc.) 
and registry-dispatched @@ -7704,15 +7528,7 @@ class AIAgent: callback=self.clarify_callback, ) elif function_name == "delegate_task": - from tools.delegate_tool import delegate_task as _delegate_task - return _delegate_task( - goal=function_args.get("goal"), - context=function_args.get("context"), - toolsets=function_args.get("toolsets"), - tasks=function_args.get("tasks"), - max_iterations=function_args.get("max_iterations"), - parent_agent=self, - ) + return self._dispatch_delegate_task(function_args) else: return handle_function_call( function_name, function_args, effective_task_id, @@ -7874,7 +7690,7 @@ class AIAgent: pass start = time.time() try: - result = self._invoke_tool(function_name, function_args, effective_task_id, tool_call.id) + result = self._invoke_tool(function_name, function_args, effective_task_id, tool_call.id, messages=messages) except Exception as tool_error: result = f"Error executing tool '{function_name}': {tool_error}" logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True) @@ -8226,7 +8042,6 @@ class AIAgent: if self._should_emit_quiet_tool_messages(): self._vprint(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}") elif function_name == "delegate_task": - from tools.delegate_tool import delegate_task as _delegate_task tasks_arg = function_args.get("tasks") if tasks_arg and isinstance(tasks_arg, list): spinner_label = f"🔀 delegating {len(tasks_arg)} tasks" @@ -8241,14 +8056,7 @@ class AIAgent: self._delegate_spinner = spinner _delegate_result = None try: - function_result = _delegate_task( - goal=function_args.get("goal"), - context=function_args.get("context"), - toolsets=function_args.get("toolsets"), - tasks=tasks_arg, - max_iterations=function_args.get("max_iterations"), - parent_agent=self, - ) + function_result = self._dispatch_delegate_task(function_args) _delegate_result = function_result finally: self._delegate_spinner = None @@ -8506,8 +8314,9 @@ class AIAgent: codex_kwargs = self._build_api_kwargs(api_messages) codex_kwargs.pop("tools", None) summary_response = self._run_codex_stream(codex_kwargs) - assistant_message, _ = self._normalize_codex_response(summary_response) - final_response = (assistant_message.content or "").strip() if assistant_message else "" + _ct_sum = self._get_codex_transport() + _cnr_sum = _ct_sum.normalize_response(summary_response) + final_response = (_cnr_sum.content or "").strip() else: summary_kwargs = { "model": self.model, @@ -8564,8 +8373,9 @@ class AIAgent: codex_kwargs = self._build_api_kwargs(api_messages) codex_kwargs.pop("tools", None) retry_response = self._run_codex_stream(codex_kwargs) - retry_msg, _ = self._normalize_codex_response(retry_response) - final_response = (retry_msg.content or "").strip() if retry_msg else "" + _ct_retry = self._get_codex_transport() + _cnr_retry = _ct_retry.normalize_response(retry_response) + final_response = (_cnr_retry.content or "").strip() elif self.api_mode == "anthropic_messages": _tretry = self._get_anthropic_transport() _ant_kw2 = _tretry.build_kwargs(model=self.model, messages=api_messages, tools=None, @@ -8676,6 +8486,11 @@ class AIAgent: self._persist_user_message_override = persist_user_message # Generate unique task_id if not provided to isolate VMs between concurrent tasks effective_task_id = task_id or str(uuid.uuid4()) + # Expose the active task_id so tools running mid-turn (e.g. delegate_task + # in delegate_tool.py) can identify this agent for the cross-agent file + # state registry. 
Set BEFORE any tool dispatch so snapshots taken at + # child-launch time see the parent's real id, not None. + self._current_task_id = effective_task_id # Reset retry counters and iteration budget at the start of each turn # so subagent usage from a previous turn doesn't eat into the next one. @@ -9322,7 +9137,7 @@ class AIAgent: if self._force_ascii_payload: _sanitize_structure_non_ascii(api_kwargs) if self.api_mode == "codex_responses": - api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False) + api_kwargs = self._get_codex_transport().preflight_kwargs(api_kwargs, allow_stream=False) try: from hermes_cli.plugins import invoke_hook as _invoke_hook @@ -9410,38 +9225,34 @@ class AIAgent: response_invalid = False error_details = [] if self.api_mode == "codex_responses": - output_items = getattr(response, "output", None) if response is not None else None - if response is None: - response_invalid = True - error_details.append("response is None") - elif not isinstance(output_items, list): - response_invalid = True - error_details.append("response.output is not a list") - elif not output_items: - # Stream backfill may have failed, but - # _normalize_codex_response can still recover - # from response.output_text. Only mark invalid - # when that fallback is also absent. - _out_text = getattr(response, "output_text", None) - _out_text_stripped = _out_text.strip() if isinstance(_out_text, str) else "" - if _out_text_stripped: - logger.debug( - "Codex response.output is empty but output_text is present " - "(%d chars); deferring to normalization.", - len(_out_text_stripped), - ) - else: - _resp_status = getattr(response, "status", None) - _resp_incomplete = getattr(response, "incomplete_details", None) - logger.warning( - "Codex response.output is empty after stream backfill " - "(status=%s, incomplete_details=%s, model=%s). %s", - _resp_status, _resp_incomplete, - getattr(response, "model", None), - f"api_mode={self.api_mode} provider={self.provider}", - ) + _ct_v = self._get_codex_transport() + if not _ct_v.validate_response(response): + if response is None: response_invalid = True - error_details.append("response.output is empty") + error_details.append("response is None") + else: + # output_text fallback: stream backfill may have failed + # but normalize can still recover from output_text + _out_text = getattr(response, "output_text", None) + _out_text_stripped = _out_text.strip() if isinstance(_out_text, str) else "" + if _out_text_stripped: + logger.debug( + "Codex response.output is empty but output_text is present " + "(%d chars); deferring to normalization.", + len(_out_text_stripped), + ) + else: + _resp_status = getattr(response, "status", None) + _resp_incomplete = getattr(response, "incomplete_details", None) + logger.warning( + "Codex response.output is empty after stream backfill " + "(status=%s, incomplete_details=%s, model=%s). 
%s", + _resp_status, _resp_incomplete, + getattr(response, "model", None), + f"api_mode={self.api_mode} provider={self.provider}", + ) + response_invalid = True + error_details.append("response.output is empty") elif self.api_mode == "anthropic_messages": _tv = self._get_anthropic_transport() if not _tv.validate_response(response): @@ -9450,8 +9261,17 @@ class AIAgent: error_details.append("response is None") else: error_details.append("response.content invalid (not a non-empty list)") + elif self.api_mode == "bedrock_converse": + _btv = self._get_bedrock_transport() + if not _btv.validate_response(response): + response_invalid = True + if response is None: + error_details.append("response is None") + else: + error_details.append("Bedrock response invalid (no output or choices)") else: - if response is None or not hasattr(response, 'choices') or response.choices is None or not response.choices: + _ctv = self._get_chat_completions_transport() + if not _ctv.validate_response(response): response_invalid = True if response is None: error_details.append("response is None") @@ -9612,6 +9432,10 @@ class AIAgent: elif self.api_mode == "anthropic_messages": _tfr = self._get_anthropic_transport() finish_reason = _tfr.map_finish_reason(response.stop_reason) + elif self.api_mode == "bedrock_converse": + # Bedrock response is already normalized at dispatch — finish_reason + # is already in OpenAI format via normalize_converse_response() + finish_reason = response.choices[0].finish_reason if hasattr(response, "choices") and response.choices else "stop" else: finish_reason = response.choices[0].finish_reason assistant_message = response.choices[0].message @@ -10867,7 +10691,40 @@ class AIAgent: try: if self.api_mode == "codex_responses": - assistant_message, finish_reason = self._normalize_codex_response(response) + _ct = self._get_codex_transport() + _cnr = _ct.normalize_response(response) + # Back-compat shim: downstream expects SimpleNamespace with + # codex-specific fields (.codex_reasoning_items, .reasoning_details, + # and .call_id/.response_item_id on tool calls). 
+ _tc_list = None + if _cnr.tool_calls: + _tc_list = [] + for tc in _cnr.tool_calls: + _tc_ns = SimpleNamespace( + id=tc.id, type="function", + function=SimpleNamespace(name=tc.name, arguments=tc.arguments), + ) + if tc.provider_data: + if tc.provider_data.get("call_id"): + _tc_ns.call_id = tc.provider_data["call_id"] + if tc.provider_data.get("response_item_id"): + _tc_ns.response_item_id = tc.provider_data["response_item_id"] + _tc_list.append(_tc_ns) + assistant_message = SimpleNamespace( + content=_cnr.content, + tool_calls=_tc_list or None, + reasoning=_cnr.reasoning, + reasoning_content=None, + codex_reasoning_items=( + _cnr.provider_data.get("codex_reasoning_items") + if _cnr.provider_data else None + ), + reasoning_details=( + _cnr.provider_data.get("reasoning_details") + if _cnr.provider_data else None + ), + ) + finish_reason = _cnr.finish_reason elif self.api_mode == "anthropic_messages": _transport = self._get_anthropic_transport() _nr = _transport.normalize_response( diff --git a/scripts/release.py b/scripts/release.py index f9b247e077..5a82b89b98 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -45,6 +45,7 @@ AUTHOR_MAP = { "teknium@nousresearch.com": "teknium1", "127238744+teknium1@users.noreply.github.com": "teknium1", # contributors (from noreply pattern) + "wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243", "snreynolds2506@gmail.com": "snreynolds", "35742124+0xbyt4@users.noreply.github.com": "0xbyt4", "71184274+MassiveMassimo@users.noreply.github.com": "MassiveMassimo", @@ -98,6 +99,7 @@ AUTHOR_MAP = { "mygamez@163.com": "zhongyueming1121", "hansnow@users.noreply.github.com": "hansnow", "134848055+UNLINEARITY@users.noreply.github.com": "UNLINEARITY", + "ben.burtenshaw@gmail.com": "burtenshaw", # contributors (manual mapping from git names) "ahmedsherif95@gmail.com": "asheriif", "liujinkun@bytedance.com": "liujinkun2025", @@ -129,6 +131,7 @@ AUTHOR_MAP = { "brooklyn.bb.nicholson@gmail.com": "brooklynnicholson", "withapurpose37@gmail.com": "StefanIsMe", "4317663+helix4u@users.noreply.github.com": "helix4u", + "ifkellx@users.noreply.github.com": "Ifkellx", "331214+counterposition@users.noreply.github.com": "counterposition", "blspear@gmail.com": "BrennerSpear", "akhater@gmail.com": "akhater", @@ -332,6 +335,7 @@ AUTHOR_MAP = { "asslaenn5@gmail.com": "Aslaaen", "shalompmc0505@naver.com": "pinion05", "105142614+VTRiot@users.noreply.github.com": "VTRiot", + "vivien000812@gmail.com": "iamagenius00", } diff --git a/skills/creative/baoyu-comic/PORT_NOTES.md b/skills/creative/baoyu-comic/PORT_NOTES.md new file mode 100644 index 0000000000..637b7befb5 --- /dev/null +++ b/skills/creative/baoyu-comic/PORT_NOTES.md @@ -0,0 +1,77 @@ +# Port Notes — baoyu-comic + +Ported from [JimLiu/baoyu-skills](https://github.com/JimLiu/baoyu-skills) v1.56.1. 
+
+## Changes from upstream
+
+### SKILL.md adaptations
+
+| Change | Upstream | Hermes |
+|--------|----------|--------|
+| Metadata namespace | `openclaw` | `hermes` (with `tags` + `homepage`) |
+| Trigger | Slash commands / CLI flags | Natural language skill matching |
+| User config | EXTEND.md file (project/user/XDG paths) | Removed — not part of Hermes infra |
+| User prompts | `AskUserQuestion` (batched) | `clarify` tool (one question at a time) |
+| Image generation | baoyu-imagine (Bun/TypeScript, supports `--ref`) | `image_generate` — **prompt-only**, returns a URL; no reference image input; agent must download the URL to the output directory |
+| PDF assembly | `scripts/merge-to-pdf.ts` (Bun + `pdf-lib`) | Removed — the PDF merge step is out of scope for this port; pages are delivered as PNGs only |
+| Platform support | Linux/macOS/Windows/WSL/PowerShell | Linux/macOS only |
+| File operations | Generic instructions | Hermes file tools (`write_file`, `read_file`) |
+
+### Structural removals
+
+- **`references/config/` directory** (removed entirely):
+  - `first-time-setup.md` — blocking first-time setup flow for EXTEND.md
+  - `preferences-schema.md` — EXTEND.md YAML schema
+  - `watermark-guide.md` — watermark config (tied to EXTEND.md)
+- **`scripts/` directory** (removed entirely): upstream's `merge-to-pdf.ts` depended on `pdf-lib`, which is not declared anywhere in the Hermes repo. Rather than add a new dependency, the port drops PDF assembly and delivers per-page PNGs.
+- **Workflow Step 8 (Merge to PDF)** removed from `workflow.md`; Step 9 (Completion report) renumbered to Step 8.
+- **Workflow Step 1.1** — "Load Preferences (EXTEND.md)" section removed from `workflow.md`; steps 1.2/1.3 renumbered to 1.1/1.2.
+- **Generic "User Input Tools" and "Image Generation Tools" preambles** — SKILL.md no longer lists fallback rules for multiple possible tools; it references `clarify` and `image_generate` directly.
+
+### Image generation strategy changes
+
+`image_generate`'s schema accepts only `prompt` and `aspect_ratio` (`landscape` | `portrait` | `square`). Upstream's reference-image flow (`--ref characters.png` for character consistency, plus user-supplied refs for style/palette/scene) does not map to this tool, so the workflow was restructured:
+
+- **Character sheet PNG** is still generated for multi-page comics, but it is repositioned as a **human-facing review artifact** (for visual verification) and a reference for later regenerations / manual prompt edits. Page prompts themselves are built from the **text descriptions** in `characters/characters.md` (embedded inline during Step 5). `image_generate` never sees the PNG as a visual input.
+- **User-supplied reference images** are reduced to `style` / `palette` / `scene` trait extraction — traits are embedded in the prompt body; the image files themselves are kept only for provenance under `refs/`.
+- **Page prompts** now mandate that character descriptions are embedded inline (copied from `characters/characters.md`) — this is the only mechanism left to enforce cross-page character consistency.
+- **Download step** — after every `image_generate` call, the returned URL is fetched to disk (e.g., `curl -fsSL "<url>" -o <page>.png`, with the placeholders filled in per page) and verified before the workflow advances.
+
+### SKILL.md reductions
+
+- CLI option columns (`--art`, `--tone`, `--layout`, `--aspect`, `--lang`, `--ref`, `--storyboard-only`, `--prompts-only`, `--images-only`, `--regenerate`) converted to plain-English option descriptions.
+- Preset files (`presets/*.md`) and `ohmsha-guide.md`: `` `--style X` `` / `` `--art X --tone Y` `` shorthand rewritten to `art=X, tone=Y` + natural-language references. +- `partial-workflows.md`: per-skill slash command invocations rewritten as user-intent cues; PDF-related outputs removed. +- `auto-selection.md`: priority order dropped the EXTEND.md tier. +- `analysis-framework.md`: language-priority comment updated (user option → conversation → source). + +### File naming convention + +Source content pasted by the user is saved as `source-{slug}.md`, where `{slug}` is the kebab-case topic slug used for the output directory. Backups follow the same pattern with a `-backup-YYYYMMDD-HHMMSS` suffix. SKILL.md and `workflow.md` now agree on this single convention. + +### What was preserved verbatim + +- All 6 art-style definitions (`references/art-styles/`) +- All 7 tone definitions (`references/tones/`) +- All 7 layout definitions (`references/layouts/`) +- Core templates: `character-template.md`, `storyboard-template.md`, `base-prompt.md` +- Preset bodies (only the first few intro lines adapted; special rules unchanged) +- Author, version, homepage attribution + +## Syncing with upstream + +To pull upstream updates: + +```bash +# Compare versions +curl -sL https://raw.githubusercontent.com/JimLiu/baoyu-skills/main/skills/baoyu-comic/SKILL.md | head -5 +# Look for the version: line + +# Diff a reference file +diff <(curl -sL https://raw.githubusercontent.com/JimLiu/baoyu-skills/main/skills/baoyu-comic/references/art-styles/manga.md) \ + references/art-styles/manga.md +``` + +Art-style, tone, and layout reference files can usually be overwritten directly (they're upstream-verbatim). `SKILL.md`, `references/workflow.md`, `references/partial-workflows.md`, `references/auto-selection.md`, `references/analysis-framework.md`, `references/ohmsha-guide.md`, and `references/presets/*.md` must be manually merged since they contain Hermes-specific adaptations. + +If upstream adds a Hermes-compatible PDF merge step (no extra npm deps), restore `scripts/` and reintroduce Step 8 in `workflow.md`. diff --git a/skills/creative/baoyu-comic/SKILL.md b/skills/creative/baoyu-comic/SKILL.md new file mode 100644 index 0000000000..d3c89ed4c7 --- /dev/null +++ b/skills/creative/baoyu-comic/SKILL.md @@ -0,0 +1,246 @@ +--- +name: baoyu-comic +description: Knowledge comic creator supporting multiple art styles and tones. Creates original educational comics with detailed panel layouts and sequential image generation. Use when user asks to create "知识漫画", "教育漫画", "biography comic", "tutorial comic", or "Logicomix-style comic". +version: 1.56.1 +author: 宝玉 (JimLiu) +license: MIT +metadata: + hermes: + tags: [comic, knowledge-comic, creative, image-generation] + homepage: https://github.com/JimLiu/baoyu-skills#baoyu-comic +--- + +# Knowledge Comic Creator + +Adapted from [baoyu-comic](https://github.com/JimLiu/baoyu-skills) for Hermes Agent's tool ecosystem. + +Create original knowledge comics with flexible art style × tone combinations. + +## When to Use + +Trigger this skill when the user asks to create a knowledge/educational comic, biography comic, tutorial comic, or uses terms like "知识漫画", "教育漫画", or "Logicomix-style". The user provides content (text, file path, URL, or topic) and optionally specifies art style, tone, layout, aspect ratio, or language. + +## Reference Images + +Hermes' `image_generate` tool is **prompt-only** — it accepts a text prompt and an aspect ratio, and returns an image URL. 
It does **NOT** accept reference images. When the user supplies a reference image, use it to **extract traits in text** that get embedded in every page prompt: + +**Intake**: Accept file paths when the user provides them (or pastes images in conversation). +- File path(s) → copy to `refs/NN-ref-{slug}.{ext}` alongside the comic output for provenance +- Pasted image with no path → ask the user for the path via `clarify`, or extract style traits verbally as a text fallback +- No reference → skip this section + +**Usage modes** (per reference): + +| Usage | Effect | +|-------|--------| +| `style` | Extract style traits (line treatment, texture, mood) and append to every page's prompt body | +| `palette` | Extract hex colors and append to every page's prompt body | +| `scene` | Extract scene composition or subject notes and append to the relevant page(s) | + +**Record in each page's prompt frontmatter** when refs exist: + +```yaml +references: + - ref_id: 01 + filename: 01-ref-scene.png + usage: style + traits: "muted earth tones, soft-edged ink wash, low-contrast backgrounds" +``` + +Character consistency is driven by **text descriptions** in `characters/characters.md` (written in Step 3) that get embedded inline in every page prompt (Step 5). The optional PNG character sheet generated in Step 7.1 is a human-facing review artifact, not an input to `image_generate`. + +## Options + +### Visual Dimensions + +| Option | Values | Description | +|--------|--------|-------------| +| Art | ligne-claire (default), manga, realistic, ink-brush, chalk, minimalist | Art style / rendering technique | +| Tone | neutral (default), warm, dramatic, romantic, energetic, vintage, action | Mood / atmosphere | +| Layout | standard (default), cinematic, dense, splash, mixed, webtoon, four-panel | Panel arrangement | +| Aspect | 3:4 (default, portrait), 4:3 (landscape), 16:9 (widescreen) | Page aspect ratio | +| Language | auto (default), zh, en, ja, etc. | Output language | +| Refs | File paths | Reference images used for style / palette trait extraction (not passed to the image model). See [Reference Images](#reference-images) above. | + +### Partial Workflow Options + +| Option | Description | +|--------|-------------| +| Storyboard only | Generate storyboard only, skip prompts and images | +| Prompts only | Generate storyboard + prompts, skip images | +| Images only | Generate images from existing prompts directory | +| Regenerate N | Regenerate specific page(s) only (e.g., `3` or `2,5,8`) | + +Details: [references/partial-workflows.md](references/partial-workflows.md) + +### Art, Tone & Preset Catalogue + +- **Art styles** (6): `ligne-claire`, `manga`, `realistic`, `ink-brush`, `chalk`, `minimalist`. Full definitions at `references/art-styles/