Merge branch 'main' of github.com:NousResearch/hermes-agent into bb/gui

# Conflicts: # cli.py # hermes_cli/main.py # run_agent.py # tests/hermes_cli/test_cmd_update.py # tools/mcp_tool.py # web/src/lib/gatewayClient.ts
2026-07-24 16:54:43 +00:00 · 2026-05-18 01:26:56 -05:00 · 2026-05-18 01:26:56 -05:00 · 02aaac8f73
commit 02aaac8f73
parent 046f0c01cb 43e566f77e
260 changed files with 24547 additions and 13573 deletions
--- a/18
+++ b/18
@ -66,9 +66,11 @@ RUN npm install --prefer-offline --no-audit && \
 # frontend stats the readme path during dep resolution, so we `touch` an
 # empty placeholder — the real README is restored by `COPY . .` below.
 #
-# `uv sync --frozen --no-install-project --extra all` installs only the
-# deps reachable through the composite `[all]` extra (handpicked set
-# intended for the production image).  We do NOT use `--all-extras`:
+# `uv sync --frozen --no-install-project --extra all --extra messaging`
+# installs the deps reachable through the composite `[all]` extra
+# (handpicked set intended for the production image), plus gateway
+# messaging adapters that should work in the published image without a
+# first-boot lazy install.  We do NOT use `--all-extras`:
 # that would pull in `[rl]` (atroposlib + tinker + torch + wandb from
 # git), `[yc-bench]` (another git dep), and `[termux-all]` (Android
 # redundancy), none of which belong in the published container.
@ -76,7 +78,7 @@ RUN npm install --prefer-offline --no-audit && \
 # The editable link is created after the source copy below.
 COPY pyproject.toml uv.lock ./
 RUN touch ./README.md
-RUN uv sync --frozen --no-install-project --extra all
+RUN uv sync --frozen --no-install-project --extra all --extra messaging

 # ---------- Source code ----------
 # .dockerignore excludes node_modules, so the installs above survive.
@ -94,10 +96,10 @@ RUN cd web && npm run build && \
 # hermes_cli/main.py succeeds (see #18800). /opt/hermes/web is build-time
 # only (HERMES_WEB_DIST points at hermes_cli/web_dist) and is intentionally
 # not chowned here.
-# The .venv MUST be hermes-writable so lazy_deps.py can install platform
-# packages (discord.py, telegram, slack, etc.) at first gateway boot.
-# Without this, `uv pip install` fails with EACCES and all messaging
-# adapters silently fail to load.  See tools/lazy_deps.py.
+# The .venv MUST remain hermes-writable so lazy_deps.py can install
+# remaining optional platform packages and future pin bumps at first use.
+# Without this, `uv pip install` fails with EACCES and adapters silently
+# fail to load.  See tools/lazy_deps.py.
 USER root
 RUN chmod -R a+rX /opt/hermes && \
    chown -R hermes:hermes /opt/hermes/.venv /opt/hermes/ui-tui /opt/hermes/node_modules
--- a/acp_adapter/tools.py
+++ b/acp_adapter/tools.py
@ -1123,7 +1123,6 @@ def build_tool_start(
        )

    # Generic fallback
-    import json
    try:
        args_text = json.dumps(arguments, indent=2, default=str)
    except (TypeError, ValueError):
--- a/acp_registry/agent.json
+++ b/acp_registry/agent.json
@ -1,7 +1,7 @@
 {
  "id": "hermes-agent",
  "name": "Hermes Agent",
-  "version": "0.13.0",
+  "version": "0.14.0",
  "description": "Self-improving open-source AI agent by Nous Research with ACP editor integration, persistent memory, skills, and rich tool support.",
  "repository": "https://github.com/NousResearch/hermes-agent",
  "website": "https://hermes-agent.nousresearch.com/docs/user-guide/features/acp",
@ -9,7 +9,7 @@
  "license": "MIT",
  "distribution": {
    "uvx": {
-      "package": "hermes-agent[acp]==0.13.0",
+      "package": "hermes-agent[acp]==0.14.0",
      "args": ["hermes-acp"]
    }
  }
--- a/agent/agent_init.py
+++ b/agent/agent_init.py
--- a/agent/agent_runtime_helpers.py
+++ b/agent/agent_runtime_helpers.py
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@ -755,7 +755,8 @@ class _CodexCompletionsAdapter:

        def _check_cancelled() -> None:
            if deadline is not None and time.monotonic() >= deadline:
-                timed_out.set()
+                if not timed_out.is_set():
+                    _close_client_on_timeout()
                raise TimeoutError(_timeout_message())
            try:
                from tools.interrupt import is_interrupted
@ -1233,7 +1234,7 @@ def _read_nous_auth() -> Optional[dict]:


 def _nous_api_key(provider: dict) -> str:
-    """Extract the best API key from a Nous provider state dict."""
+    """Extract the Nous runtime credential from the compatibility field."""
    return provider.get("agent_key") or provider.get("access_token", "")


@ -1246,17 +1247,25 @@ def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[
    """Return fresh Nous runtime credentials when available.

    This mirrors the main agent's 401 recovery path and keeps auxiliary
-    clients aligned with the singleton auth store + mint flow instead of
+    clients aligned with the singleton auth store + JWT/mint flow instead of
    relying only on whatever raw tokens happen to be sitting in auth.json
    or the credential pool.
    """
    try:
-        from hermes_cli.auth import resolve_nous_runtime_credentials
+        from hermes_cli.auth import (
+            NOUS_INFERENCE_AUTH_MODE_AUTO,
+            NOUS_INFERENCE_AUTH_MODE_LEGACY,
+            resolve_nous_runtime_credentials,
+        )

        creds = resolve_nous_runtime_credentials(
            min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
            timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
-            force_mint=force_refresh,
+            inference_auth_mode=(
+                NOUS_INFERENCE_AUTH_MODE_LEGACY
+                if force_refresh
+                else NOUS_INFERENCE_AUTH_MODE_AUTO
+            ),
        )
    except Exception as exc:
        logger.debug("Auxiliary Nous runtime credential resolution failed: %s", exc)
@ -1473,7 +1482,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:



-def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Optional[str]]:
+def _try_openrouter(explicit_api_key: str = None, model: str = None) -> Tuple[Optional[OpenAI], Optional[str]]:
    pool_present, entry = _select_pool_entry("openrouter")
    if pool_present:
        or_key = explicit_api_key or _pool_runtime_api_key(entry)
@ -1483,7 +1492,7 @@ def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Opt
        base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
        logger.debug("Auxiliary client: OpenRouter via pool")
        return OpenAI(api_key=or_key, base_url=base_url,
-                       default_headers=build_or_headers()), _OPENROUTER_MODEL
+                       default_headers=build_or_headers()), model or _OPENROUTER_MODEL

    or_key = explicit_api_key or os.getenv("OPENROUTER_API_KEY")
    if not or_key:
@ -1491,7 +1500,7 @@ def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Opt
        return None, None
    logger.debug("Auxiliary client: OpenRouter")
    return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
-                   default_headers=build_or_headers()), _OPENROUTER_MODEL
+                   default_headers=build_or_headers()), model or _OPENROUTER_MODEL


 def _describe_openrouter_unavailable() -> str:
@ -2087,7 +2096,13 @@ def _is_payment_error(exc: Exception) -> bool:
    """Detect payment/credit/quota exhaustion errors.

    Returns True for HTTP 402 (Payment Required) and for 429/other errors
-    whose message indicates billing exhaustion rather than rate limiting.
+    whose message indicates billing exhaustion or daily quota exhaustion
+    rather than transient rate limiting.
+
+    Daily token quota errors (e.g. Bedrock "Too many tokens per day",
+    Vertex AI "quota exceeded") are functionally equivalent to credit
+    exhaustion — the provider cannot serve the request until the quota
+    resets — and should trigger the same provider-fallback logic.
    """
    status = getattr(exc, "status_code", None)
    if status == 402:
@ -2095,10 +2110,19 @@ def _is_payment_error(exc: Exception) -> bool:
    err_lower = str(exc).lower()
    # OpenRouter and other providers include "credits" or "afford" in 402 bodies,
    # but sometimes wrap them in 429 or other codes.
+    # Daily quota exhaustion from Bedrock, Vertex AI, and similar providers
+    # uses different language but is semantically identical to credit exhaustion.
    if status in {402, 429, None}:
-        if any(kw in err_lower for kw in ("credits", "insufficient funds",
-                                           "can only afford", "billing",
-                                           "payment required")):
+        if any(kw in err_lower for kw in (
+            "credits", "insufficient funds",
+            "can only afford", "billing",
+            "payment required",
+            # Daily / monthly quota exhaustion keywords
+            "quota exceeded", "quota_exceeded",
+            "too many tokens per day", "daily limit",
+            "tokens per day", "daily quota",
+            "resource exhausted",  # Vertex AI / gRPC quota errors
+        )):
            return True
    return False

@ -2500,12 +2524,15 @@ def _refresh_provider_credentials(provider: str) -> bool:
            _evict_cached_clients(normalized)
            return True
        if normalized == "nous":
-            from hermes_cli.auth import resolve_nous_runtime_credentials
+            from hermes_cli.auth import (
+                NOUS_INFERENCE_AUTH_MODE_LEGACY,
+                resolve_nous_runtime_credentials,
+            )

            creds = resolve_nous_runtime_credentials(
                min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
                timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
-                force_mint=True,
+                inference_auth_mode=NOUS_INFERENCE_AUTH_MODE_LEGACY,
            )
            if not str(creds.get("api_key", "") or "").strip():
                return False
@ -2579,6 +2606,133 @@ def _try_payment_fallback(
    return None, None, ""


+def _try_main_agent_model_fallback(
+    failed_provider: str,
+    task: str = None,
+    reason: str = "error",
+) -> Tuple[Optional[Any], Optional[str], str]:
+    """Last-resort fallback to the user's main agent provider + model.
+
+    Used after the configured fallback_chain is exhausted (or empty) for
+    users with an explicit auxiliary provider.  This is the "safety net"
+    layer: if nothing the user asked for can serve the request, try the
+    main chat model before giving up.
+
+    Skips when the failed provider already IS the main provider (no point
+    retrying the same backend that just failed).
+
+    Returns:
+        (client, model, provider_label) or (None, None, "") if no fallback.
+    """
+    main_provider = (_read_main_provider() or "").strip()
+    main_model = (_read_main_model() or "").strip()
+    if not main_provider or not main_model or main_provider.lower() in {"auto", ""}:
+        return None, None, ""
+
+    skip = (failed_provider or "").lower().strip()
+    if main_provider.lower() == skip:
+        # The thing that failed IS the main model — nothing to fall back to.
+        return None, None, ""
+    if _is_provider_unhealthy(main_provider):
+        _log_skip_unhealthy(main_provider, task)
+        return None, None, ""
+
+    try:
+        client, resolved_model = resolve_provider_client(
+            provider=main_provider, model=main_model,
+        )
+    except Exception:
+        client, resolved_model = None, None
+
+    if client is None:
+        return None, None, ""
+
+    label = f"main-agent({main_provider})"
+    logger.info(
+        "Auxiliary %s: %s on %s — falling back to main agent model %s (%s)",
+        task or "call", reason, failed_provider, label, resolved_model or main_model,
+    )
+    return client, resolved_model or main_model, label
+
+
+def _try_configured_fallback_chain(
+    task: str,
+    failed_provider: str,
+    reason: str = "error",
+) -> Tuple[Optional[Any], Optional[str], str]:
+    """Try user-configured fallback_chain for a specific auxiliary task.
+
+    Reads auxiliary.<task>.fallback_chain from config.yaml and tries each
+    entry in order.  Each entry must have at least ``provider``; ``model``,
+    ``base_url``, and ``api_key`` are optional.
+
+    Returns:
+        (client, model, provider_label) or (None, None, "") if no fallback.
+    """
+    if not task:
+        return None, None, ""
+
+    task_config = _get_auxiliary_task_config(task)
+    chain = task_config.get("fallback_chain")
+    if not chain or not isinstance(chain, list):
+        return None, None, ""
+
+    skip = failed_provider.lower().strip()
+    tried = []
+
+    for i, entry in enumerate(chain):
+        if not isinstance(entry, dict):
+            continue
+        fb_provider = str(entry.get("provider", "")).strip()
+        if not fb_provider or fb_provider.lower() == skip:
+            continue
+        fb_model = str(entry.get("model", "")).strip() or None
+        fb_base_url = str(entry.get("base_url", "")).strip() or None
+        fb_api_key = str(entry.get("api_key", "")).strip() or None
+
+        label = f"fallback_chain[{i}]({fb_provider})"
+
+        try:
+            fb_client = _resolve_single_provider(
+                fb_provider, fb_model, fb_base_url, fb_api_key)
+        except Exception:
+            fb_client = None
+
+        if fb_client is not None:
+            logger.info(
+                "Auxiliary %s: %s on %s — configured fallback to %s (%s)",
+                task, reason, failed_provider, label, fb_model or "default",
+            )
+            return fb_client, fb_model, label
+        tried.append(label)
+
+    if tried:
+        logger.debug(
+            "Auxiliary %s: configured fallback_chain exhausted (tried: %s)",
+            task, ", ".join(tried),
+        )
+    return None, None, ""
+
+
+def _resolve_single_provider(
+    provider: str,
+    model: Optional[str] = None,
+    base_url: Optional[str] = None,
+    api_key: Optional[str] = None,
+) -> Optional[Any]:
+    """Resolve a single provider entry from fallback_chain to an OpenAI client.
+
+    Uses the existing provider resolution infrastructure where possible.
+    """
+    # Reuse resolve_provider_client which handles provider→client mapping
+    client, resolved_model = resolve_provider_client(
+        provider=provider,
+        model=model,
+        base_url=base_url,
+        api_key=api_key,
+    )
+    return client
+
 def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Optional[OpenAI], Optional[str]]:
    """Full auto-detection chain.

@ -3049,10 +3203,17 @@ def resolve_provider_client(
        if custom_entry:
            custom_base = custom_entry.get("base_url", "").strip()
            custom_key = custom_entry.get("api_key", "").strip()
-            custom_key_env = custom_entry.get("key_env", "").strip()
+            custom_key_env = (custom_entry.get("key_env") or custom_entry.get("api_key_env") or "").strip()
            if not custom_key and custom_key_env:
                custom_key = os.getenv(custom_key_env, "").strip()
            custom_key = custom_key or "no-key-required"
+            if custom_key == "no-key-required":
+                logger.warning(
+                    "resolve_provider_client: named custom provider %r has no resolvable "
+                    "api_key — request will be sent with placeholder no-key-required "
+                    "and will 401 on auth-required endpoints",
+                    custom_entry.get("name") or provider,
+                )
            # An explicit per-task api_mode override (from _resolve_task_provider_model)
            # wins; otherwise fall back to what the provider entry declared.
            entry_api_mode = (api_mode or custom_entry.get("api_mode") or "").strip()
@ -3400,7 +3561,7 @@ def _resolve_strict_vision_backend(
    if provider == "copilot":
        return resolve_provider_client("copilot", model, is_vision=True)
    if provider == "openrouter":
-        return _try_openrouter()
+        return _try_openrouter(model=model)
    if provider == "nous":
        return _try_nous(vision=True)
    if provider == "openai-codex":
@ -4519,11 +4680,17 @@ def call_llm(
            or _is_connection_error(first_err)
            or _is_rate_limit_error(first_err)
        )
-        # Only try alternative providers when the user didn't explicitly
-        # configure this task's provider.  Explicit provider = hard constraint;
-        # auto (the default) = best-effort fallback chain.  (#7559)
+        # Respect explicit provider choice for non-capacity errors (auth, request
+        # validation, plain rate limiting, etc.) but allow fallback when the
+        # provider clearly cannot serve the request due to capacity:
+        # payment/quota exhaustion and connection failures are capacity
+        # problems, not request constraints.
+        # See #26803: daily token quota (429 + "too many tokens per day") must
+        # fall back just like a 402 credit error.
        is_auto = resolved_provider in {"auto", "", None}
-        if should_fallback and is_auto:
+        # Capacity errors bypass the explicit-provider gate: the provider
+        # literally cannot serve this request regardless of user intent.
+        is_capacity_error = _is_payment_error(first_err) or _is_connection_error(first_err)
+        if should_fallback and (is_auto or is_capacity_error):
            if _is_payment_error(first_err):
                reason = "payment error"
                # Resolve the actual provider label (resolved_provider may be
@ -4539,8 +4706,24 @@ def call_llm(
                reason = "connection error"
            logger.info("Auxiliary %s: %s on %s (%s), trying fallback",
                        task or "call", reason, resolved_provider, first_err)
-            fb_client, fb_model, fb_label = _try_payment_fallback(
-                resolved_provider, task, reason=reason)
+
+            # Fallback order (#26882, #26803):
+            #   1. User-configured fallback_chain (per-task) if set
+            #   2. Main agent model (last-resort safety net)
+            # For auto users (no explicit aux provider), use the full
+            # auto-detection chain instead — its Step 1 IS the main agent
+            # model, so users on `auto` already get main-model fallback.
+            fb_client, fb_model, fb_label = (None, None, "")
+            if is_auto:
+                fb_client, fb_model, fb_label = _try_payment_fallback(
+                    resolved_provider, task, reason=reason)
+            else:
+                fb_client, fb_model, fb_label = _try_configured_fallback_chain(
+                    task, resolved_provider or "auto", reason=reason)
+                if fb_client is None:
+                    fb_client, fb_model, fb_label = _try_main_agent_model_fallback(
+                        resolved_provider, task, reason=reason)
+
            if fb_client is not None:
                fb_kwargs = _build_call_kwargs(
                    fb_label, fb_model, messages,
@ -4550,6 +4733,14 @@ def call_llm(
                    base_url=str(getattr(fb_client, "base_url", "") or ""))
                return _validate_llm_response(
                    fb_client.chat.completions.create(**fb_kwargs), task)
+            # All fallback layers exhausted — emit a single user-visible
+            # warning so the operator knows the aux task is about to fail.
+            # (#26882) The error itself is re-raised below.
+            logger.warning(
+                "Auxiliary %s: %s on %s and all fallbacks exhausted "
+                "(fallback_chain + main agent model). Raising original error.",
+                task or "call", reason, resolved_provider,
+            )
        # Connection/timeout errors leave the cached client poisoned (closed
        # httpx transport, half-read stream, dead async loop).  Drop it from
        # the cache regardless of whether we found a fallback above so the
@ -4851,8 +5042,12 @@ async def async_call_llm(
            or _is_connection_error(first_err)
            or _is_rate_limit_error(first_err)
        )
+        # Capacity errors (payment/quota/connection) bypass the explicit-provider
+        # gate — the provider cannot serve the request regardless of user intent.
+        # See #26803: daily token quota must fall back like a 402 credit error.
        is_auto = resolved_provider in {"auto", "", None}
-        if should_fallback and is_auto:
+        is_capacity_error = _is_payment_error(first_err) or _is_connection_error(first_err)
+        if should_fallback and (is_auto or is_capacity_error):
            if _is_payment_error(first_err):
                reason = "payment error"
                _mark_provider_unhealthy(
@ -4864,8 +5059,23 @@ async def async_call_llm(
                reason = "connection error"
            logger.info("Auxiliary %s (async): %s on %s (%s), trying fallback",
                        task or "call", reason, resolved_provider, first_err)
-            fb_client, fb_model, fb_label = _try_payment_fallback(
-                resolved_provider, task, reason=reason)
+
+            # Fallback order (#26882, #26803):
+            #   1. User-configured fallback_chain (per-task) if set
+            #   2. Main agent model (last-resort safety net)
+            # Auto users get the full auto-detection chain instead — its
+            # Step 1 IS the main agent model.
+            fb_client, fb_model, fb_label = (None, None, "")
+            if is_auto:
+                fb_client, fb_model, fb_label = _try_payment_fallback(
+                    resolved_provider, task, reason=reason)
+            else:
+                fb_client, fb_model, fb_label = _try_configured_fallback_chain(
+                    task, resolved_provider or "auto", reason=reason)
+                if fb_client is None:
+                    fb_client, fb_model, fb_label = _try_main_agent_model_fallback(
+                        resolved_provider, task, reason=reason)
+
            if fb_client is not None:
                fb_kwargs = _build_call_kwargs(
                    fb_label, fb_model, messages,
@ -4881,6 +5091,12 @@ async def async_call_llm(
                    fb_kwargs["model"] = async_fb_model
                return _validate_llm_response(
                    await async_fb.chat.completions.create(**fb_kwargs), task)
+            # All fallback layers exhausted — warn before re-raising. (#26882)
+            logger.warning(
+                "Auxiliary %s (async): %s on %s and all fallbacks exhausted "
+                "(fallback_chain + main agent model). Raising original error.",
+                task or "call", reason, resolved_provider,
+            )
        # Mirror the sync path: drop poisoned clients on connection/timeout
        # so the next aux call rebuilds.  See issue #23432.
        if _is_connection_error(first_err):
--- a/agent/background_review.py
+++ b/agent/background_review.py
@ -0,0 +1,570 @@
+"""Background memory/skill review — fork the agent to evaluate the turn.
+
+After every turn, ``AIAgent.run_conversation`` may call
+:func:`spawn_background_review` to fire off a daemon thread that replays
+the conversation snapshot in a forked :class:`AIAgent` and asks itself
+"should any skill/memory be saved or updated?".  Writes go straight to
+the memory + skill stores.  Main conversation and prompt cache are never
+touched.
+
+The fork inherits the parent's live runtime (provider, model, base_url,
+credentials, cached system prompt) so it hits the same prefix cache and
+uses the same auth.  It runs with a tool whitelist limited to memory and
+skill management tools; everything else is denied at runtime.
+
+See the ``hermes-agent-dev`` skill (``references/self-improvement-loop.md``)
+for invariants and PR review criteria.
+"""
+
+from __future__ import annotations
+
+import contextlib
+import json
+import logging
+import os
+from typing import Any, Dict, List, Optional
+
+logger = logging.getLogger(__name__)
+
+
+# Review-prompt strings — used by ``spawn_background_review_thread`` to build
+# the user message that the forked review agent receives.  AIAgent exposes
+# them as class attributes (``_MEMORY_REVIEW_PROMPT`` etc.) for back-compat;
+# the actual text lives here so future edits happen in one place.
+_MEMORY_REVIEW_PROMPT = (
+    "Review the conversation above and consider saving to memory if appropriate.\n\n"
+    "Focus on:\n"
+    "1. Has the user revealed things about themselves — their persona, desires, "
+    "preferences, or personal details worth remembering?\n"
+    "2. Has the user expressed expectations about how you should behave, their work "
+    "style, or ways they want you to operate?\n\n"
+    "If something stands out, save it using the memory tool. "
+    "If nothing is worth saving, just say 'Nothing to save.' and stop."
+)
+
+_SKILL_REVIEW_PROMPT = (
+    "Review the conversation above and update the skill library. Be "
+    "ACTIVE — most sessions produce at least one skill update, even if "
+    "small. A pass that does nothing is a missed learning opportunity, "
+    "not a neutral outcome.\n\n"
+    "Target shape of the library: CLASS-LEVEL skills, each with a rich "
+    "SKILL.md and a `references/` directory for session-specific detail. "
+    "Not a long flat list of narrow one-session-one-skill entries. This "
+    "shapes HOW you update, not WHETHER you update.\n\n"
+    "Signals to look for (any one of these warrants action):\n"
+    "  • User corrected your style, tone, format, legibility, or "
+    "verbosity. Frustration signals like 'stop doing X', 'this is too "
+    "verbose', 'don't format like this', 'why are you explaining', "
+    "'just give me the answer', 'you always do Y and I hate it', or an "
+    "explicit 'remember this' are FIRST-CLASS skill signals, not just "
+    "memory signals. Update the relevant skill(s) to embed the "
+    "preference so the next session starts already knowing.\n"
+    "  • User corrected your workflow, approach, or sequence of steps. "
+    "Encode the correction as a pitfall or explicit step in the skill "
+    "that governs that class of task.\n"
+    "  • Non-trivial technique, fix, workaround, debugging path, or "
+    "tool-usage pattern emerged that a future session would benefit "
+    "from. Capture it.\n"
+    "  • A skill that got loaded or consulted this session turned out "
+    "to be wrong, missing a step, or outdated. Patch it NOW.\n\n"
+    "Preference order — prefer the earliest action that fits, but do "
+    "pick one when a signal above fired:\n"
+    "  1. UPDATE A CURRENTLY-LOADED SKILL. Look back through the "
+    "conversation for skills the user loaded via /skill-name or you "
+    "read via skill_view. If any of them covers the territory of the "
+    "new learning, PATCH that one first. It is the skill that was in "
+    "play, so it's the right one to extend.\n"
+    "  2. UPDATE AN EXISTING UMBRELLA (via skills_list + skill_view). "
+    "If no loaded skill fits but an existing class-level skill does, "
+    "patch it. Add a subsection, a pitfall, or broaden a trigger.\n"
+    "  3. ADD A SUPPORT FILE under an existing umbrella. Skills can be "
+    "packaged with three kinds of support files — use the right "
+    "directory per kind:\n"
+    "     • `references/<topic>.md` — session-specific detail (error "
+    "transcripts, reproduction recipes, provider quirks) AND "
+    "condensed knowledge banks: quoted research, API docs, external "
+    "authoritative excerpts, or domain notes you found while working "
+    "on the problem. Write it concise and for the value of the task, "
+    "not as a full mirror of upstream docs.\n"
+    "     • `templates/<name>.<ext>` — starter files meant to be "
+    "copied and modified (boilerplate configs, scaffolding, a "
+    "known-good example the agent can `reproduce with modifications`).\n"
+    "     • `scripts/<name>.<ext>` — statically re-runnable actions "
+    "the skill can invoke directly (verification scripts, fixture "
+    "generators, deterministic probes, anything the agent should run "
+    "rather than hand-type each time).\n"
+    "     Add support files via skill_manage action=write_file with "
+    "file_path starting 'references/', 'templates/', or 'scripts/'. "
+    "The umbrella's SKILL.md should gain a one-line pointer to any "
+    "new support file so future agents know it exists.\n"
+    "  4. CREATE A NEW CLASS-LEVEL UMBRELLA SKILL when no existing "
+    "skill covers the class. The name MUST be at the class level. "
+    "The name MUST NOT be a specific PR number, error string, feature "
+    "codename, library-alone name, or 'fix-X / debug-Y / audit-Z-today' "
+    "session artifact. If the proposed name only makes sense for "
+    "today's task, it's wrong — fall back to (1), (2), or (3).\n\n"
+    "User-preference embedding (important): when the user expressed a "
+    "style/format/workflow preference, the update belongs in the "
+    "SKILL.md body, not just in memory. Memory captures 'who the user "
+    "is and what the current situation and state of your operations "
+    "are'; skills capture 'how to do this class of task for this "
+    "user'. When they complain about how you handled a task, the "
+    "skill that governs that task needs to carry the lesson.\n\n"
+    "If you notice two existing skills that overlap, note it in your "
+    "reply — the background curator handles consolidation at scale.\n\n"
+    "Do NOT capture (these become persistent self-imposed constraints "
+    "that bite you later when the environment changes):\n"
+    "  • Environment-dependent failures: missing binaries, fresh-install "
+    "errors, post-migration path mismatches, 'command not found', "
+    "unconfigured credentials, uninstalled packages. The user can fix "
+    "these — they are not durable rules.\n"
+    "  • Negative claims about tools or features ('browser tools do not "
+    "work', 'X tool is broken', 'cannot use Y from execute_code'). These "
+    "harden into refusals the agent cites against itself for months "
+    "after the actual problem was fixed.\n"
+    "  • Session-specific transient errors that resolved before the "
+    "conversation ended. If retrying worked, the lesson is the retry "
+    "pattern, not the original failure.\n"
+    "  • One-off task narratives. A user asking 'summarize today's "
+    "market' or 'analyze this PR' is not a class of work that warrants "
+    "a skill.\n\n"
+    "If a tool failed because of setup state, capture the FIX (install "
+    "command, config step, env var to set) under an existing setup or "
+    "troubleshooting skill — never 'this tool does not work' as a "
+    "standalone constraint.\n\n"
+    "'Nothing to save.' is a real option but should NOT be the "
+    "default. If the session ran smoothly with no corrections and "
+    "produced no new technique, just say 'Nothing to save.' and stop. "
+    "Otherwise, act."
+)
+
+# Review prompt used when a background review pass covers BOTH memory and
+# skills: spawn_background_review_thread() picks it when review_memory and
+# review_skills are both true.  The text becomes the review fork's user
+# message (a tool-restriction note is appended by the worker), so any wording
+# change here alters the review agent's behaviour.
+_COMBINED_REVIEW_PROMPT = (
+    "Review the conversation above and update two things:\n\n"
+    "**Memory**: who the user is. Did the user reveal persona, "
+    "desires, preferences, personal details, or expectations about "
+    "how you should behave? Save facts about the user and durable "
+    "preferences with the memory tool.\n\n"
+    "**Skills**: how to do this class of task. Be ACTIVE — most "
+    "sessions produce at least one skill update. A pass that does "
+    "nothing is a missed learning opportunity, not a neutral outcome.\n\n"
+    "Target shape of the skill library: CLASS-LEVEL skills with a rich "
+    "SKILL.md and a `references/` directory for session-specific detail. "
+    "Not a long flat list of narrow one-session-one-skill entries.\n\n"
+    "Signals that warrant a skill update (any one is enough):\n"
+    "  • User corrected your style, tone, format, legibility, "
+    "verbosity, or approach. Frustration is a FIRST-CLASS skill "
+    "signal, not just a memory signal. 'stop doing X', 'don't format "
+    "like this', 'I hate when you Y' — embed the lesson in the skill "
+    "that governs that task so the next session starts fixed.\n"
+    "  • Non-trivial technique, fix, workaround, or debugging path "
+    "emerged.\n"
+    "  • A skill that was loaded or consulted turned out wrong, "
+    "missing, or outdated — patch it now.\n\n"
+    "Preference order for skills — pick the earliest that fits:\n"
+    "  1. UPDATE A CURRENTLY-LOADED SKILL. Check what skills were "
+    "loaded via /skill-name or skill_view in the conversation. If one "
+    "of them covers the learning, PATCH it first. It was in play; "
+    "it's the right place.\n"
+    "  2. UPDATE AN EXISTING UMBRELLA (skills_list + skill_view to "
+    "find the right one). Patch it.\n"
+    "  3. ADD A SUPPORT FILE under an existing umbrella via "
+    "skill_manage action=write_file. Three kinds: "
+    "`references/<topic>.md` for session-specific detail OR condensed "
+    "knowledge banks (quoted research, API docs excerpts, domain "
+    "notes) written concise and task-focused; `templates/<name>.<ext>` "
+    "for starter files meant to be copied and modified; "
+    "`scripts/<name>.<ext>` for statically re-runnable actions "
+    "(verification, fixture generators, probes). Add a one-line "
+    "pointer in SKILL.md so future agents find them.\n"
+    "  4. CREATE A NEW CLASS-LEVEL UMBRELLA when nothing exists. "
+    "Name at the class level — NOT a PR number, error string, "
+    "codename, library-alone name, or 'fix-X / debug-Y' session "
+    "artifact. If the name only fits today's task, fall back to (1), "
+    "(2), or (3).\n\n"
+    "User-preference embedding: when the user complains about how "
+    "you handled a task, update the skill that governs that task — "
+    "memory alone isn't enough. Memory says 'who the user is and "
+    "what the current situation and state of your operations are'; "
+    "skills say 'how to do this class of task for this user'. Both "
+    "should carry user-preference lessons when relevant.\n\n"
+    "If you notice overlapping existing skills, mention it — the "
+    "background curator handles consolidation.\n\n"
+    "Do NOT capture as skills (these become persistent self-imposed "
+    "constraints that bite you later when the environment changes):\n"
+    "  • Environment-dependent failures: missing binaries, fresh-install "
+    "errors, post-migration path mismatches, 'command not found', "
+    "unconfigured credentials, uninstalled packages. The user can fix "
+    "these — they are not durable rules.\n"
+    "  • Negative claims about tools or features ('browser tools do not "
+    "work', 'X tool is broken', 'cannot use Y from execute_code'). These "
+    "harden into refusals the agent cites against itself for months "
+    "after the actual problem was fixed.\n"
+    "  • Session-specific transient errors that resolved before the "
+    "conversation ended. If retrying worked, the lesson is the retry "
+    "pattern, not the original failure.\n"
+    "  • One-off task narratives. A user asking 'summarize today's "
+    "market' or 'analyze this PR' is not a class of work that warrants "
+    "a skill.\n\n"
+    "If a tool failed because of setup state, capture the FIX (install "
+    "command, config step, env var to set) under an existing setup or "
+    "troubleshooting skill — never 'this tool does not work' as a "
+    "standalone constraint.\n\n"
+    "Act on whichever of the two dimensions has real signal. If "
+    "genuinely nothing stands out on either, say 'Nothing to save.' "
+    "and stop — but don't reach for that conclusion as a default."
+)
+
+
+
+def summarize_background_review_actions(
+    review_messages: List[Dict],
+    prior_snapshot: List[Dict],
+) -> List[str]:
+    """Build the human-facing action summary for a background review pass.
+
+    Walks the review agent's session messages and collects "successful tool
+    action" descriptions to surface to the user (e.g. "Memory updated").
+    Tool messages already present in ``prior_snapshot`` are skipped so we
+    don't re-surface stale results from the prior conversation that the
+    review agent inherited via ``conversation_history`` (issue #14944).
+
+    Matching is by ``tool_call_id`` when available, with a content-equality
+    fallback for tool messages that lack one.
+
+    Args:
+        review_messages: Messages recorded by the review agent.  Only dict
+            entries with ``role == "tool"`` are inspected; ``None`` is
+            treated as empty.
+        prior_snapshot: Messages the review agent inherited.  Their tool
+            results are excluded from the summary; ``None`` is treated as
+            empty.
+
+    Returns:
+        One description per qualifying tool result, in message order.
+        Duplicates are not removed here.  Tool results whose content is not
+        a JSON object, is not marked ``success``, or carries no string
+        ``message`` contribute nothing.
+    """
+    existing_tool_call_ids = set()
+    existing_tool_contents = set()
+    for prior in prior_snapshot or []:
+        if not isinstance(prior, dict) or prior.get("role") != "tool":
+            continue
+        tcid = prior.get("tool_call_id")
+        if tcid:
+            existing_tool_call_ids.add(tcid)
+        else:
+            content = prior.get("content")
+            if isinstance(content, str):
+                existing_tool_contents.add(content)
+
+    # User-facing names for the known write targets; any other target is
+    # shown under its own name.
+    target_labels = {"memory": "Memory", "user": "User profile"}
+
+    actions: List[str] = []
+    for msg in review_messages or []:
+        if not isinstance(msg, dict) or msg.get("role") != "tool":
+            continue
+        tcid = msg.get("tool_call_id")
+        if tcid and tcid in existing_tool_call_ids:
+            continue
+        if not tcid:
+            content_str = msg.get("content")
+            if isinstance(content_str, str) and content_str in existing_tool_contents:
+                continue
+        try:
+            data = json.loads(msg.get("content", "{}"))
+        except (json.JSONDecodeError, TypeError):
+            continue
+        if not isinstance(data, dict) or not data.get("success"):
+            continue
+        # A JSON null / non-string message must not reach .lower(): the
+        # resulting AttributeError would abort the whole summary.
+        message = data.get("message")
+        if not isinstance(message, str) or not message:
+            continue
+        target = data.get("target")
+        if not isinstance(target, str):
+            target = ""
+        lowered = message.lower()
+        if "created" in lowered or "updated" in lowered:
+            # These messages already read well on their own.
+            actions.append(message)
+        elif (
+            "added" in lowered
+            or (target and "add" in lowered)
+            or "removed" in lowered
+            or "replaced" in lowered
+        ):
+            label = target_labels.get(target, target)
+            # Without a target there is nothing to name, so surface the
+            # tool's own message instead of a dangling " updated".
+            actions.append(f"{label} updated" if label else message)
+    return actions
+
+
+def build_memory_write_metadata(
+    agent: Any,
+    *,
+    write_origin: Optional[str] = None,
+    execution_context: Optional[str] = None,
+    task_id: Optional[str] = None,
+    tool_call_id: Optional[str] = None,
+) -> Dict[str, Any]:
+    """Assemble provenance metadata for external memory-provider mirrors.
+
+    Explicit keyword arguments win over values stored on ``agent``.  When
+    neither supplies a value, ``write_origin`` falls back to
+    ``"assistant_tool"``, ``execution_context`` to ``"foreground"`` and
+    ``platform`` to the ``HERMES_SESSION_SOURCE`` environment variable
+    (default ``"cli"``).  ``task_id`` / ``tool_call_id`` are included only
+    when truthy, and entries whose value is ``None`` or ``""`` are dropped
+    from the result.
+    """
+    origin = write_origin or getattr(agent, "_memory_write_origin", "assistant_tool")
+    context = execution_context or getattr(agent, "_memory_write_context", "foreground")
+    session = agent.session_id or ""
+    parent_session = agent._parent_session_id or ""
+    platform = agent.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli")
+
+    # Ordered (key, value) pairs; the order here is the key order of the
+    # returned dict.
+    fields = [
+        ("write_origin", origin),
+        ("execution_context", context),
+        ("session_id", session),
+        ("parent_session_id", parent_session),
+        ("platform", platform),
+        ("tool_name", "memory"),
+    ]
+    if task_id:
+        fields.append(("task_id", task_id))
+    if tool_call_id:
+        fields.append(("tool_call_id", tool_call_id))
+
+    blanks = {None, ""}
+    return {key: value for key, value in fields if value not in blanks}
+
+
+def _run_review_in_thread(
+    agent: Any,
+    messages_snapshot: List[Dict],
+    prompt: str,
+) -> None:
+    """Worker function executed in the background-review daemon thread.
+
+    Spawns a forked ``AIAgent`` inheriting the parent's runtime, runs the
+    review prompt, and surfaces a compact action summary back to the user
+    via ``agent._safe_print`` and ``agent.background_review_callback``.
+
+    Args:
+        agent: The parent agent.  Its model, provider, platform, session
+            identifiers, memory store and cached system prompt are copied
+            onto the fork.
+        messages_snapshot: Conversation history handed to the fork; also
+            used to filter already-seen tool results out of the summary.
+        prompt: Review prompt.  A note restricting the fork to memory and
+            skill tools is appended before it is sent.
+
+    Failures inside the review are caught, logged as a warning and reported
+    through ``agent._emit_auxiliary_failure``.  The fork is torn down and the
+    thread's approval callback is cleared on every path.
+    """
+    # Local import to avoid a hard circular dep at module load.
+    from run_agent import AIAgent
+    from tools.terminal_tool import set_approval_callback as _set_approval_callback
+
+    # Install a non-interactive approval callback on this worker
+    # thread so any dangerous-command guard the review agent trips
+    # resolves to "deny" instead of falling back to input() -- which
+    # deadlocks against the parent's prompt_toolkit TUI (#15216).
+    # Same pattern as _subagent_auto_deny in tools/delegate_tool.py.
+    def _bg_review_auto_deny(command, description, **kwargs):
+        logger.warning(
+            "Background review auto-denied dangerous command: %s (%s)",
+            command, description,
+        )
+        return "deny"
+    try:
+        _set_approval_callback(_bg_review_auto_deny)
+    except Exception:
+        pass
+
+    # review_agent doubles as the "still needs teardown" flag for the
+    # finally block: it is reset to None once the normal path has shut the
+    # fork down.
+    review_agent = None
+    review_messages: List[Dict] = []
+    try:
+        with open(os.devnull, "w", encoding="utf-8") as _devnull, \
+             contextlib.redirect_stdout(_devnull), \
+             contextlib.redirect_stderr(_devnull):
+            # Inherit the parent agent's live runtime (provider, model,
+            # base_url, api_key, api_mode) so the fork uses the exact
+            # same credentials the main turn is using.  Without this,
+            # AIAgent.__init__ re-runs auto-resolution from env vars,
+            # which fails for OAuth-only providers, session-scoped
+            # creds, or credential-pool setups where the resolver can't
+            # reconstruct auth from scratch -- producing the spurious
+            # "No LLM provider configured" warning at end of turn.
+            _parent_runtime = agent._current_main_runtime()
+            _parent_api_mode = _parent_runtime.get("api_mode") or None
+            # The review fork needs to call agent-loop tools (memory,
+            # skill_manage). Those tools require Hermes' own dispatch,
+            # which the codex_app_server runtime bypasses entirely
+            # (it runs the turn inside codex's subprocess). So when
+            # the parent is on codex_app_server, downgrade the review
+            # fork to codex_responses — same auth/credentials, but
+            # talks to the OpenAI Responses API directly so Hermes
+            # owns the loop and the agent-loop tools dispatch.
+            if _parent_api_mode == "codex_app_server":
+                _parent_api_mode = "codex_responses"
+            # skip_memory=True keeps the review fork from
+            # touching external memory plugins (honcho, mem0,
+            # supermemory, etc.).  Without it, the fork's
+            # __init__ rebuilds its own _memory_manager from
+            # config, scoped to the parent's session_id, and
+            # run_conversation() then leaks the harness prompt
+            # into the user's real memory namespace via three
+            # ingestion sites: on_turn_start (cadence + turn
+            # message), prefetch_all (recall query), and
+            # sync_all (harness prompt + review output recorded
+            # as a (user, assistant) turn pair).  Built-in
+            # MEMORY.md / USER.md state is re-bound from the
+            # parent below so memory(action="add") writes from
+            # the review still land on disk; the review just
+            # has zero side effects on external providers.
+            review_agent = AIAgent(
+                model=agent.model,
+                max_iterations=16,
+                quiet_mode=True,
+                platform=agent.platform,
+                provider=agent.provider,
+                api_mode=_parent_api_mode,
+                base_url=_parent_runtime.get("base_url") or None,
+                api_key=_parent_runtime.get("api_key") or None,
+                credential_pool=getattr(agent, "_credential_pool", None),
+                parent_session_id=agent.session_id,
+                skip_memory=True,
+            )
+            # Tag writes made by the fork so build_memory_write_metadata()
+            # reports them as coming from the background review.
+            review_agent._memory_write_origin = "background_review"
+            review_agent._memory_write_context = "background_review"
+            review_agent._memory_store = agent._memory_store
+            review_agent._memory_enabled = agent._memory_enabled
+            review_agent._user_profile_enabled = agent._user_profile_enabled
+            # NOTE(review): presumably an interval of 0 disables the nudges
+            # so the fork cannot trigger a review of its own — confirm
+            # against AIAgent.
+            review_agent._memory_nudge_interval = 0
+            review_agent._skill_nudge_interval = 0
+            # Suppress all status/warning emits from the fork so the
+            # user only sees the final successful-action summary.
+            # Without this, mid-review "Iteration budget exhausted",
+            # rate-limit retries, compression warnings, and other
+            # lifecycle messages bubble up through _emit_status ->
+            # _vprint and leak past the stdout redirect (they go via
+            # _print_fn/status_callback, which bypass sys.stdout).
+            review_agent.suppress_status_output = True
+            # Inherit the parent's cached system prompt verbatim so
+            # the review fork's outbound HTTP request hits the same
+            # Anthropic/OpenRouter prefix cache the parent warmed.
+            # Without this, the fork rebuilds the system prompt from
+            # scratch (fresh _hermes_now() timestamp, fresh
+            # session_id, narrower toolset → different skills_prompt)
+            # and the byte-exact prefix-cache key misses. See
+            # issue #25322 and PR #17276 for the full analysis +
+            # measured impact (~26% end-to-end cost reduction on
+            # Sonnet 4.5).
+            review_agent._cached_system_prompt = agent._cached_system_prompt
+            # Defensive: pin session_start + session_id to the
+            # parent's so any code path that re-renders parts of
+            # the system prompt (compression, plugin hooks) still
+            # produces byte-identical output. The cached-prompt
+            # assignment above already short-circuits the normal
+            # rebuild path, but these pins guarantee parity even
+            # if a future code path bypasses the cache.
+            review_agent.session_start = agent.session_start
+            review_agent.session_id = agent.session_id
+
+            from model_tools import get_tool_definitions
+            from hermes_cli.plugins import (
+                set_thread_tool_whitelist,
+                clear_thread_tool_whitelist,
+            )
+
+            # Collect the names of every tool in the memory and skills
+            # toolsets; this set is installed as the thread's whitelist.
+            review_whitelist = {
+                t["function"]["name"]
+                for t in get_tool_definitions(
+                    enabled_toolsets=["memory", "skills"],
+                    quiet_mode=True,
+                )
+            }
+            set_thread_tool_whitelist(
+                review_whitelist,
+                deny_msg_fmt=(
+                    "Background review denied non-whitelisted tool: "
+                    "{tool_name}. Only memory/skill tools are allowed."
+                ),
+            )
+            # The whitelist is cleared in the finally clause so it is
+            # removed even when run_conversation() raises.
+            try:
+                review_agent.run_conversation(
+                    user_message=(
+                        prompt
+                        + "\n\nYou can only call memory and skill "
+                        "management tools. Other tools will be denied "
+                        "at runtime — do not attempt them."
+                    ),
+                    conversation_history=messages_snapshot,
+                )
+            finally:
+                clear_thread_tool_whitelist()
+
+            # Tear down memory providers while stdout is still
+            # redirected so background thread teardown (Honcho flush,
+            # Hindsight sync, etc.) stays silent.  The finally block
+            # below is a safety net for the exception path.
+            try:
+                review_agent.shutdown_memory_provider()
+            except Exception:
+                pass
+            try:
+                review_agent.close()
+            except Exception:
+                pass
+            # Copy the fork's messages before dropping the reference;
+            # clearing review_agent tells the finally block that teardown
+            # already happened.
+            review_messages = list(getattr(review_agent, "_session_messages", []))
+            review_agent = None
+
+        # Scan the review agent's messages for successful tool actions
+        # and surface a compact summary to the user. Tool messages
+        # already present in messages_snapshot must be skipped, since
+        # the review agent inherits that history and would otherwise
+        # re-surface stale "created"/"updated" messages from the prior
+        # conversation as if they just happened (issue #14944).
+        actions = summarize_background_review_actions(
+            review_messages,
+            messages_snapshot,
+        )
+
+        if actions:
+            # dict.fromkeys() drops repeated descriptions while keeping
+            # their first-seen order.
+            summary = " · ".join(dict.fromkeys(actions))
+            agent._safe_print(
+                f"  💾 Self-improvement review: {summary}"
+            )
+            _bg_cb = agent.background_review_callback
+            if _bg_cb:
+                # A failing callback must not turn a successful review
+                # into a reported failure.
+                try:
+                    _bg_cb(
+                        f"💾 Self-improvement review: {summary}"
+                    )
+                except Exception:
+                    pass
+
+    except Exception as e:
+        logger.warning("Background memory/skill review failed: %s", e)
+        agent._emit_auxiliary_failure("background review", e)
+    finally:
+        # Safety-net cleanup for the exception path.  Normal
+        # completion already shut down inside redirect_stdout above.
+        # Re-open devnull here so any teardown output (Honcho flush,
+        # Hindsight sync, background thread joins) stays silent even
+        # on the exception path where redirect_stdout already exited.
+        if review_agent is not None:
+            try:
+                with open(os.devnull, "w", encoding="utf-8") as _fn, \
+                     contextlib.redirect_stdout(_fn), \
+                     contextlib.redirect_stderr(_fn):
+                    try:
+                        review_agent.shutdown_memory_provider()
+                    except Exception:
+                        pass
+                    try:
+                        review_agent.close()
+                    except Exception:
+                        pass
+            except Exception:
+                pass
+        # Clear the approval callback on this bg-review thread so a
+        # recycled thread-id doesn't inherit a stale reference.
+        try:
+            _set_approval_callback(None)
+        except Exception:
+            pass
+
+
+def spawn_background_review_thread(
+    agent: Any,
+    messages_snapshot: List[Dict],
+    review_memory: bool = False,
+    review_skills: bool = False,
+):
+    """Prepare the worker callable and prompt for a background review.
+
+    The prompt follows the triggers that fired: the combined prompt when
+    both ``review_memory`` and ``review_skills`` are set, the memory prompt
+    when only ``review_memory`` is set, and the skill prompt in every other
+    case.  An attribute of the same name on ``agent`` takes precedence over
+    the module-level constant, so older code that sets e.g.
+    ``agent._MEMORY_REVIEW_PROMPT`` directly keeps working.
+
+    Returns a ``(target, prompt)`` tuple.  Thread construction is left to the
+    caller (``AIAgent._spawn_background_review``) so test-level patches of
+    ``run_agent.threading.Thread`` keep working.
+    """
+    if not review_memory:
+        override_name, fallback = "_SKILL_REVIEW_PROMPT", _SKILL_REVIEW_PROMPT
+    elif review_skills:
+        override_name, fallback = "_COMBINED_REVIEW_PROMPT", _COMBINED_REVIEW_PROMPT
+    else:
+        override_name, fallback = "_MEMORY_REVIEW_PROMPT", _MEMORY_REVIEW_PROMPT
+    prompt = getattr(agent, override_name, fallback)
+
+    def _target() -> None:
+        # Closure over the arguments so the thread needs no args/kwargs.
+        _run_review_in_thread(agent, messages_snapshot, prompt)
+
+    return _target, prompt
+
+
+# Names exported by ``from <module> import *``.  The underscore-prefixed
+# prompt constants are listed explicitly because a star-import skips such
+# names unless they appear in ``__all__``.  ``_run_review_in_thread`` is not
+# listed here.
+__all__ = [
+    "_MEMORY_REVIEW_PROMPT",
+    "_SKILL_REVIEW_PROMPT",
+    "_COMBINED_REVIEW_PROMPT",
+    "spawn_background_review_thread",
+    "summarize_background_review_actions",
+    "build_memory_write_metadata",
+]
--- a/agent/bedrock_adapter.py
+++ b/agent/bedrock_adapter.py
@ -36,6 +36,19 @@ from typing import Any, Dict, List, Optional, Tuple

 logger = logging.getLogger(__name__)

+# ---------------------------------------------------------------------------
+# Ensure boto3/botocore are installed before any code in this module runs.
+# Upstream removed boto3 from [all] extras (PRs #24220, #24515); lazy_deps
+# handles on-demand installation so the Bedrock provider still works in the
+# EKS deployment without baking boto3 into the base image.
+# ---------------------------------------------------------------------------
+try:
+    from tools.lazy_deps import ensure
+    ensure("provider.bedrock", prompt=False)
+except Exception:
+    # Best-effort: lazy_deps unavailable or install failed.  Keep importing so
+    # downstream imports surface the real error, but leave a debug trace
+    # instead of discarding the cause.
+    logger.debug("lazy_deps could not ensure provider.bedrock", exc_info=True)
+
+
 # ---------------------------------------------------------------------------
 # Lazy boto3 import — only loaded when the Bedrock provider is actually used.
 # This keeps startup fast for users who don't use Bedrock.
--- a/agent/browser_provider.py
+++ b/agent/browser_provider.py
@ -0,0 +1,175 @@
+"""
+Browser Provider ABC
+====================
+
+Defines the pluggable-backend interface for cloud browser providers
+(Browserbase, Browser Use, Firecrawl, …). Providers register instances via
+:meth:`PluginContext.register_browser_provider`; the active one (selected via
+``browser.cloud_provider`` in ``config.yaml``) services every cloud-mode
+``browser_*`` tool call.
+
+Providers live in ``<repo>/plugins/browser/<name>/`` (built-in, auto-loaded as
+``kind: backend``) or ``~/.hermes/plugins/browser/<name>/`` (user, opt-in via
+``plugins.enabled``).
+
+This ABC mirrors :class:`agent.web_search_provider.WebSearchProvider` (PR
+#25182) — same shape, same registration flow, same picker integration. The
+legacy in-tree ``tools.browser_providers.base.CloudBrowserProvider`` ABC was
+deleted in PR #25214 (this work) along with the per-vendor inline modules in
+``tools/browser_providers/``; the lifecycle contract documented below is
+preserved bit-for-bit so the tool wrapper (:mod:`tools.browser_tool`) does
+not have to translate.
+
+Session metadata contract (preserved from the legacy ``CloudBrowserProvider``)::
+
+    {
+        "session_name": str,        # unique name for agent-browser --session
+        "bb_session_id": str,       # provider session ID (for close/cleanup)
+        "cdp_url": str,             # CDP websocket URL
+        "features": dict,           # feature flags that were enabled
+        "external_call_id": str,    # optional, managed-gateway billing key
+    }
+
+``bb_session_id`` is a legacy key name kept verbatim for backward compat with
+:mod:`tools.browser_tool` — it holds the provider's session ID regardless of
+which provider is in use.
+"""
+
+from __future__ import annotations
+
+import abc
+from typing import Any, Dict
+
+
+# ---------------------------------------------------------------------------
+# ABC
+# ---------------------------------------------------------------------------
+
+
+class BrowserProvider(abc.ABC):
+    """Abstract base class for a cloud browser backend.
+
+    Subclasses must implement :meth:`name`, :meth:`is_available`, and the
+    three lifecycle methods: :meth:`create_session`, :meth:`close_session`,
+    :meth:`emergency_cleanup`.
+
+    The lifecycle shape preserves the legacy ``CloudBrowserProvider`` contract
+    bit-for-bit so the dispatcher in :mod:`tools.browser_tool` is a pure
+    registry lookup — no per-provider conditionals, no shape translation.
+
+    Members with a default implementation (override only when needed):
+    :attr:`display_name`, :meth:`get_setup_schema`, and the legacy aliases
+    :meth:`is_configured` / :meth:`provider_name`.
+    """
+
+    @property
+    @abc.abstractmethod
+    def name(self) -> str:
+        """Stable short identifier used in the ``browser.cloud_provider``
+        config key.
+
+        Lowercase, hyphens permitted to preserve existing user-visible names.
+        Examples: ``browserbase``, ``browser-use``, ``firecrawl``.
+        """
+
+    @property
+    def display_name(self) -> str:
+        """Human-readable label shown in ``hermes tools``. Defaults to ``name``."""
+        return self.name
+
+    @abc.abstractmethod
+    def is_available(self) -> bool:
+        """Return True when this provider can service calls.
+
+        Typically a cheap check (env var present, managed-gateway token
+        readable, optional Python dep importable). Must NOT make network
+        calls — this runs at tool-registration time and on every
+        ``hermes tools`` paint.
+
+        Mirrors the legacy ``CloudBrowserProvider.is_configured()`` method;
+        renamed for parity with :class:`agent.web_search_provider.WebSearchProvider`.
+        """
+
+    @abc.abstractmethod
+    def create_session(self, task_id: str) -> Dict[str, object]:
+        """Create a cloud browser session and return session metadata.
+
+        Args:
+            task_id: Identifier supplied by the caller.  This base class
+                does not interpret it.
+
+        Must return a dict with at least::
+
+            {
+                "session_name": str,    # unique name for agent-browser --session
+                "bb_session_id": str,   # provider session ID (for close/cleanup)
+                "cdp_url": str,         # CDP websocket URL
+                "features": dict,       # feature flags that were enabled
+            }
+
+        ``bb_session_id`` is a legacy key name kept for backward compat with
+        the rest of :mod:`tools.browser_tool` — it holds the provider's
+        session ID regardless of which provider is in use.
+
+        May raise ``ValueError`` (missing credentials) or ``RuntimeError``
+        (network / API failure); the dispatcher surfaces these to the user.
+        """
+
+    @abc.abstractmethod
+    def close_session(self, session_id: str) -> bool:
+        """Release / terminate a cloud session by its provider session ID.
+
+        Args:
+            session_id: The provider session ID, i.e. the value returned
+                under ``bb_session_id`` by :meth:`create_session`.
+
+        Returns True on success, False on failure. Should not raise — log and
+        return False on any exception so the dispatcher's cleanup loop keeps
+        moving across sessions.
+        """
+
+    @abc.abstractmethod
+    def emergency_cleanup(self, session_id: str) -> None:
+        """Best-effort session teardown during process exit.
+
+        Args:
+            session_id: The provider session ID of the session to tear down.
+
+        Called from atexit / signal handlers. Must tolerate missing
+        credentials, network errors, etc. — log and move on. Must not raise.
+        """
+
+    def get_setup_schema(self) -> Dict[str, Any]:
+        """Return provider metadata for the ``hermes tools`` picker.
+
+        Used by :mod:`hermes_cli.tools_config` to inject this provider as a
+        row in the Browser Automation picker. Shape mirrors the existing
+        hardcoded entries in ``TOOL_CATEGORIES["browser"]``::
+
+            {
+                "name": "Browserbase",
+                "badge": "paid",
+                "tag": "Cloud browser with stealth and proxies",
+                "env_vars": [
+                    {"key": "BROWSERBASE_API_KEY",
+                     "prompt": "Browserbase API key",
+                     "url": "https://browserbase.com"},
+                ],
+                "post_setup": "agent_browser",
+            }
+
+        Default: minimal entry derived from :attr:`display_name`. Override to
+        expose API key prompts, badges, managed-Nous gating, and the
+        ``post_setup`` install hook.
+
+        Returns:
+            A new dict on every call.  The default has ``name`` set to
+            :attr:`display_name`, empty ``badge`` / ``tag`` strings, an empty
+            ``env_vars`` list and no ``post_setup`` key.
+        """
+        return {
+            "name": self.display_name,
+            "badge": "",
+            "tag": "",
+            "env_vars": [],
+        }
+
+    # ------------------------------------------------------------------
+    # Backward-compat shims for the legacy CloudBrowserProvider API
+    # ------------------------------------------------------------------
+    #
+    # The pre-PR-#25214 ABC exposed ``is_configured()`` and ``provider_name()``;
+    # ``tools.browser_tool`` has ~6 callers that still use those names. Rather
+    # than churn every callsite (and break out-of-tree downstream code that
+    # subclassed CloudBrowserProvider), we expose the old names as thin
+    # delegations to the new API. Subclasses MUST implement :meth:`is_available`
+    # and :attr:`name`; they may override ``is_configured`` / ``provider_name``
+    # for compatibility with the legacy ABC but it is not required.
+
+    def is_configured(self) -> bool:
+        """Backward-compat alias for :meth:`is_available`."""
+        return self.is_available()
+
+    def provider_name(self) -> str:
+        """Backward-compat alias returning :attr:`display_name`.
+
+        Note that this is the human-readable label, not the config
+        identifier exposed by :attr:`name`.
+        """
+        return self.display_name
--- a/agent/browser_registry.py
+++ b/agent/browser_registry.py
@ -0,0 +1,223 @@
+"""
+Browser Provider Registry
+=========================
+
+Central map of registered cloud browser providers. Populated by plugins at
+import-time via :meth:`PluginContext.register_browser_provider`; consumed by
+:func:`tools.browser_tool._get_cloud_provider` to route each cloud-mode
+``browser_*`` tool call to the active backend.
+
+Active selection
+----------------
+The active provider is chosen by configuration with this precedence:
+
+1. ``browser.cloud_provider`` in ``config.yaml`` (explicit override).
+2. Legacy preference order — ``browser-use`` → ``browserbase`` — filtered by
+   availability. Matches the historic auto-detect order in
+   :func:`tools.browser_tool._get_cloud_provider` (Browser Use checked first
+   because it covers both the managed Nous gateway and direct API key path;
+   Browserbase as the older direct-credentials fallback). ``firecrawl`` is
+   intentionally NOT in the legacy walk — users only get Firecrawl as a
+   cloud browser when they explicitly set ``browser.cloud_provider:
+   firecrawl``, matching pre-migration behaviour where Firecrawl was never
+   auto-selected.
+3. Otherwise ``None`` — the dispatcher falls back to local browser mode.
+
+The explicit-config branch (rule 1) intentionally ignores ``is_available()``
+so the dispatcher surfaces a typed "X_API_KEY is not set" error to the user
+instead of silently switching backends. Matches the legacy
+:func:`tools.browser_tool._get_cloud_provider` behaviour for configured names.
+
+Note: there is no "capability" split here (unlike the web subsystem, which
+has search/extract/crawl). Every browser provider implements the full
+:class:`agent.browser_provider.BrowserProvider` lifecycle; the registry's
+job is purely selection, not capability routing.
+"""
+
+from __future__ import annotations
+
+import logging
+import threading
+from typing import Dict, List, Optional
+
+from agent.browser_provider import BrowserProvider
+
+logger = logging.getLogger(__name__)
+
+
+# Registry state: maps a provider's ``name`` to the registered instance.
+# Every read and write of ``_providers`` in this module is done while
+# holding ``_lock``.
+_providers: Dict[str, BrowserProvider] = {}
+_lock = threading.Lock()
+
+
+def register_provider(provider: BrowserProvider) -> None:
+    """Register a cloud browser provider.
+
+    Re-registration (same ``name``) overwrites the previous entry and logs
+    a debug message — makes hot-reload scenarios (tests, dev loops) behave
+    predictably.
+    """
+    if not isinstance(provider, BrowserProvider):
+        raise TypeError(
+            f"register_provider() expects a BrowserProvider instance, "
+            f"got {type(provider).__name__}"
+        )
+    name = provider.name
+    if not isinstance(name, str) or not name.strip():
+        raise ValueError("Browser provider .name must be a non-empty string")
+    with _lock:
+        existing = _providers.get(name)
+        _providers[name] = provider
+    if existing is not None:
+        logger.debug(
+            "Browser provider '%s' re-registered (was %r)",
+            name, type(existing).__name__,
+        )
+    else:
+        logger.debug(
+            "Registered browser provider '%s' (%s)",
+            name, type(provider).__name__,
+        )
+
+
+def list_providers() -> List[BrowserProvider]:
+    """Return all registered providers, sorted by name."""
+    with _lock:
+        items = list(_providers.values())
+    return sorted(items, key=lambda p: p.name)
+
+
+def get_provider(name: str) -> Optional[BrowserProvider]:
+    """Return the provider registered under *name*, or None."""
+    if not isinstance(name, str):
+        return None
+    with _lock:
+        return _providers.get(name.strip())
+
+
+# ---------------------------------------------------------------------------
+# Active-provider resolution
+# ---------------------------------------------------------------------------
+
+
+# Legacy auto-detect order — used when no ``browser.cloud_provider`` is set,
+# and also when the configured name is not registered (``_resolve`` logs
+# that case and falls through to this walk).
+# Matches the pre-migration walk in :func:`tools.browser_tool._get_cloud_provider`.
+# Firecrawl is intentionally absent so users with ``FIRECRAWL_API_KEY`` set
+# for web-extract don't get silently routed to a paid cloud browser. See
+# :func:`_resolve` for the full rationale.
+_LEGACY_PREFERENCE = (
+    "browser-use",  # tried first
+    "browserbase",  # tried second
+)
+
+
+def _resolve(configured: Optional[str]) -> Optional[BrowserProvider]:
+    """Resolve the active browser provider.
+
+    Resolution rules (in order):
+
+    1. **Explicit "local".** Returns None — the dispatcher disables cloud
+       mode entirely. Mirrors legacy short-circuit in
+       :func:`tools.browser_tool._get_cloud_provider`.
+    2. **Explicit config wins, ignoring availability.** If ``configured``
+       names a registered provider, return it even if its
+       :meth:`is_available` returns False — the dispatcher will surface a
+       precise "X_API_KEY is not set" error instead of silently routing
+       somewhere else.
+    3. **Legacy preference walk, filtered by availability.** Walk
+       :data:`_LEGACY_PREFERENCE` (``browser-use`` → ``browserbase``) looking
+       for a provider whose ``is_available()`` is True.
+
+    There is intentionally NO "single-eligible shortcut" rule here (unlike
+    :func:`agent.web_search_registry._resolve`). Pre-migration, the
+    auto-detect branch in ``tools.browser_tool._get_cloud_provider`` only
+    considered Browser Use and Browserbase; Firecrawl was reachable only
+    via an explicit ``browser.cloud_provider: firecrawl`` config key.
+    Preserving that gate matters because Firecrawl shares its API key with
+    the *web* extract plugin (``plugins/web/firecrawl/``), so users who set
+    ``FIRECRAWL_API_KEY`` for web extract must NOT get silently routed to a
+    paid cloud browser on a fresh install. Third-party browser-provider
+    plugins added under ``~/.hermes/plugins/browser/<vendor>/`` are subject
+    to the same gate — they must be explicitly configured to take effect.
+
+    Returns None when no provider is configured AND no available provider
+    matches the legacy preference; the dispatcher then falls back to local
+    browser mode.
+    """
+    with _lock:
+        snapshot = dict(_providers)
+
+    def _is_available_safe(p: BrowserProvider) -> bool:
+        """Wrap ``is_available()`` so a buggy provider doesn't kill resolution."""
+        try:
+            return bool(p.is_available())
+        except Exception as exc:  # noqa: BLE001
+            logger.warning(
+                "Browser provider %s.is_available() raised %s — treating as unavailable",
+                p.name, exc, exc_info=True,
+            )
+            return False
+
+    # 1. Explicit "local" short-circuit.
+    if configured == "local":
+        return None
+
+    # 2. Explicit config wins — return regardless of is_available() so the
+    #    user gets a precise downstream error message rather than a silent
+    #    backend switch. Matches _get_cloud_provider() in browser_tool.py.
+    if configured:
+        provider = snapshot.get(configured)
+        if provider is not None:
+            return provider
+        logger.debug(
+            "browser cloud_provider '%s' configured but not registered; "
+            "falling back to auto-detect",
+            configured,
+        )
+
+    # 3. Legacy preference walk — only providers in _LEGACY_PREFERENCE are
+    #    auto-eligible. Filtered by availability so we don't surface a
+    #    provider the user has no credentials for. See docstring for why
+    #    we do NOT fall back to "any single-eligible registered provider".
+    for legacy in _LEGACY_PREFERENCE:
+        provider = snapshot.get(legacy)
+        if provider is not None and _is_available_safe(provider):
+            return provider
+
+    return None
+
+
+def get_active_browser_provider() -> Optional[BrowserProvider]:
+    """Resolve the currently-active cloud browser provider.
+
+    Reads ``browser.cloud_provider`` from config.yaml; falls back per the
+    module docstring. Returns None for local mode or when no provider is
+    available.
+    """
+    try:
+        from hermes_cli.config import read_raw_config
+
+        cfg = read_raw_config()
+        browser_cfg = cfg.get("browser", {})
+    except Exception as exc:
+        logger.debug("Could not read browser config: %s", exc)
+        browser_cfg = {}
+
+    configured: Optional[str] = None
+    if isinstance(browser_cfg, dict) and "cloud_provider" in browser_cfg:
+        try:
+            from tools.tool_backend_helpers import normalize_browser_cloud_provider
+
+            configured = normalize_browser_cloud_provider(
+                browser_cfg.get("cloud_provider")
+            )
+        except Exception as exc:
+            logger.debug("normalize_browser_cloud_provider failed: %s", exc)
+            configured = None
+
+    return _resolve(configured)
+
+
+def _reset_for_tests() -> None:
+    """Clear the registry. **Test-only.**
+
+    Empties the shared ``_providers`` dict in place while holding ``_lock``.
+    """
+    with _lock:
+        _providers.clear()
--- a/agent/chat_completion_helpers.py
+++ b/agent/chat_completion_helpers.py
--- a/agent/codex_runtime.py
+++ b/agent/codex_runtime.py
@ -0,0 +1,448 @@
+"""Codex API runtime — App Server and Responses-API streaming paths.
+
+Extracted from :class:`AIAgent` to keep the agent loop file focused.
+Each function takes the parent ``AIAgent`` as its first argument
+(``agent``).  AIAgent keeps thin forwarder methods for backward
+compatibility.
+
+* ``run_codex_app_server_turn`` — drives one turn through the
+  ``codex_app_server`` subprocess client (used when a Codex CLI install
+  is the active provider).
+* ``run_codex_stream`` — streams a Codex Responses API call (the
+  ``codex_responses`` api_mode).
+* ``run_codex_create_stream_fallback`` — recovery path when the
+  Responses ``stream=True`` initial create fails.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+from types import SimpleNamespace
+from typing import Any, Dict, List
+
+logger = logging.getLogger(__name__)
+
+
+def run_codex_app_server_turn(
+    agent,
+    *,
+    user_message: str,
+    original_user_message: Any,
+    messages: List[Dict[str, Any]],
+    effective_task_id: str,
+    should_review_memory: bool = False,
+) -> Dict[str, Any]:
+    """Codex app-server runtime path. Hands the entire turn to a `codex
+    app-server` subprocess and projects its events back into Hermes'
+    messages list so memory/skill review keep working.
+
+    Called from run_conversation() when agent.api_mode == "codex_app_server".
+    Returns the same dict shape as the chat_completions path.
+
+    Args:
+        agent: the parent ``AIAgent``. Its ``_codex_session`` attribute is
+            created lazily here and reset to None when the session is
+            dropped.
+        user_message: text passed to the session as ``user_input``.
+        original_user_message: forwarded unchanged to
+            ``agent._sync_external_memory_for_turn``.
+        messages: conversation list; extended IN PLACE with the turn's
+            projected messages and returned under ``"messages"``.
+        effective_task_id: accepted for signature parity; not read in this
+            function body.
+        should_review_memory: when True (and the turn produced a final
+            text without being interrupted) a background review is spawned
+            with ``review_memory=True``.
+
+    Returns:
+        A result dict with ``final_response``, ``messages``, ``api_calls``,
+        ``completed``, ``partial`` and ``error``. The success path also
+        carries ``codex_thread_id`` and ``codex_turn_id``; the exception
+        path reports ``api_calls == 0`` and omits those two keys.
+
+    NOTE(review): the "line ~N" references in the comments below point at
+    the module this code was extracted from (presumably ``run_agent.py``),
+    not at this file, and may have drifted.
+    """
+    from agent.transports.codex_app_server_session import CodexAppServerSession
+
+    # Lazy session: one CodexAppServerSession per AIAgent instance.
+    # Spawned on first turn, reused across turns, closed at AIAgent
+    # shutdown (see _cleanup hook).
+    if not hasattr(agent, "_codex_session") or agent._codex_session is None:
+        cwd = getattr(agent, "session_cwd", None) or os.getcwd()
+        # Approval callback: defer to Hermes' standard prompt flow if a
+        # CLI thread has installed one. Gateway / cron contexts get the
+        # codex-side fail-closed default.
+        try:
+            from tools.terminal_tool import _get_approval_callback
+            approval_callback = _get_approval_callback()
+        except Exception:
+            approval_callback = None
+        agent._codex_session = CodexAppServerSession(
+            cwd=cwd,
+            approval_callback=approval_callback,
+        )
+
+    # NOTE: the user message is ALREADY appended to messages by the
+    # standard run_conversation() flow (line ~11823) before the early
+    # return reaches us. Do NOT append again — that would duplicate.
+
+    try:
+        turn = agent._codex_session.run_turn(user_input=user_message)
+    except Exception as exc:
+        logger.exception("codex app-server turn failed")
+        # Crash → unconditionally drop the session so the next turn
+        # respawns from scratch instead of reusing a dead client.
+        try:
+            agent._codex_session.close()
+        except Exception:
+            # Best-effort close: the session is discarded either way.
+            pass
+        agent._codex_session = None
+        return {
+            "final_response": (
+                f"Codex app-server turn failed: {exc}. "
+                f"Fall back to default runtime with `/codex-runtime auto`."
+            ),
+            "messages": messages,
+            "api_calls": 0,
+            "completed": False,
+            "partial": True,
+            "error": str(exc),
+        }
+
+    # If the turn signalled the underlying client is wedged (deadline
+    # blown, post-tool watchdog tripped, OAuth refresh died, subprocess
+    # exited), retire the session so the next turn respawns codex
+    # rather than riding the broken process. Mirrors openclaw beta.8's
+    # "retire timed-out app-server clients" fix.
+    if getattr(turn, "should_retire", False):
+        logger.warning(
+            "codex app-server session retired (turn error: %s)",
+            turn.error,
+        )
+        try:
+            agent._codex_session.close()
+        except Exception:
+            # Best-effort close: the session is discarded either way.
+            pass
+        agent._codex_session = None
+
+    # Splice projected messages into the conversation. The projector emits
+    # standard {role, content, tool_calls, tool_call_id} entries, which
+    # is exactly what curator.py / sessions DB expect.
+    if turn.projected_messages:
+        messages.extend(turn.projected_messages)
+
+    # Counter ticks for the agent-improvement loop.
+    # _turns_since_memory and _user_turn_count are ALREADY incremented
+    # in the run_conversation() pre-loop block (lines ~11793-11817) so we
+    # do NOT touch them here — that would double-count.
+    # Only _iters_since_skill needs explicit increment, since the
+    # chat_completions loop bumps it per tool iteration (line ~12110)
+    # and that loop is bypassed on this path.
+    agent._iters_since_skill = (
+        getattr(agent, "_iters_since_skill", 0) + turn.tool_iterations
+    )
+
+    # Now check the skill nudge AFTER iters were incremented — same
+    # pattern the chat_completions path uses (line ~15432).
+    should_review_skills = False
+    if (
+        agent._skill_nudge_interval > 0
+        and agent._iters_since_skill >= agent._skill_nudge_interval
+        and "skill_manage" in agent.valid_tool_names
+    ):
+        should_review_skills = True
+        agent._iters_since_skill = 0
+
+    # External memory provider sync (mirrors line ~15439). Skipped on
+    # interrupt/error to avoid feeding partial transcripts to memory.
+    if not turn.interrupted and turn.error is None:
+        try:
+            agent._sync_external_memory_for_turn(
+                original_user_message=original_user_message,
+                final_response=turn.final_text,
+                interrupted=False,
+            )
+        except Exception:
+            logger.debug("external memory sync raised", exc_info=True)
+
+    # Background review fork — same cadence + signature as the default
+    # path (line ~15449). Only fires when a trigger actually tripped AND
+    # we have a real final response.
+    if (
+        turn.final_text
+        and not turn.interrupted
+        and (should_review_memory or should_review_skills)
+    ):
+        try:
+            agent._spawn_background_review(
+                # Pass a shallow copy of the message list.
+                messages_snapshot=list(messages),
+                review_memory=should_review_memory,
+                review_skills=should_review_skills,
+            )
+        except Exception:
+            logger.debug("background review spawn raised", exc_info=True)
+
+    return {
+        "final_response": turn.final_text,
+        "messages": messages,
+        "api_calls": 1,  # one app-server "turn" maps to one logical API call
+        "completed": not turn.interrupted and turn.error is None,
+        "partial": turn.interrupted or turn.error is not None,
+        "error": turn.error,
+        "codex_thread_id": turn.thread_id,
+        "codex_turn_id": turn.turn_id,
+    }
+
+
+
+
+def run_codex_stream(agent, api_kwargs: dict, client: Any = None, on_first_delta: callable = None):
+    """Execute one streaming Responses API request and return the final response."""
+    import httpx as _httpx
+
+    active_client = client or agent._ensure_primary_openai_client(reason="codex_stream_direct")
+    max_stream_retries = 1
+    has_tool_calls = False
+    first_delta_fired = False
+    # Accumulate streamed text so we can recover if get_final_response()
+    # returns empty output (e.g. chatgpt.com backend-api sends
+    # response.incomplete instead of response.completed).
+    agent._codex_streamed_text_parts: list = []
+    for attempt in range(max_stream_retries + 1):
+        if agent._interrupt_requested:
+            raise InterruptedError("Agent interrupted before Codex stream retry")
+        collected_output_items: list = []
+        try:
+            with active_client.responses.stream(**api_kwargs) as stream:
+                for event in stream:
+                    agent._touch_activity("receiving stream response")
+                    if agent._interrupt_requested:
+                        break
+                    event_type = getattr(event, "type", "")
+                    # Fire callbacks on text content deltas (suppress during tool calls)
+                    if "output_text.delta" in event_type or event_type == "response.output_text.delta":
+                        delta_text = getattr(event, "delta", "")
+                        if delta_text:
+                            agent._codex_streamed_text_parts.append(delta_text)
+                        if delta_text and not has_tool_calls:
+                            if not first_delta_fired:
+                                first_delta_fired = True
+                                if on_first_delta:
+                                    try:
+                                        on_first_delta()
+                                    except Exception:
+                                        pass
+                            agent._fire_stream_delta(delta_text)
+                    # Track tool calls to suppress text streaming
+                    elif "function_call" in event_type:
+                        has_tool_calls = True
+                    # Fire reasoning callbacks
+                    elif "reasoning" in event_type and "delta" in event_type:
+                        reasoning_text = getattr(event, "delta", "")
+                        if reasoning_text:
+                            agent._fire_reasoning_delta(reasoning_text)
+                    # Collect completed output items — some backends
+                    # (chatgpt.com/backend-api/codex) stream valid items
+                    # via response.output_item.done but the SDK's
+                    # get_final_response() returns an empty output list.
+                    elif event_type == "response.output_item.done":
+                        done_item = getattr(event, "item", None)
+                        if done_item is not None:
+                            collected_output_items.append(done_item)
+                    # Log non-completed terminal events for diagnostics
+                    elif event_type in {"response.incomplete", "response.failed"}:
+                        resp_obj = getattr(event, "response", None)
+                        status = getattr(resp_obj, "status", None) if resp_obj else None
+                        incomplete_details = getattr(resp_obj, "incomplete_details", None) if resp_obj else None
+                        logger.warning(
+                            "Codex Responses stream received terminal event %s "
+                            "(status=%s, incomplete_details=%s, streamed_chars=%d). %s",
+                            event_type, status, incomplete_details,
+                            sum(len(p) for p in agent._codex_streamed_text_parts),
+                            agent._client_log_context(),
+                        )
+                final_response = stream.get_final_response()
+                # PATCH: ChatGPT Codex backend streams valid output items
+                # but get_final_response() can return an empty output list.
+                # Backfill from collected items or synthesize from deltas.
+                _out = getattr(final_response, "output", None)
+                if isinstance(_out, list) and not _out:
+                    if collected_output_items:
+                        final_response.output = list(collected_output_items)
+                        logger.debug(
+                            "Codex stream: backfilled %d output items from stream events",
+                            len(collected_output_items),
+                        )
+                    elif agent._codex_streamed_text_parts and not has_tool_calls:
+                        assembled = "".join(agent._codex_streamed_text_parts)
+                        final_response.output = [SimpleNamespace(
+                            type="message",
+                            role="assistant",
+                            status="completed",
+                            content=[SimpleNamespace(type="output_text", text=assembled)],
+                        )]
+                        logger.debug(
+                            "Codex stream: synthesized output from %d text deltas (%d chars)",
+                            len(agent._codex_streamed_text_parts), len(assembled),
+                        )
+                return final_response
+        except (_httpx.RemoteProtocolError, _httpx.ReadTimeout, _httpx.ConnectError, ConnectionError) as exc:
+            if attempt < max_stream_retries:
+                logger.debug(
+                    "Codex Responses stream transport failed (attempt %s/%s); retrying. %s error=%s",
+                    attempt + 1,
+                    max_stream_retries + 1,
+                    agent._client_log_context(),
+                    exc,
+                )
+                continue
+            logger.debug(
+                "Codex Responses stream transport failed; falling back to create(stream=True). %s error=%s",
+                agent._client_log_context(),
+                exc,
+            )
+            return agent._run_codex_create_stream_fallback(api_kwargs, client=active_client)
+        except RuntimeError as exc:
+            err_text = str(exc)
+            missing_completed = "response.completed" in err_text
+            # The OpenAI SDK's Responses streaming state machine raises
+            # ``RuntimeError("Expected to have received `response.created`
+            # before `<event-type>`")`` when the first SSE event from the
+            # server is anything other than ``response.created`` — and it
+            # discards the event's payload before we can read it.  Three
+            # real-world backends emit a different first frame:
+            #
+            #   * xAI on grok-4.x OAuth — sends ``error`` (issues
+            #     reported around the May 2026 SuperGrok rollout when
+            #     multi-turn conversations replay encrypted reasoning
+            #     content the OAuth tier rejects)
+            #   * codex-lb relays — send ``codex.rate_limits`` (#14634)
+            #   * custom Responses relays — send ``response.in_progress``
+            #     (#8133)
+            #
+            # In all three cases the underlying byte stream is still
+            # readable: a non-stream ``responses.create(stream=True)``
+            # fallback succeeds and surfaces the real provider error as
+            # a normal exception with body+status_code attached, which
+            # ``_summarize_api_error`` can then translate into a useful
+            # user-facing line.  Treat ``response.created`` prelude
+            # errors the same way we already treat ``response.completed``
+            # postlude errors.
+            prelude_error = (
+                "Expected to have received `response.created`" in err_text
+                or "Expected to have received \"response.created\"" in err_text
+            )
+            if (missing_completed or prelude_error) and attempt < max_stream_retries:
+                logger.debug(
+                    "Responses stream %s (attempt %s/%s); retrying. %s",
+                    "prelude rejected" if prelude_error else "closed before completion",
+                    attempt + 1,
+                    max_stream_retries + 1,
+                    agent._client_log_context(),
+                )
+                continue
+            if missing_completed or prelude_error:
+                logger.debug(
+                    "Responses stream %s; falling back to create(stream=True). %s err=%s",
+                    "rejected before response.created" if prelude_error else "did not emit response.completed",
+                    agent._client_log_context(),
+                    err_text,
+                )
+                return agent._run_codex_create_stream_fallback(api_kwargs, client=active_client)
+            raise
+
+
+
+def run_codex_create_stream_fallback(agent, api_kwargs: dict, client: Any = None):
+    """Fallback path for stream completion edge cases on Codex-style Responses backends."""
+    active_client = client or agent._ensure_primary_openai_client(reason="codex_create_stream_fallback")
+    fallback_kwargs = dict(api_kwargs)
+    fallback_kwargs["stream"] = True
+    fallback_kwargs = agent._get_transport().preflight_kwargs(fallback_kwargs, allow_stream=True)
+    stream_or_response = active_client.responses.create(**fallback_kwargs)
+
+    # Compatibility shim for mocks or providers that still return a concrete response.
+    if hasattr(stream_or_response, "output"):
+        return stream_or_response
+    if not hasattr(stream_or_response, "__iter__"):
+        return stream_or_response
+
+    terminal_response = None
+    collected_output_items: list = []
+    collected_text_deltas: list = []
+    try:
+        for event in stream_or_response:
+            agent._touch_activity("receiving stream response")
+            event_type = getattr(event, "type", None)
+            if not event_type and isinstance(event, dict):
+                event_type = event.get("type")
+
+            # ``error`` SSE frames carry the provider's real failure
+            # reason (subscription / quota / model-not-available /
+            # rejected-reasoning-replay) but never appear in the
+            # ``{completed, incomplete, failed}`` terminal set, so the
+            # raw loop below would silently consume them and end with
+            # "did not emit a terminal response".  xAI in particular
+            # emits ``type=error`` as the FIRST frame for OAuth
+            # accounts whose Grok subscription is missing/exhausted —
+            # the SDK's stream helper raises ``RuntimeError(Expected
+            # to have received response.created before error)`` which
+            # the caller catches and routes here, expecting this
+            # fallback to surface the message.  Synthesize an
+            # APIError-shaped exception so ``_summarize_api_error``
+            # and the credential-pool entitlement detector see the
+            # real text instead of a generic RuntimeError.
+            if event_type == "error":
+                err_message = getattr(event, "message", None)
+                if not err_message and isinstance(event, dict):
+                    err_message = event.get("message")
+                err_code = getattr(event, "code", None)
+                if not err_code and isinstance(event, dict):
+                    err_code = event.get("code")
+                err_param = getattr(event, "param", None)
+                if not err_param and isinstance(event, dict):
+                    err_param = event.get("param")
+                err_message = (err_message or "stream emitted error event").strip()
+                from run_agent import _StreamErrorEvent
+                raise _StreamErrorEvent(err_message, code=err_code, param=err_param)
+
+            # Collect output items and text deltas for backfill
+            if event_type == "response.output_item.done":
+                done_item = getattr(event, "item", None)
+                if done_item is None and isinstance(event, dict):
+                    done_item = event.get("item")
+                if done_item is not None:
+                    collected_output_items.append(done_item)
+            elif event_type in {"response.output_text.delta",}:
+                delta = getattr(event, "delta", "")
+                if not delta and isinstance(event, dict):
+                    delta = event.get("delta", "")
+                if delta:
+                    collected_text_deltas.append(delta)
+
+            if event_type not in {"response.completed", "response.incomplete", "response.failed"}:
+                continue
+
+            terminal_response = getattr(event, "response", None)
+            if terminal_response is None and isinstance(event, dict):
+                terminal_response = event.get("response")
+            if terminal_response is not None:
+                # Backfill empty output from collected stream events
+                _out = getattr(terminal_response, "output", None)
+                if isinstance(_out, list) and not _out:
+                    if collected_output_items:
+                        terminal_response.output = list(collected_output_items)
+                        logger.debug(
+                            "Codex fallback stream: backfilled %d output items",
+                            len(collected_output_items),
+                        )
+                    elif collected_text_deltas:
+                        assembled = "".join(collected_text_deltas)
+                        terminal_response.output = [SimpleNamespace(
+                            type="message", role="assistant",
+                            status="completed",
+                            content=[SimpleNamespace(type="output_text", text=assembled)],
+                        )]
+                        logger.debug(
+                            "Codex fallback stream: synthesized from %d deltas (%d chars)",
+                            len(collected_text_deltas), len(assembled),
+                        )
+                return terminal_response
+    finally:
+        close_fn = getattr(stream_or_response, "close", None)
+        if callable(close_fn):
+            try:
+                close_fn()
+            except Exception:
+                pass
+
+    if terminal_response is not None:
+        return terminal_response
+    raise RuntimeError("Responses create(stream=True) fallback did not emit a terminal response.")
+
+
+
+# Public API of this module: the three runtime entry points defined above.
+__all__ = [
+    "run_codex_app_server_turn",
+    "run_codex_stream",
+    "run_codex_create_stream_fallback",
+]
--- a/agent/conversation_compression.py
+++ b/agent/conversation_compression.py
@ -0,0 +1,556 @@
+"""Context compression — extract the AIAgent methods that drive summarisation.
+
+Four concerns live here:
+
+* :func:`check_compression_model_feasibility` — startup probe of the
+  configured auxiliary compression model.  Warns when the aux context
+  window can't fit the main model's compression threshold; auto-lowers
+  the session threshold when possible; hard-rejects auxes below
+  ``MINIMUM_CONTEXT_LENGTH``.
+
+* :func:`replay_compression_warning` — re-emit a stored warning through
+  the gateway ``status_callback`` once it's wired up (the callback is
+  set after :class:`AIAgent` construction).
+
+* :func:`compress_context` — the actual compression call.  Runs the
+  configured compressor, splits the SQLite session, rotates the
+  session_id, notifies plugin context engines / memory providers, and
+  returns the compressed message list and freshly-built system prompt.
+
+* :func:`try_shrink_image_parts_in_messages` — image-too-large recovery
+  helper that re-encodes ``data:image/...;base64,...`` parts at a smaller
+  size so retries can fit under provider ceilings (Anthropic's 5 MB).
+
+``run_agent`` keeps thin wrappers for each so existing call sites
+(``self._compress_context(...)``) keep working.  Tests that exercise
+these paths see no behavioural change.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import tempfile
+import uuid
+from datetime import datetime
+from pathlib import Path
+from typing import Any, List, Optional, Tuple
+
+from agent.model_metadata import estimate_request_tokens_rough
+
+logger = logging.getLogger(__name__)
+
+
+def check_compression_model_feasibility(agent: Any) -> None:
+    """Warn at session start if the auxiliary compression model's context
+    window is smaller than the main model's compression threshold.
+
+    When the auxiliary model cannot fit the content that needs summarising,
+    compression will either fail outright (the LLM call errors) or produce
+    a severely truncated summary.
+
+    Called during ``AIAgent.__init__`` so CLI users see the warning
+    immediately (via ``_vprint``).  The gateway sets ``status_callback``
+    *after* construction, so :func:`replay_compression_warning` re-sends
+    the stored warning through the callback on the first
+    ``run_conversation()`` call.
+    """
+    if not agent.compression_enabled:
+        return
+    try:
+        from agent.auxiliary_client import (
+            _resolve_task_provider_model,
+            get_text_auxiliary_client,
+        )
+        from agent.model_metadata import (
+            MINIMUM_CONTEXT_LENGTH,
+            get_model_context_length,
+        )
+
+        client, aux_model = get_text_auxiliary_client(
+            "compression",
+            main_runtime=agent._current_main_runtime(),
+        )
+        # Best-effort aux provider label for the warning message. The
+        # configured provider may be "auto", in which case we fall back
+        # to the client's base_url hostname so the user can still tell
+        # where the compression model is actually being called.
+        try:
+            _aux_cfg_provider, _, _, _, _ = _resolve_task_provider_model("compression")
+        except Exception:
+            _aux_cfg_provider = ""
+        if client is None or not aux_model:
+            if _aux_cfg_provider and _aux_cfg_provider != "auto":
+                msg = (
+                    "⚠ Configured auxiliary compression provider "
+                    f"'{_aux_cfg_provider}' is unavailable — context "
+                    "compression will drop middle turns without a summary. "
+                    "Check auxiliary.compression in config.yaml and "
+                    "reauthenticate that provider."
+                )
+            else:
+                msg = (
+                    "⚠ No auxiliary LLM provider configured — context "
+                    "compression will drop middle turns without a summary. "
+                    "Run `hermes setup` or set OPENROUTER_API_KEY."
+                )
+            agent._compression_warning = msg
+            agent._emit_status(msg)
+            logger.warning(
+                "No auxiliary LLM provider for compression — "
+                "summaries will be unavailable."
+            )
+            return
+
+        aux_base_url = str(getattr(client, "base_url", ""))
+        aux_api_key = str(getattr(client, "api_key", ""))
+
+        aux_context = get_model_context_length(
+            aux_model,
+            base_url=aux_base_url,
+            api_key=aux_api_key,
+            config_context_length=getattr(agent, "_aux_compression_context_length_config", None),
+            # Each model must be resolved with its own provider so that
+            # provider-specific paths (e.g. Bedrock static table, OpenRouter API)
+            # are invoked for the correct client, not inherited from the main model.
+            provider=(_aux_cfg_provider if _aux_cfg_provider and _aux_cfg_provider != "auto" else getattr(agent, "provider", "")),
+            custom_providers=agent._custom_providers,
+        )
+
+        # Hard floor: the auxiliary compression model must have at least
+        # MINIMUM_CONTEXT_LENGTH (64K) tokens of context.  The main model
+        # is already required to meet this floor (checked earlier in
+        # __init__), so the compression model must too — otherwise it
+        # cannot summarise a full threshold-sized window of main-model
+        # content.  Mirrors the main-model rejection pattern.
+        if aux_context and aux_context < MINIMUM_CONTEXT_LENGTH:
+            raise ValueError(
+                f"Auxiliary compression model {aux_model} has a context "
+                f"window of {aux_context:,} tokens, which is below the "
+                f"minimum {MINIMUM_CONTEXT_LENGTH:,} required by Hermes "
+                f"Agent.  Choose a compression model with at least "
+                f"{MINIMUM_CONTEXT_LENGTH // 1000}K context (set "
+                f"auxiliary.compression.model in config.yaml), or set "
+                f"auxiliary.compression.context_length to override the "
+                f"detected value if it is wrong."
+            )
+
+        threshold = agent.context_compressor.threshold_tokens
+        if aux_context < threshold:
+            # Auto-correct: lower the live session threshold so
+            # compression actually works this session.  The hard floor
+            # above guarantees aux_context >= MINIMUM_CONTEXT_LENGTH,
+            # so the new threshold is always >= 64K.
+            #
+            # The compression summariser sends a single user-role
+            # prompt (no system prompt, no tools) to the aux model, so
+            # new_threshold == aux_context is safe: the request is
+            # the raw messages plus a small summarisation instruction.
+            old_threshold = threshold
+            new_threshold = aux_context
+            agent.context_compressor.threshold_tokens = new_threshold
+            # Keep threshold_percent in sync so future main-model
+            # context_length changes (update_model) re-derive from a
+            # sensible number rather than the original too-high value.
+            main_ctx = agent.context_compressor.context_length
+            if main_ctx:
+                agent.context_compressor.threshold_percent = (
+                    new_threshold / main_ctx
+                )
+            safe_pct = int((aux_context / main_ctx) * 100) if main_ctx else 50
+            # Build human-readable "model (provider)" labels for both
+            # the main model and the compression model so users can
+            # tell at a glance which provider each side is actually
+            # using. When the configured provider is empty or "auto",
+            # fall back to the client's base_url hostname.
+            _main_model = getattr(agent, "model", "") or "?"
+            _main_provider = getattr(agent, "provider", "") or ""
+            _aux_provider_label = (
+                _aux_cfg_provider
+                if _aux_cfg_provider and _aux_cfg_provider != "auto"
+                else ""
+            )
+            if not _aux_provider_label:
+                try:
+                    from urllib.parse import urlparse
+                    _aux_provider_label = (
+                        urlparse(aux_base_url).hostname or aux_base_url
+                    )
+                except Exception:
+                    _aux_provider_label = aux_base_url or "auto"
+            _main_label = (
+                f"{_main_model} ({_main_provider})"
+                if _main_provider
+                else _main_model
+            )
+            _aux_label = f"{aux_model} ({_aux_provider_label})"
+            msg = (
+                f"⚠ Compression model {_aux_label} context is "
+                f"{aux_context:,} tokens, but the main model "
+                f"{_main_label}'s compression threshold was "
+                f"{old_threshold:,} tokens. "
+                f"Auto-lowered this session's threshold to "
+                f"{new_threshold:,} tokens so compression can run.\n"
+                f"  To make this permanent, edit config.yaml — either:\n"
+                f"  1. Use a larger compression model:\n"
+                f"       auxiliary:\n"
+                f"         compression:\n"
+                f"           model: <model-with-{old_threshold:,}+-context>\n"
+                f"  2. Lower the compression threshold:\n"
+                f"       compression:\n"
+                f"         threshold: 0.{safe_pct:02d}"
+            )
+            agent._compression_warning = msg
+            agent._emit_status(msg)
+            logger.warning(
+                "Auxiliary compression model %s has %d token context, "
+                "below the main model's compression threshold of %d "
+                "tokens — auto-lowered session threshold to %d to "
+                "keep compression working.",
+                aux_model,
+                aux_context,
+                old_threshold,
+                new_threshold,
+            )
+    except ValueError:
+        # Hard rejections (aux below minimum context) must propagate
+        # so the session refuses to start.
+        raise
+    except Exception as exc:
+        logger.debug(
+            "Compression feasibility check failed (non-fatal): %s", exc
+        )
+
+
+def replay_compression_warning(agent: Any) -> None:
+    """Re-send the compression warning through ``status_callback``.
+
+    During ``__init__`` the gateway's ``status_callback`` is not yet
+    wired, so ``_emit_status`` only reaches ``_vprint`` (CLI).  This
+    method is called once at the start of the first
+    ``run_conversation()`` — by then the gateway has set the callback,
+    so every platform (Telegram, Discord, Slack, etc.) receives the
+    warning.
+    """
+    msg = getattr(agent, "_compression_warning", None)
+    if msg and agent.status_callback:
+        try:
+            agent.status_callback("lifecycle", msg)
+        except Exception:
+            pass
+
+
+def compress_context(
+    agent: Any,
+    messages: list,
+    system_message: str,
+    *,
+    approx_tokens: Optional[int] = None,
+    task_id: str = "default",
+    focus_topic: Optional[str] = None,
+) -> Tuple[list, str]:
+    """Compress conversation context and split the session in SQLite.
+
+    Args:
+        agent: The owning :class:`AIAgent`.
+        messages: Current message history (will be summarised).
+        system_message: Current system prompt; rebuilt after compression.
+        approx_tokens: Pre-compression token estimate, logged for ops.
+        task_id: Tool task scope (used for clearing file-read dedup state).
+        focus_topic: Optional focus string for guided compression — the
+            summariser will prioritise preserving information related to
+            this topic.  Inspired by Claude Code's ``/compact <focus>``.
+
+    Returns:
+        ``(compressed_messages, new_system_prompt)`` tuple.
+    """
+    _pre_msg_count = len(messages)
+    logger.info(
+        "context compression started: session=%s messages=%d tokens=~%s model=%s focus=%r",
+        agent.session_id or "none", _pre_msg_count,
+        f"{approx_tokens:,}" if approx_tokens else "unknown", agent.model,
+        focus_topic,
+    )
+    agent._emit_status(
+        "🗜️ Compacting context — summarizing earlier conversation so I can continue..."
+    )
+
+    # Notify external memory provider before compression discards context
+    if agent._memory_manager:
+        try:
+            agent._memory_manager.on_pre_compress(messages)
+        except Exception:
+            pass
+
+    try:
+        compressed = agent.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic)
+    except TypeError:
+        # Plugin context engine with strict signature that doesn't accept
+        # focus_topic — fall back to calling without it.
+        compressed = agent.context_compressor.compress(messages, current_tokens=approx_tokens)
+
+    summary_error = getattr(agent.context_compressor, "_last_summary_error", None)
+    if summary_error:
+        if getattr(agent, "_last_compression_summary_warning", None) != summary_error:
+            agent._last_compression_summary_warning = summary_error
+            agent._emit_warning(
+                f"⚠ Compression summary failed: {summary_error}. "
+                "Inserted a fallback context marker."
+            )
+    else:
+        # No hard failure — but did the configured aux model error out
+        # and get recovered by retrying on main?  Surface that so users
+        # know their auxiliary.compression.model setting is broken even
+        # though compression succeeded.
+        _aux_fail_model = getattr(agent.context_compressor, "_last_aux_model_failure_model", None)
+        _aux_fail_err = getattr(agent.context_compressor, "_last_aux_model_failure_error", None)
+        if _aux_fail_model:
+            # Dedup on (model, error) so we don't spam on every compaction
+            _aux_key = (_aux_fail_model, _aux_fail_err)
+            if getattr(agent, "_last_aux_fallback_warning_key", None) != _aux_key:
+                agent._last_aux_fallback_warning_key = _aux_key
+                agent._emit_warning(
+                    f"ℹ Configured compression model '{_aux_fail_model}' failed "
+                    f"({_aux_fail_err or 'unknown error'}). Recovered using main model — "
+                    "check auxiliary.compression.model in config.yaml."
+                )
+
+    todo_snapshot = agent._todo_store.format_for_injection()
+    if todo_snapshot:
+        compressed.append({"role": "user", "content": todo_snapshot})
+
+    agent._invalidate_system_prompt()
+    new_system_prompt = agent._build_system_prompt(system_message)
+    agent._cached_system_prompt = new_system_prompt
+
+    if agent._session_db:
+        try:
+            # Propagate title to the new session with auto-numbering
+            old_title = agent._session_db.get_session_title(agent.session_id)
+            # Trigger memory extraction on the old session before it rotates.
+            agent.commit_memory_session(messages)
+            agent._session_db.end_session(agent.session_id, "compression")
+            old_session_id = agent.session_id
+            agent.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
+            os.environ["HERMES_SESSION_ID"] = agent.session_id
+            try:
+                from gateway.session_context import _SESSION_ID
+                _SESSION_ID.set(agent.session_id)
+            except Exception:
+                pass
+            # Update session_log_file to point to the new session's JSON file
+            agent.session_log_file = agent.logs_dir / f"session_{agent.session_id}.json"
+            agent._session_db_created = False
+            agent._session_db.create_session(
+                session_id=agent.session_id,
+                source=agent.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
+                model=agent.model,
+                model_config=agent._session_init_model_config,
+                parent_session_id=old_session_id,
+            )
+            agent._session_db_created = True
+            # Auto-number the title for the continuation session
+            if old_title:
+                try:
+                    new_title = agent._session_db.get_next_title_in_lineage(old_title)
+                    agent._session_db.set_session_title(agent.session_id, new_title)
+                except (ValueError, Exception) as e:
+                    logger.debug("Could not propagate title on compression: %s", e)
+            agent._session_db.update_system_prompt(agent.session_id, new_system_prompt)
+            # Reset flush cursor — new session starts with no messages written
+            agent._last_flushed_db_idx = 0
+        except Exception as e:
+            logger.warning("Session DB compression split failed — new session will NOT be indexed: %s", e)
+
+    # Notify the context engine that the session_id rotated because of
+    # compression (not a fresh /new). Plugin engines (e.g. hermes-lcm) use
+    # boundary_reason="compression" to preserve DAG lineage across the
+    # rollover instead of re-initializing fresh per-session state.
+    # See hermes-lcm#68. Built-in ContextCompressor ignores kwargs.
+    try:
+        _old_sid = locals().get("old_session_id")
+        if _old_sid and hasattr(agent.context_compressor, "on_session_start"):
+            agent.context_compressor.on_session_start(
+                agent.session_id or "",
+                boundary_reason="compression",
+                old_session_id=_old_sid,
+            )
+    except Exception as _ce_err:
+        logger.debug("context engine on_session_start (compression): %s", _ce_err)
+
+    # Notify memory providers of the compression-driven session_id rotation
+    # so provider-cached per-session state (Hindsight's _document_id,
+    # accumulated turn buffers, counters) refreshes. reset=False because
+    # the logical conversation continues; only the id and DB row rolled
+    # over. See #6672.
+    try:
+        _old_sid = locals().get("old_session_id")
+        if _old_sid and agent._memory_manager:
+            agent._memory_manager.on_session_switch(
+                agent.session_id or "",
+                parent_session_id=_old_sid,
+                reset=False,
+                reason="compression",
+            )
+    except Exception as _me_err:
+        logger.debug("memory manager on_session_switch (compression): %s", _me_err)
+
+    # Warn on repeated compressions (quality degrades with each pass)
+    _cc = agent.context_compressor.compression_count
+    if _cc >= 2:
+        agent._vprint(
+            f"{agent.log_prefix}⚠️  Session compressed {_cc} times — "
+            f"accuracy may degrade. Consider /new to start fresh.",
+            force=True,
+        )
+
+    # Update token estimate after compaction so pressure calculations
+    # use the post-compression count, not the stale pre-compression one.
+    # Use estimate_request_tokens_rough() so tool schemas are included —
+    # with 50+ tools enabled, schemas alone can add 20-30K tokens, and
+    # omitting them delays the next compression cycle far past the
+    # configured threshold (issue #14695).
+    _compressed_est = estimate_request_tokens_rough(
+        compressed,
+        system_prompt=new_system_prompt or "",
+        tools=agent.tools or None,
+    )
+    agent.context_compressor.last_prompt_tokens = _compressed_est
+    agent.context_compressor.last_completion_tokens = 0
+
+    # Clear the file-read dedup cache.  After compression the original
+    # read content is summarised away — if the model re-reads the same
+    # file it needs the full content, not a "file unchanged" stub.
+    try:
+        from tools.file_tools import reset_file_dedup
+        reset_file_dedup(task_id)
+    except Exception:
+        pass
+
+    logger.info(
+        "context compression done: session=%s messages=%d->%d tokens=~%s",
+        agent.session_id or "none", _pre_msg_count, len(compressed),
+        f"{_compressed_est:,}",
+    )
+    return compressed, new_system_prompt
+
+
+def try_shrink_image_parts_in_messages(api_messages: list) -> bool:
+    """Re-encode all native image parts at a smaller size to recover from
+    image-too-large errors (Anthropic 5 MB, unknown other providers).
+
+    Mutates ``api_messages`` in place. Returns True if any image part was
+    actually replaced, False if there were no image parts to shrink or
+    Pillow couldn't help (caller should surface the original error).
+
+    Strategy: look for ``image_url`` / ``input_image`` parts carrying a
+    ``data:image/...;base64,...`` payload.  For each one whose encoded
+    size exceeds 4 MB (a safe target that slides under Anthropic's 5 MB
+    ceiling with header overhead), write the base64 to a tempfile, call
+    ``vision_tools._resize_image_for_vision`` to produce a smaller data
+    URL, and substitute it in place.
+
+    Non-data-URL images (http/https URLs) are not touched — the provider
+    fetches those itself and the size limit is different.
+    """
+    if not api_messages:
+        return False
+
+    try:
+        from tools.vision_tools import _resize_image_for_vision
+    except Exception as exc:
+        logger.warning("image-shrink recovery: vision_tools unavailable — %s", exc)
+        return False
+
+    # 4 MB target leaves comfortable headroom under Anthropic's 5 MB.
+    # Non-Anthropic providers we haven't observed rejecting are fine with
+    # much larger; shrinking to 4 MB here loses quality but only fires
+    # after a confirmed provider rejection, so the alternative is failure.
+    target_bytes = 4 * 1024 * 1024
+    changed_count = 0
+
+    def _shrink_data_url(url: str) -> Optional[str]:
+        """Return a smaller data URL, or None if shrink can't help."""
+        if not isinstance(url, str) or not url.startswith("data:"):
+            return None
+        if len(url) <= target_bytes:
+            # This specific image wasn't the oversized one.
+            return None
+        try:
+            header, _, data = url.partition(",")
+            mime = "image/jpeg"
+            if header.startswith("data:"):
+                mime_part = header[len("data:"):].split(";", 1)[0].strip()
+                if mime_part.startswith("image/"):
+                    mime = mime_part
+            import base64 as _b64
+            raw = _b64.b64decode(data)
+            suffix = {
+                "image/png": ".png", "image/gif": ".gif", "image/webp": ".webp",
+                "image/jpeg": ".jpg", "image/jpg": ".jpg", "image/bmp": ".bmp",
+            }.get(mime, ".jpg")
+            tmp = tempfile.NamedTemporaryFile(
+                prefix="hermes_shrink_", suffix=suffix, delete=False,
+            )
+            try:
+                tmp.write(raw)
+                tmp.close()
+                resized = _resize_image_for_vision(
+                    Path(tmp.name),
+                    mime_type=mime,
+                    max_base64_bytes=target_bytes,
+                )
+            finally:
+                try:
+                    Path(tmp.name).unlink(missing_ok=True)
+                except Exception:
+                    pass
+            if not resized or len(resized) >= len(url):
+                # Shrink didn't help (or made it bigger — corrupt input?).
+                return None
+            return resized
+        except Exception as exc:
+            logger.warning("image-shrink recovery: re-encode failed — %s", exc)
+            return None
+
+    for msg in api_messages:
+        if not isinstance(msg, dict):
+            continue
+        content = msg.get("content")
+        if not isinstance(content, list):
+            continue
+        for part in content:
+            if not isinstance(part, dict):
+                continue
+            ptype = part.get("type")
+            if ptype not in {"image_url", "input_image"}:
+                continue
+            image_value = part.get("image_url")
+            # OpenAI chat.completions: {"image_url": {"url": "data:..."}}
+            # OpenAI Responses: {"image_url": "data:..."}
+            if isinstance(image_value, dict):
+                url = image_value.get("url", "")
+                resized = _shrink_data_url(url)
+                if resized:
+                    image_value["url"] = resized
+                    changed_count += 1
+            elif isinstance(image_value, str):
+                resized = _shrink_data_url(image_value)
+                if resized:
+                    part["image_url"] = resized
+                    changed_count += 1
+
+    if changed_count:
+        logger.info(
+            "image-shrink recovery: re-encoded %d image part(s) to fit under %.0f MB",
+            changed_count, target_bytes / (1024 * 1024),
+        )
+    return changed_count > 0
+
+
+# Public API of this module: the names exported by ``from <module> import *``.
+__all__ = [
+    "check_compression_model_feasibility",
+    "replay_compression_warning",
+    "compress_context",
+    "try_shrink_image_parts_in_messages",
+]
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
--- a/agent/credential_pool.py
+++ b/agent/credential_pool.py
@ -166,6 +166,8 @@ class PooledCredential:
    @property
    def runtime_api_key(self) -> str:
        if self.provider == "nous":
+            # Nous stores the runtime inference credential in agent_key for
+            # compatibility. It may be a NAS invoke JWT or legacy opaque key.
            return str(self.agent_key or self.access_token or "")
        return str(self.access_token or "")

@ -621,18 +623,35 @@ class CredentialPool:
                return entry
            store_refresh = state.get("refresh_token", "")
            store_access = state.get("access_token", "")
-            if store_refresh and store_refresh != entry.refresh_token:
+            comparable_updates = {
+                "access_token": store_access,
+                "refresh_token": store_refresh,
+                "expires_at": state.get("expires_at"),
+                "agent_key": state.get("agent_key"),
+                "agent_key_expires_at": state.get("agent_key_expires_at"),
+                "inference_base_url": state.get("inference_base_url"),
+            }
+            should_sync = any(
+                value not in (None, "") and getattr(entry, key, None) != value
+                for key, value in comparable_updates.items()
+            )
+            if should_sync:
                logger.debug(
-                    "Pool entry %s: syncing tokens from auth.json (Nous refresh token changed)",
+                    "Pool entry %s: syncing Nous state from auth.json",
                    entry.id,
                )
                field_updates: Dict[str, Any] = {
-                    "access_token": store_access,
-                    "refresh_token": store_refresh,
                    "last_status": None,
                    "last_status_at": None,
                    "last_error_code": None,
+                    "last_error_reason": None,
+                    "last_error_message": None,
+                    "last_error_reset_at": None,
                }
+                if store_access:
+                    field_updates["access_token"] = store_access
+                if store_refresh:
+                    field_updates["refresh_token"] = store_refresh
                if state.get("expires_at"):
                    field_updates["expires_at"] = state["expires_at"]
                if state.get("agent_key"):
@ -811,36 +830,15 @@ class CredentialPool:
                synced = self._sync_nous_entry_from_auth_store(entry)
                if synced is not entry:
                    entry = synced
-                nous_state = {
-                    "access_token": entry.access_token,
-                    "refresh_token": entry.refresh_token,
-                    "client_id": entry.client_id,
-                    "portal_base_url": entry.portal_base_url,
-                    "inference_base_url": entry.inference_base_url,
-                    "token_type": entry.token_type,
-                    "scope": entry.scope,
-                    "obtained_at": entry.obtained_at,
-                    "expires_at": entry.expires_at,
-                    "agent_key": entry.agent_key,
-                    "agent_key_expires_at": entry.agent_key_expires_at,
-                    "tls": entry.tls,
-                }
-                refreshed = auth_mod.refresh_nous_oauth_from_state(
-                    nous_state,
+                auth_mod.resolve_nous_runtime_credentials(
                    min_key_ttl_seconds=DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
-                    force_refresh=force,
-                    force_mint=force,
+                    inference_auth_mode=(
+                        auth_mod.NOUS_INFERENCE_AUTH_MODE_LEGACY
+                        if force
+                        else auth_mod.NOUS_INFERENCE_AUTH_MODE_AUTO
+                    ),
                )
-                # Apply returned fields: dataclass fields via replace, extras via dict update
-                field_updates = {}
-                extra_updates = dict(entry.extra)
-                _field_names = {f.name for f in fields(entry)}
-                for k, v in refreshed.items():
-                    if k in _field_names:
-                        field_updates[k] = v
-                    elif k in _EXTRA_KEYS:
-                        extra_updates[k] = v
-                updated = replace(entry, extra=extra_updates, **field_updates)
+                updated = self._sync_nous_entry_from_auth_store(entry)
            else:
                return entry
        except Exception as exc:
@ -929,6 +927,49 @@ class CredentialPool:
                    self._persist()
                    self._sync_device_code_entry_to_auth_store(updated)
                    return updated
+                if auth_mod._is_terminal_nous_refresh_error(exc):
+                    logger.debug("Nous refresh token is terminally invalid; clearing local token state")
+                    try:
+                        with _auth_store_lock():
+                            auth_store = _load_auth_store()
+                            state = _load_provider_state(auth_store, "nous") or {
+                                "client_id": entry.client_id,
+                                "portal_base_url": entry.portal_base_url,
+                                "inference_base_url": entry.inference_base_url,
+                                "token_type": entry.token_type,
+                                "scope": entry.scope,
+                                "tls": entry.tls,
+                            }
+                            store_refresh = str(state.get("refresh_token") or "").strip()
+                            entry_refresh = str(entry.refresh_token or "").strip()
+                            if not store_refresh or store_refresh == entry_refresh:
+                                auth_mod._quarantine_nous_oauth_state(
+                                    state,
+                                    exc,
+                                    reason="credential_pool_refresh_failure",
+                                )
+                                auth_mod._quarantine_nous_pool_entries(
+                                    auth_store,
+                                    exc,
+                                    reason="credential_pool_refresh_failure",
+                                )
+                                _save_provider_state(auth_store, "nous", state)
+                                _save_auth_store(auth_store)
+                    except Exception as clear_exc:
+                        logger.debug("Failed to clear terminal Nous OAuth state: %s", clear_exc)
+
+                    singleton_sources = {
+                        auth_mod.NOUS_DEVICE_CODE_SOURCE,
+                        f"manual:{auth_mod.NOUS_DEVICE_CODE_SOURCE}",
+                    }
+                    self._entries = [
+                        item for item in self._entries
+                        if item.source not in singleton_sources
+                    ]
+                    if self._current_id == entry.id:
+                        self._current_id = None
+                    self._persist()
+                    return None
            self._mark_exhausted(entry, None)
            return None

@ -1365,7 +1406,22 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup

    elif provider == "nous":
        state = _load_provider_state(auth_store, "nous")
-        if state and not _is_suppressed(provider, "device_code"):
+        has_runtime_material = bool(
+            isinstance(state, dict)
+            and (
+                str(state.get("access_token") or "").strip()
+                or str(state.get("agent_key") or "").strip()
+            )
+        )
+        if state and not has_runtime_material:
+            retained = [
+                entry for entry in entries
+                if entry.source not in {"device_code", "manual:device_code"}
+            ]
+            if len(retained) != len(entries):
+                entries[:] = retained
+                changed = True
+        if state and has_runtime_material and not _is_suppressed(provider, "device_code"):
            active_sources.add("device_code")
            # Prefer a user-supplied label embedded in the singleton state
            # (set by persist_nous_credentials(label=...) when the user ran
--- a/agent/iteration_budget.py
+++ b/agent/iteration_budget.py
@ -0,0 +1,62 @@
+"""Per-agent iteration budget — thread-safe consume/refund counter.
+
+Extracted from ``run_agent.py``.  Each ``AIAgent`` instance (parent or
+subagent) holds an :class:`IterationBudget`; the parent's cap comes from
+``max_iterations`` (default 90), each subagent's cap comes from
+``delegation.max_iterations`` (default 50).
+
+``run_agent`` re-exports ``IterationBudget`` so existing
+``from run_agent import IterationBudget`` imports keep working unchanged.
+"""
+
+from __future__ import annotations
+
+import threading
+
+
+class IterationBudget:
+    """Lock-protected counter of iterations an agent has spent.
+
+    One instance is held per agent (parent or subagent).  The parent's
+    cap is ``max_iterations`` (default 90); every subagent receives its
+    own independent cap from ``delegation.max_iterations`` (default 50),
+    so the combined total across parent + subagents may exceed the
+    parent's cap.  The per-subagent limit is user-configurable through
+    ``delegation.max_iterations`` in config.yaml.
+
+    Iterations spent on ``execute_code`` (programmatic tool calling) are
+    handed back through :meth:`refund` so they do not count against the
+    budget.
+    """
+
+    def __init__(self, max_total: int):
+        self._lock = threading.Lock()
+        self._used = 0
+        self.max_total = max_total
+
+    def consume(self) -> bool:
+        """Spend one iteration if any are left; return whether it was granted."""
+        with self._lock:
+            granted = self._used < self.max_total
+            if granted:
+                self._used += 1
+            return granted
+
+    def refund(self) -> None:
+        """Return one previously consumed iteration (never drops below zero)."""
+        with self._lock:
+            if self._used <= 0:
+                return
+            self._used -= 1
+
+    @property
+    def used(self) -> int:
+        """Number of iterations currently counted as spent."""
+        with self._lock:
+            return self._used
+
+    @property
+    def remaining(self) -> int:
+        """Iterations still available, clamped at zero."""
+        with self._lock:
+            left = self.max_total - self._used
+            return left if left > 0 else 0
+
+
+__all__ = ["IterationBudget"]
--- a/agent/lsp/client.py
+++ b/agent/lsp/client.py
@ -232,7 +232,7 @@ class LSPClient:
        the process is killed and the client is left in state
        ``"error"`` — re-call ``start()`` to retry.
        """
-        if self._state in ("running", "starting"):
+        if self._state in {"running", "starting"}:
            return
        self._state = "starting"
        try:
--- a/agent/lsp/install.py
+++ b/agent/lsp/install.py
@ -151,7 +151,7 @@ def try_install(pkg: str, strategy: str = "auto") -> Optional[str]:
    same path (or ``None``) without reinstalling.  Concurrent calls
    are serialized.
    """
-    if strategy not in ("auto",):
+    if strategy not in {"auto",}:
        # Only ``auto`` triggers an actual install.  In manual/off,
        # we still check whether the binary already exists.
        recipe = INSTALL_RECIPES.get(pkg, {})
--- a/agent/lsp/manager.py
+++ b/agent/lsp/manager.py
@ -162,7 +162,7 @@ class LSPService:
        idle_timeout: float = DEFAULT_IDLE_TIMEOUT,
    ) -> None:
        self._enabled = enabled
-        self._wait_mode = wait_mode if wait_mode in ("document", "full") else "document"
+        self._wait_mode = wait_mode if wait_mode in {"document", "full"} else "document"
        self._wait_timeout = wait_timeout
        self._install_strategy = install_strategy
        self._binary_overrides = binary_overrides or {}
--- a/agent/lsp/reporter.py
+++ b/agent/lsp/reporter.py
@ -28,7 +28,7 @@ def format_diagnostic(d: Dict[str, Any]) -> str:
    col = int(start.get("character", 0)) + 1
    msg = str(d.get("message") or "").rstrip()
    code = d.get("code")
-    code_part = f" [{code}]" if code not in (None, "") else ""
+    code_part = f" [{code}]" if code not in {None, ""} else ""
    source = d.get("source")
    source_part = f" ({source})" if source else ""
    return f"{sev} [{line}:{col}] {msg}{code_part}{source_part}"
--- a/agent/lsp/servers.py
+++ b/agent/lsp/servers.py
@ -237,7 +237,7 @@ def _spawn_pyright(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
            return None
    # If we got the cli ``pyright``, the langserver is its sibling.
    base = os.path.basename(bin_path)
-    if base in ("pyright", "pyright.exe"):
+    if base in {"pyright", "pyright.exe"}:
        sibling = os.path.join(os.path.dirname(bin_path), "pyright-langserver")
        if os.path.exists(sibling):
            bin_path = sibling
--- a/agent/message_sanitization.py
+++ b/agent/message_sanitization.py
@ -0,0 +1,444 @@
+"""Message and tool-payload sanitization helpers.
+
+Pure functions extracted from ``run_agent.py`` so the AIAgent module can
+stay focused on the conversation loop.  These walk OpenAI-format message
+lists and structured payloads, repairing or stripping problematic
+characters that would otherwise crash ``json.dumps`` inside the OpenAI
+SDK or be rejected by upstream APIs.
+
+All helpers are stateless and side-effect-free except for in-place
+mutation of their input (where documented).  Backward-compatible
+re-exports from ``run_agent`` remain in place so existing imports
+``from run_agent import _sanitize_surrogates`` keep working.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+# Lone surrogate code points are invalid in UTF-8 and crash json.dumps
+# inside the OpenAI SDK.  Used by every surrogate-sanitization helper
+# below as well as by run_agent and the CLI for paste-from-clipboard
+# scrubbing.
+_SURROGATE_RE = re.compile(r'[\ud800-\udfff]')
+
+
+def _sanitize_surrogates(text: str) -> str:
+    """Return *text* with every lone surrogate swapped for U+FFFD.
+
+    Lone surrogates cannot be encoded as UTF-8 and make ``json.dumps()``
+    inside the OpenAI SDK blow up.  Text without surrogates is returned
+    untouched after a single regex search.
+    """
+    if _SURROGATE_RE.search(text) is None:
+        return text
+    return _SURROGATE_RE.sub('\ufffd', text)
+
+
+def _sanitize_structure_surrogates(payload: Any) -> bool:
+    """Scrub surrogate code points out of a nested dict/list payload in-place.
+
+    Surrogate counterpart of ``_sanitize_structure_non_ascii``.  It reaches
+    nested structured fields (e.g. ``reasoning_details`` — a list of dicts
+    carrying ``summary``/``text`` strings) that flat per-field checks miss.
+    Only dict values and list items are rewritten; dict keys are left alone.
+
+    Returns True when at least one string was changed.
+    """
+    replaced = False
+
+    def _visit(container):
+        nonlocal replaced
+        # Pick the (slot, item) iterator matching the container type so a
+        # single loop can write results back with ``container[slot] = ...``.
+        if isinstance(container, dict):
+            entries = container.items()
+        elif isinstance(container, list):
+            entries = enumerate(container)
+        else:
+            return
+        for slot, item in entries:
+            if isinstance(item, (dict, list)):
+                _visit(item)
+            elif isinstance(item, str) and _SURROGATE_RE.search(item):
+                container[slot] = _SURROGATE_RE.sub('\ufffd', item)
+                replaced = True
+
+    _visit(payload)
+    return replaced
+
+
+def _sanitize_messages_surrogates(messages: list) -> bool:
+    """Replace surrogate characters throughout a messages list, in-place.
+
+    For every dict message this covers ``content`` (plain string, or the
+    ``text`` of each dict part when it is a list), ``name``, each tool
+    call's ``id`` plus its function ``name``/``arguments``, and finally
+    every remaining field — plain strings directly, dict/list values via
+    ``_sanitize_structure_surrogates``.  The last step matters because
+    byte-level reasoning models (xiaomi/mimo, kimi, glm) can emit lone
+    surrogates in ``reasoning`` / ``reasoning_content`` /
+    ``reasoning_details``, which would otherwise crash json.dumps inside
+    the OpenAI SDK on the next turn.
+
+    Returns True if anything was replaced, False otherwise.
+    """
+
+    def _scrub(holder: dict, field: str) -> bool:
+        # Rewrite holder[field] only when it is a string containing surrogates.
+        current = holder.get(field)
+        if isinstance(current, str) and _SURROGATE_RE.search(current):
+            holder[field] = _SURROGATE_RE.sub('\ufffd', current)
+            return True
+        return False
+
+    dirty = False
+    for message in messages:
+        if not isinstance(message, dict):
+            continue
+
+        body = message.get("content")
+        if isinstance(body, list):
+            for piece in body:
+                if isinstance(piece, dict) and _scrub(piece, "text"):
+                    dirty = True
+        elif _scrub(message, "content"):
+            dirty = True
+
+        if _scrub(message, "name"):
+            dirty = True
+
+        calls = message.get("tool_calls")
+        if isinstance(calls, list):
+            for call in calls:
+                if not isinstance(call, dict):
+                    continue
+                if _scrub(call, "id"):
+                    dirty = True
+                function = call.get("function")
+                if isinstance(function, dict):
+                    if _scrub(function, "name"):
+                        dirty = True
+                    if _scrub(function, "arguments"):
+                        dirty = True
+
+        # Everything not handled field-by-field above: reasoning,
+        # reasoning_content, reasoning_details, etc.  Same coverage goal as
+        # _sanitize_messages_non_ascii (PR #10537).
+        for field, extra in message.items():
+            if field in {"content", "name", "tool_calls", "role"}:
+                continue
+            if isinstance(extra, (dict, list)):
+                if _sanitize_structure_surrogates(extra):
+                    dirty = True
+            elif isinstance(extra, str) and _SURROGATE_RE.search(extra):
+                message[field] = _SURROGATE_RE.sub('\ufffd', extra)
+                dirty = True
+    return dirty
+
+
+def _escape_invalid_chars_in_json_strings(raw: str) -> str:
+    """Rewrite raw control characters inside JSON strings as ``\\uXXXX``.
+
+    The text is scanned one character at a time while tracking whether the
+    cursor sits inside a double-quoted string.  Inside a string, a
+    backslash and the character following it are copied verbatim (they
+    already form an escape sequence); any other character below 0x20 is
+    replaced by its ``\\uXXXX`` form.  Characters outside strings are
+    copied through unchanged.
+
+    Ported from #12093 — complements the other repair passes in
+    ``_repair_tool_call_arguments`` when ``json.loads(strict=False)`` is
+    not enough because other malformations are present as well.
+    """
+    pieces: list[str] = []
+    inside_string = False
+    after_backslash = False
+    for ch in raw:
+        if after_backslash:
+            # Second half of an existing escape pair — keep it untouched.
+            pieces.append(ch)
+            after_backslash = False
+            continue
+        if not inside_string:
+            if ch == '"':
+                inside_string = True
+            pieces.append(ch)
+            continue
+        if ch == "\\":
+            after_backslash = True
+            pieces.append(ch)
+        elif ch == '"':
+            inside_string = False
+            pieces.append(ch)
+        elif ord(ch) < 0x20:
+            pieces.append(f"\\u{ord(ch):04x}")
+        else:
+            pieces.append(ch)
+    return "".join(pieces)
+
+
+def _close_unclosed_json_structures(text: str) -> str:
+    """Append the closers for every ``{`` / ``[`` still open at the end of *text*.
+
+    The scan is string-aware (braces and brackets inside double-quoted
+    strings are ignored, backslash escapes are honoured) and closers are
+    emitted innermost-first, so ``{"a": [1, 2`` becomes ``{"a": [1, 2]}``.
+    Closers that do not match the innermost open structure are left alone
+    and never pop the stack.  If *text* ends inside an unterminated string
+    it is returned unchanged — appended closers would land inside that
+    string and could not help.
+    """
+    matching = {"{": "}", "[": "]"}
+    pending: list[str] = []
+    in_string = False
+    escaped = False
+    for ch in text:
+        if in_string:
+            if escaped:
+                escaped = False
+            elif ch == "\\":
+                escaped = True
+            elif ch == '"':
+                in_string = False
+        elif ch == '"':
+            in_string = True
+        elif ch in matching:
+            pending.append(matching[ch])
+        elif pending and ch == pending[-1]:
+            pending.pop()
+    if in_string or not pending:
+        return text
+    return text + "".join(reversed(pending))
+
+
+def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str:
+    """Attempt to repair malformed tool_call argument JSON.
+
+    Models like GLM-5.1 via Ollama can produce truncated JSON, trailing
+    commas, Python ``None``, etc.  The API proxy rejects these with HTTP 400
+    "invalid tool call arguments".  This function applies common repairs;
+    if all fail it returns ``"{}"`` so the request succeeds (better than
+    crashing the session).  All repairs are logged at WARNING level.
+
+    Args:
+        raw_args: The argument string emitted by the model.  Anything that
+            is not a ``str`` is treated as empty.
+        tool_name: Tool name, used only in log messages.
+
+    Returns:
+        A JSON document string: the re-serialised input when it already
+        parses (with ``strict=False``), a repaired variant when one of the
+        passes succeeds, otherwise ``"{}"``.
+    """
+    raw_stripped = raw_args.strip() if isinstance(raw_args, str) else ""
+
+    # Fast-path: empty / whitespace-only -> empty object
+    if not raw_stripped:
+        logger.warning("Sanitized empty tool_call arguments for %s", tool_name)
+        return "{}"
+
+    # Python-literal None -> normalise to {}
+    if raw_stripped == "None":
+        logger.warning("Sanitized Python-None tool_call arguments for %s", tool_name)
+        return "{}"
+
+    # Repair pass 0: llama.cpp backends sometimes emit literal control
+    # characters (tabs, newlines) inside JSON string values. json.loads
+    # with strict=False accepts these and lets us re-serialise the
+    # result into wire-valid JSON without any string surgery. This is
+    # the most common local-model repair case (#12068).
+    try:
+        parsed = json.loads(raw_stripped, strict=False)
+        reserialised = json.dumps(parsed, separators=(",", ":"))
+        if reserialised != raw_stripped:
+            logger.warning(
+                "Repaired unescaped control chars in tool_call arguments for %s",
+                tool_name,
+            )
+        return reserialised
+    except (json.JSONDecodeError, TypeError, ValueError):
+        pass
+
+    # Attempt common JSON repairs
+    fixed = raw_stripped
+    # 1. Strip trailing commas before } or ]
+    fixed = re.sub(r',\s*([}\]])', r'\1', fixed)
+    # 2. Close unclosed structures.  Closers must be appended in reverse
+    #    nesting order and must ignore braces inside string values —
+    #    counting characters and emitting all '}' before all ']' turned
+    #    '{"a": [1, 2' into the invalid '{"a": [1, 2}]'.
+    fixed = _close_unclosed_json_structures(fixed)
+    # 3. Remove excess closing braces/brackets (bounded to 50 iterations)
+    for _ in range(50):
+        try:
+            json.loads(fixed)
+            break
+        except json.JSONDecodeError:
+            if fixed.endswith('}') and fixed.count('}') > fixed.count('{'):
+                fixed = fixed[:-1]
+            elif fixed.endswith(']') and fixed.count(']') > fixed.count('['):
+                fixed = fixed[:-1]
+            else:
+                break
+
+    try:
+        json.loads(fixed)
+        logger.warning(
+            "Repaired malformed tool_call arguments for %s: %s → %s",
+            tool_name, raw_stripped[:80], fixed[:80],
+        )
+        return fixed
+    except json.JSONDecodeError:
+        pass
+
+    # Repair pass 4: escape unescaped control chars inside JSON strings,
+    # then retry. Catches cases where strict=False alone fails because
+    # other malformations are present too.
+    try:
+        escaped = _escape_invalid_chars_in_json_strings(fixed)
+        if escaped != fixed:
+            json.loads(escaped)
+            logger.warning(
+                "Repaired control-char-laced tool_call arguments for %s: %s → %s",
+                tool_name, raw_stripped[:80], escaped[:80],
+            )
+            return escaped
+    except (json.JSONDecodeError, TypeError, ValueError):
+        pass
+
+    # Last resort: replace with empty object so the API request doesn't
+    # crash the entire session.
+    logger.warning(
+        "Unrepairable tool_call arguments for %s — "
+        "replaced with empty object (was: %s)",
+        tool_name, raw_stripped[:80],
+    )
+    return "{}"
+
+
+def _strip_non_ascii(text: str) -> str:
+    """Return *text* with every non-ASCII character removed.
+
+    Characters outside the ASCII range are dropped outright (the encode
+    step runs with ``errors='ignore'``); nothing is transliterated to a
+    "closest" ASCII equivalent.
+
+    Used as a last resort when the system encoding is ASCII and can't handle
+    any non-ASCII characters (e.g. LANG=C on Chromebooks).
+    """
+    return text.encode('ascii', errors='ignore').decode('ascii')
+
+
+def _sanitize_messages_non_ascii(messages: list) -> bool:
+    """Strip non-ASCII characters from all string content in a messages list.
+
+    This is a last-resort recovery for systems with ASCII-only encoding
+    (LANG=C, Chromebooks, minimal containers).  Messages are mutated
+    in-place; non-dict entries are skipped.
+
+    Coverage mirrors ``_sanitize_messages_surrogates``: ``content`` (plain
+    string, or the ``text`` of each dict part), ``name``, each tool call's
+    ``id`` and its function ``name``/``arguments``, and every remaining
+    field — plain strings directly, dict/list values (for example
+    ``reasoning_details``) through ``_sanitize_structure_non_ascii``.
+    Tool-call ids are stripped too so they stay equal to the top-level
+    ``tool_call_id`` of the matching tool message, which the generic
+    field pass already rewrites.
+
+    Returns True if any non-ASCII content was found and sanitized.
+    """
+
+    def _strip_field(holder: dict, field: str) -> bool:
+        # Rewrite holder[field] only when it is a string that changes.
+        value = holder.get(field)
+        if not isinstance(value, str):
+            return False
+        cleaned = _strip_non_ascii(value)
+        if cleaned == value:
+            return False
+        holder[field] = cleaned
+        return True
+
+    found = False
+    for msg in messages:
+        if not isinstance(msg, dict):
+            continue
+        # Sanitize content (string, or text parts of a multi-part list)
+        content = msg.get("content")
+        if isinstance(content, list):
+            for part in content:
+                if isinstance(part, dict) and _strip_field(part, "text"):
+                    found = True
+        elif _strip_field(msg, "content"):
+            found = True
+        # Sanitize name field (can contain non-ASCII in tool results)
+        if _strip_field(msg, "name"):
+            found = True
+        # Sanitize tool_calls: id, function name and function arguments
+        tool_calls = msg.get("tool_calls")
+        if isinstance(tool_calls, list):
+            for tc in tool_calls:
+                if not isinstance(tc, dict):
+                    continue
+                if _strip_field(tc, "id"):
+                    found = True
+                fn = tc.get("function", {})
+                if isinstance(fn, dict):
+                    if _strip_field(fn, "name"):
+                        found = True
+                    if _strip_field(fn, "arguments"):
+                        found = True
+        # Sanitize any additional top-level fields (e.g. reasoning_content,
+        # reasoning_details).  Only existing keys are reassigned, so
+        # mutating while iterating is safe.
+        for key, value in msg.items():
+            if key in {"content", "name", "tool_calls", "role"}:
+                continue
+            if isinstance(value, str):
+                sanitized = _strip_non_ascii(value)
+                if sanitized != value:
+                    msg[key] = sanitized
+                    found = True
+            elif isinstance(value, (dict, list)):
+                if _sanitize_structure_non_ascii(value):
+                    found = True
+    return found
+
+
+def _sanitize_tools_non_ascii(tools: list) -> bool:
+    """Strip non-ASCII characters from tool payloads in-place.
+
+    Thin alias for ``_sanitize_structure_non_ascii``; returns its result
+    (True when at least one string in *tools* was changed).
+    """
+    return _sanitize_structure_non_ascii(tools)
+
+
+def _strip_images_from_messages(messages: list) -> bool:
+    """Drop image content parts from every message, mutating *messages*.
+
+    Invoked after a server reports that it cannot accept images (e.g.
+    "Only 'text' content type is supported.") so the next API call is
+    text-only.  A part counts as an image when it is a dict whose
+    ``type`` is ``image_url``, ``image`` or ``input_image``.
+
+    What happens to a message that lost parts:
+      * parts remain          -> ``content`` becomes the remaining parts;
+      * nothing remains, role ``tool`` -> ``content`` becomes a plaintext
+        placeholder.  The message is kept because removing it would leave
+        the ``tool_call_id`` of the preceding assistant message without a
+        response, which providers reject with HTTP 400;
+      * nothing remains, any other role -> the message is deleted.  In
+        practice these are synthetic image-only user messages appended for
+        attachment delivery; real user turns always include text.
+
+    Returns True if any image parts were removed.
+    """
+    removed_any = False
+    doomed = []
+    for index, message in enumerate(messages):
+        if not isinstance(message, dict):
+            continue
+        parts = message.get("content")
+        if not isinstance(parts, list):
+            continue
+        kept = [
+            part for part in parts
+            if not (isinstance(part, dict) and part.get("type") in {"image_url", "image", "input_image"})
+        ]
+        if len(kept) == len(parts):
+            continue
+        removed_any = True
+        if kept:
+            message["content"] = kept
+        elif message.get("role") == "tool":
+            # Keep the message so the tool_call_id pairing stays intact.
+            message["content"] = "[image content removed — server does not support images]"
+        else:
+            # Image-only user/assistant message with no text; safe to drop.
+            doomed.append(index)
+    # Delete back-to-front so earlier indices stay valid.
+    for index in reversed(doomed):
+        del messages[index]
+    return removed_any
+
+
+def _sanitize_structure_non_ascii(payload: Any) -> bool:
+    """Strip non-ASCII characters from a nested dict/list payload in-place.
+
+    Dict values and list items that are strings are passed through
+    ``_strip_non_ascii``; nested dicts and lists are walked recursively.
+    Dict keys are not modified.  Returns True when at least one string
+    was changed.
+    """
+    changed = False
+
+    def _visit(container):
+        nonlocal changed
+        # Pick the (slot, item) iterator matching the container type so a
+        # single loop can write results back with ``container[slot] = ...``.
+        if isinstance(container, dict):
+            entries = container.items()
+        elif isinstance(container, list):
+            entries = enumerate(container)
+        else:
+            return
+        for slot, item in entries:
+            if isinstance(item, (dict, list)):
+                _visit(item)
+            elif isinstance(item, str):
+                cleaned = _strip_non_ascii(item)
+                if cleaned != item:
+                    container[slot] = cleaned
+                    changed = True
+
+    _visit(payload)
+    return changed
+
+
+__all__ = [
+    "_SURROGATE_RE",
+    "_sanitize_surrogates",
+    "_sanitize_structure_surrogates",
+    "_sanitize_messages_surrogates",
+    "_escape_invalid_chars_in_json_strings",
+    "_repair_tool_call_arguments",
+    "_strip_non_ascii",
+    "_sanitize_messages_non_ascii",
+    "_sanitize_tools_non_ascii",
+    "_strip_images_from_messages",
+    "_sanitize_structure_non_ascii",
+]
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@ -194,6 +194,7 @@ DEFAULT_CONTEXT_LENGTHS = {
    "llama": 131072,
    # Qwen — specific model families before the catch-all.
    # Official docs: https://help.aliyun.com/zh/model-studio/developer-reference/
+    "qwen3.6-plus": 1048576,      # 1M context (DashScope/Alibaba & OpenRouter)
    "qwen3-coder-plus": 1000000,  # 1M context
    "qwen3-coder": 262144,        # 256K context
    "qwen": 131072,
--- a/agent/process_bootstrap.py
+++ b/agent/process_bootstrap.py
@ -0,0 +1,167 @@
+"""Process-level bootstrap helpers for ``run_agent``.
+
+Three concerns, all tied to ``AIAgent`` boot-time / runtime IO setup:
+
+1. **Lazy OpenAI SDK import** — ``_load_openai_cls`` + ``_OpenAIProxy``
+   defer the 240ms-ish ``from openai import OpenAI`` cost until first use,
+   while preserving ``isinstance(client, OpenAI)`` checks and
+   ``patch("run_agent.OpenAI", ...)`` test patterns.
+
+2. **Crash-resistant stdio** — ``_SafeWriter`` wraps stdout/stderr so
+   ``OSError: Input/output error`` from broken pipes (systemd, Docker,
+   thread teardown races) cannot crash the agent.  ``_install_safe_stdio``
+   applies the wrapper.
+
+3. **HTTP proxy resolution** — ``_get_proxy_from_env`` reads
+   ``HTTPS_PROXY`` / ``HTTP_PROXY`` / ``ALL_PROXY``;
+   ``_get_proxy_for_base_url`` respects ``NO_PROXY`` for the given base URL.
+
+``run_agent`` re-exports every name so existing
+``from run_agent import _get_proxy_from_env`` imports keep working
+unchanged.
+"""
+
+from __future__ import annotations
+
+import os
+import sys
+import urllib.request
+from typing import Optional
+
+from utils import base_url_hostname, normalize_proxy_url
+
+
+# Cached at module level so we only pay the OpenAI SDK import cost once
+# per process (after the first lazy load).
+_OPENAI_CLS_CACHE = None
+
+
+def _load_openai_cls() -> type:
+    """Return ``openai.OpenAI``, importing it on the first call only.
+
+    The class is stored in the module-level ``_OPENAI_CLS_CACHE`` so later
+    calls skip the import entirely.
+    """
+    global _OPENAI_CLS_CACHE
+    if _OPENAI_CLS_CACHE is not None:
+        return _OPENAI_CLS_CACHE
+    from openai import OpenAI as _real_cls
+    _OPENAI_CLS_CACHE = _real_cls
+    return _OPENAI_CLS_CACHE
+
+
+class _OpenAIProxy:
+    """Stand-in for ``openai.OpenAI`` that defers the real import.
+
+    Calling an instance constructs a real client, and ``isinstance(x,
+    proxy)`` is answered against the real class; both resolve the class
+    through ``_load_openai_cls`` at that moment.
+    """
+
+    __slots__ = ()
+
+    def __repr__(self):
+        return "<lazy openai.OpenAI proxy>"
+
+    def __instancecheck__(self, obj):
+        real_cls = _load_openai_cls()
+        return isinstance(obj, real_cls)
+
+    def __call__(self, *args, **kwargs):
+        real_cls = _load_openai_cls()
+        return real_cls(*args, **kwargs)
+
+
+class _SafeWriter:
+    """Stdio wrapper that swallows OSError/ValueError raised by a dead stream.
+
+    Under systemd, Docker, or as a headless daemon the stdout/stderr pipe
+    can go away (idle timeout, buffer exhaustion, socket reset).  A plain
+    print() then raises ``OSError: [Errno 5] Input/output error``, which
+    can take down agent setup or run_conversation() — particularly through
+    a double fault when an except handler prints as well.
+
+    Subagents running in ThreadPoolExecutor threads add a second failure
+    mode: the shared stdout handle may be closed between thread teardown
+    and cleanup, producing ``ValueError: I/O operation on closed file``.
+
+    ``write``, ``flush`` and ``isatty`` forward to the wrapped stream and
+    suppress both exception types; ``fileno`` and every other attribute
+    are forwarded without any guarding.  With a healthy stream the
+    wrapper is transparent.
+    """
+
+    __slots__ = ("_inner",)
+
+    def __init__(self, inner):
+        object.__setattr__(self, "_inner", inner)
+
+    def __getattr__(self, name):
+        # Only reached for names not defined on the wrapper itself.
+        return getattr(self._inner, name)
+
+    def write(self, data):
+        try:
+            written = self._inner.write(data)
+        except (OSError, ValueError):
+            # Report the text as fully written so callers don't retry.
+            if isinstance(data, str):
+                return len(data)
+            return 0
+        return written
+
+    def flush(self):
+        try:
+            self._inner.flush()
+        except (OSError, ValueError):
+            return
+
+    def fileno(self):
+        return self._inner.fileno()
+
+    def isatty(self):
+        try:
+            answer = self._inner.isatty()
+        except (OSError, ValueError):
+            answer = False
+        return answer
+
+
+def _get_proxy_from_env() -> Optional[str]:
+    """Look up a proxy URL in the process environment.
+
+    Variables are consulted in this order: HTTPS_PROXY, HTTP_PROXY,
+    ALL_PROXY, then their lowercase spellings.  The first one whose value
+    is non-empty after stripping whitespace is passed through
+    ``normalize_proxy_url`` and returned; None means no proxy is configured.
+    """
+    names = (
+        "HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
+        "https_proxy", "http_proxy", "all_proxy",
+    )
+    stripped_values = (os.environ.get(name, "").strip() for name in names)
+    chosen = next((value for value in stripped_values if value), None)
+    if chosen is None:
+        return None
+    return normalize_proxy_url(chosen)
+
+
+def _get_proxy_for_base_url(base_url: Optional[str]) -> Optional[str]:
+    """Return the env-configured proxy unless NO_PROXY excludes *base_url*.
+
+    The proxy from ``_get_proxy_from_env`` is returned as-is when there is
+    no proxy, no base URL, or no hostname can be extracted from the URL.
+    Otherwise ``urllib.request.proxy_bypass_environment`` decides: a
+    truthy answer yields None.  Any exception raised by that check is
+    deliberately ignored and the proxy is kept (best effort).
+    """
+    proxy = _get_proxy_from_env()
+    host = base_url_hostname(base_url) if (proxy and base_url) else None
+    if not host:
+        return proxy
+
+    try:
+        bypassed = urllib.request.proxy_bypass_environment(host)
+    except Exception:
+        bypassed = False
+
+    return None if bypassed else proxy
+
+
+def _install_safe_stdio() -> None:
+    """Replace ``sys.stdout`` / ``sys.stderr`` with ``_SafeWriter`` wrappers.
+
+    Streams that are missing (None) or already wrapped are left alone, so
+    calling this more than once does not stack wrappers.
+    """
+    for name in ("stdout", "stderr"):
+        current = getattr(sys, name, None)
+        if current is None or isinstance(current, _SafeWriter):
+            continue
+        setattr(sys, name, _SafeWriter(current))
+
+
+# Module-level proxy instance — drops in for ``openai.OpenAI``.  Imported as
+# ``from agent.process_bootstrap import OpenAI`` (or re-exported via
+# ``run_agent`` for legacy tests).
+OpenAI = _OpenAIProxy()
+
+
+__all__ = [
+    "OpenAI",
+    "_OpenAIProxy",
+    "_load_openai_cls",
+    "_SafeWriter",
+    "_install_safe_stdio",
+    "_get_proxy_from_env",
+    "_get_proxy_for_base_url",
+]
--- a/agent/shell_hooks.py
+++ b/agent/shell_hooks.py
@ -83,6 +83,7 @@ logger = logging.getLogger(__name__)
 DEFAULT_TIMEOUT_SECONDS = 60
 MAX_TIMEOUT_SECONDS = 300
 ALLOWLIST_FILENAME = "shell-hooks-allowlist.json"
+_DEFAULT_BLOCK_MESSAGE = "Blocked by shell hook."

 # (event, matcher, command) triples that have been wired to the plugin
 # manager in the current process.  Matcher is part of the key because
@ -481,6 +482,17 @@ def _serialize_payload(event: str, kwargs: Dict[str, Any]) -> str:
    return json.dumps(payload, ensure_ascii=False, default=str)


+def _block_message(primary: Any, secondary: Any) -> str:
+    """Return a validated string block message, falling back to the default.
+
+    Accepts two candidate fields (primary wins over secondary) so callers
+    can express field-priority differences between the two hook wire formats
+    without duplicating the type-check logic.
+
+    Each candidate is validated on its own: the first one that is a
+    non-empty ``str`` is returned.  A primary value of the wrong type
+    (e.g. a dict or a number) therefore no longer hides a usable
+    secondary message.  ``_DEFAULT_BLOCK_MESSAGE`` is returned only when
+    neither candidate qualifies.
+    """
+    for candidate in (primary, secondary):
+        if isinstance(candidate, str) and candidate:
+            return candidate
+    return _DEFAULT_BLOCK_MESSAGE
+
+
 def _parse_response(event: str, stdout: str) -> Optional[Dict[str, Any]]:
    """Translate stdout JSON into a Hermes wire-shape dict.

@ -515,13 +527,9 @@ def _parse_response(event: str, stdout: str) -> Optional[Dict[str, Any]]:

    if event == "pre_tool_call":
        if data.get("action") == "block":
-            message = data.get("message") or data.get("reason") or ""
-            if isinstance(message, str) and message:
-                return {"action": "block", "message": message}
+            return {"action": "block", "message": _block_message(data.get("message"), data.get("reason"))}
        if data.get("decision") == "block":
-            message = data.get("reason") or data.get("message") or ""
-            if isinstance(message, str) and message:
-                return {"action": "block", "message": message}
+            return {"action": "block", "message": _block_message(data.get("reason"), data.get("message"))}
        return None

    context = data.get("context")
--- a/agent/stream_diag.py
+++ b/agent/stream_diag.py
@ -0,0 +1,280 @@
+"""Stream diagnostics — per-attempt counters, exception chains, retry logging.
+
+When a streaming chat-completions request dies mid-response, we want to
+know why: which Cloudflare edge served the request, which OpenRouter
+downstream provider answered, how many bytes/chunks we got before the
+drop, the HTTP status, the underlying httpx error class.  These helpers
+collect that info and emit it both to ``agent.log`` (full detail) and to
+the user-facing status line (compact).
+
+All helpers are extracted from :class:`AIAgent` for cleanliness.
+``run_agent`` keeps thin forwarder methods so existing call sites and
+tests that patch ``run_agent.<helper>`` keep working.
+"""
+
+from __future__ import annotations
+
+import logging
+import time
+from typing import Any, Dict, List, Optional
+
+logger = logging.getLogger(__name__)
+
+
+# Response headers snapshotted on each streaming attempt by
+# ``stream_diag_capture_response``.  Names are written lowercase so the
+# keys stored in the diag dict (and later printed to agent.log) are
+# uniform for free-form post-hoc analysis.  The live lookup is expected to
+# be case-insensitive on httpx responses — NOTE(review): confirm against
+# the HTTP client actually in use.
+STREAM_DIAG_HEADERS = (
+    "cf-ray",
+    "cf-cache-status",
+    "x-openrouter-provider",
+    "x-openrouter-model",
+    "x-openrouter-id",
+    "x-request-id",
+    "x-vercel-id",
+    "via",
+    "server",
+    "x-forwarded-for",
+)
+
+
+def stream_diag_init() -> Dict[str, Any]:
+    """Create the diagnostic record for a single streaming attempt.
+
+    The streaming code updates the returned dict in place and the retry
+    path reads it back when a stream dies.  It is stored on
+    ``request_client_holder`` so it outlives the closure that fills it.
+
+    Returns:
+        A new dict with the start timestamp set to "now", zeroed chunk and
+        byte counters, an empty header snapshot, and no first-chunk time or
+        HTTP status yet.
+    """
+    return dict(
+        started_at=time.time(),
+        first_chunk_at=None,
+        chunks=0,
+        bytes=0,
+        headers={},
+        http_status=None,
+    )
+
+
+def stream_diag_capture_response(agent: Any, diag: Dict[str, Any], http_response: Any) -> None:
+    """Copy the HTTP status and selected headers of a live stream into *diag*.
+
+    Meant to run once when the stream opens, before any chunk is read, so
+    the metadata is available even if the stream dies immediately.  The
+    capture is best-effort: every failure is swallowed.
+
+    Args:
+        agent: Object that may carry a ``_STREAM_DIAG_HEADERS`` override for
+            the list of header names; ``STREAM_DIAG_HEADERS`` is used
+            otherwise.
+        diag: Per-attempt diagnostic dict, mutated in place.  Ignored when
+            it is not a dict.
+        http_response: Response object exposing ``status_code`` and
+            ``headers``.  Ignored when ``None``.
+    """
+    if not isinstance(diag, dict) or http_response is None:
+        return
+
+    try:
+        diag["http_status"] = getattr(http_response, "status_code", None)
+    except Exception:
+        pass
+
+    try:
+        response_headers = getattr(http_response, "headers", None) or {}
+        snapshot: Dict[str, str] = {}
+        # Back-compat: an agent may supply its own header list.
+        wanted = getattr(agent, "_STREAM_DIAG_HEADERS", STREAM_DIAG_HEADERS)
+        for header_name in wanted:
+            try:
+                raw = response_headers.get(header_name)
+                if not raw:
+                    continue
+                # Cap each value so a single header cannot bloat a log line.
+                snapshot[header_name] = str(raw)[:120]
+            except Exception:
+                continue
+        diag["headers"] = snapshot
+    except Exception:
+        pass
+
+
+def flatten_exception_chain(error: BaseException) -> str:
+    """Render an exception and its causes as ``Outer(msg) <- Inner(msg) <- ...``.
+
+    The OpenAI SDK surfaces httpx failures wrapped in ``APIConnectionError``
+    / ``APIError``; the wrapped ``RemoteProtocolError`` / ``ConnectError`` /
+    ``ReadError`` is what explains WHY a stream died.  For each link the
+    walk follows ``__cause__`` when set, else ``__context__``, stops on a
+    repeat, and visits at most four exceptions.
+
+    Args:
+        error: Outermost exception, as seen at the catch site.
+
+    Returns:
+        A single-line summary.  Each message has newlines replaced by
+        spaces and is cut to 140 characters plus an ellipsis; an exception
+        with an empty message is shown by class name only.
+    """
+    chain: List[BaseException] = []
+    current: Optional[BaseException] = error
+    for _ in range(4):
+        if current is None or current in chain:
+            break
+        chain.append(current)
+        successor = getattr(current, "__cause__", None) or getattr(
+            current, "__context__", None
+        )
+        if successor is current:
+            break
+        current = successor
+
+    def _render(exc: BaseException) -> str:
+        text = str(exc).strip().replace("\n", " ")
+        label = type(exc).__name__
+        if not text:
+            return label
+        if len(text) > 140:
+            text = text[:140] + "…"
+        return f"{label}({text})"
+
+    if not chain:
+        return type(error).__name__
+    return " <- ".join(_render(exc) for exc in chain)
+
+
+def log_stream_retry(
+    agent: Any,
+    *,
+    kind: str,
+    error: BaseException,
+    attempt: int,
+    max_attempts: int,
+    mid_tool_call: bool,
+    diag: Optional[Dict[str, Any]] = None,
+) -> None:
+    """Record a transient stream-drop and retry to ``agent.log``.
+
+    Always logs a structured WARNING so users have a breadcrumb regardless
+    of UI verbosity.  Subagents in particular benefit because their
+    retries no longer spam the parent's terminal — but the file log keeps
+    full detail (provider, error class, attempt, base_url, subagent_id).
+
+    When *diag* is provided (the per-attempt stream-diagnostic dict from
+    :func:`stream_diag_init`), the WARNING also captures upstream headers
+    (cf-ray, x-openrouter-provider, x-openrouter-id), HTTP status, bytes
+    streamed before the drop, and elapsed time on the dying attempt.
+    These are the breadcrumbs needed to answer "is one CF edge / one
+    downstream provider responsible, or is it random across runs?"
+
+    Args:
+        agent: Source of ``provider``, ``base_url``, and the optional
+            ``_subagent_id`` / ``_delegate_depth`` / ``_summarize_api_error``.
+        kind: Short label interpolated after "Stream" in the message.
+        error: The exception that ended the attempt.
+        attempt: Number of the attempt that failed.
+        max_attempts: Total attempts allowed.
+        mid_tool_call: Attached to the log record via ``extra``.
+        diag: Optional per-attempt diagnostic dict.
+
+    This function never raises: any failure while building or emitting the
+    record is downgraded to a DEBUG line.
+    """
+    try:
+        # Prefer the agent's own error summariser; fall back to str(error).
+        try:
+            _summary = agent._summarize_api_error(error)
+        except Exception:
+            _summary = str(error)
+        if _summary and len(_summary) > 240:
+            _summary = _summary[:240] + "…"
+
+        # Inner-cause chain (httpx errors hide under openai.APIError).
+        try:
+            _chain = flatten_exception_chain(error)
+        except Exception:
+            _chain = type(error).__name__
+
+        # Per-attempt counters and upstream headers.
+        # These defaults are what gets logged when diag is absent/malformed.
+        _now = time.time()
+        _bytes = 0
+        _chunks = 0
+        _elapsed = 0.0
+        _ttfb = None
+        _headers_repr = "-"
+        _http_status = "-"
+        if isinstance(diag, dict):
+            # One shared try: a bad field aborts the rest of the extraction,
+            # leaving every later value at its default above.
+            try:
+                _bytes = int(diag.get("bytes") or 0)
+                _chunks = int(diag.get("chunks") or 0)
+                _started = float(diag.get("started_at") or _now)
+                _elapsed = max(0.0, _now - _started)
+                _first = diag.get("first_chunk_at")
+                if _first is not None:
+                    # Time to first chunk, measured from the attempt start.
+                    _ttfb = max(0.0, float(_first) - _started)
+                headers = diag.get("headers") or {}
+                if isinstance(headers, dict) and headers:
+                    _headers_repr = " ".join(
+                        f"{k}={v}" for k, v in headers.items()
+                    )
+                if diag.get("http_status") is not None:
+                    _http_status = str(diag.get("http_status"))
+            except Exception:
+                pass
+
+        logger.warning(
+            "Stream %s on attempt %s/%s — retrying. "
+            "subagent_id=%s depth=%s provider=%s base_url=%s "
+            "error_type=%s error=%s "
+            "chain=%s "
+            "http_status=%s bytes=%d chunks=%d elapsed=%.2fs ttfb=%s "
+            "upstream=[%s]",
+            kind,
+            attempt,
+            max_attempts,
+            getattr(agent, "_subagent_id", None) or "-",
+            getattr(agent, "_delegate_depth", 0),
+            agent.provider or "-",
+            agent.base_url or "-",
+            type(error).__name__,
+            _summary,
+            _chain,
+            _http_status,
+            _bytes,
+            _chunks,
+            _elapsed,
+            f"{_ttfb:.2f}s" if _ttfb is not None else "-",
+            _headers_repr,
+            # Carried as a LogRecord attribute, not in the message text.
+            extra={"mid_tool_call": mid_tool_call},
+        )
+    except Exception:
+        # Diagnostics must never break the retry path itself.
+        logger.debug("stream-retry log emit failed", exc_info=True)
+
+
+def emit_stream_drop(
+    agent: Any,
+    *,
+    error: BaseException,
+    attempt: int,
+    max_attempts: int,
+    mid_tool_call: bool,
+    diag: Optional[Dict[str, Any]] = None,
+) -> None:
+    """Announce one stream drop + retry to the user and to ``agent.log``.
+
+    The structured WARNING (subagent_id, provider, base_url, error_type,
+    upstream headers, bytes/chunks, elapsed) is written first through
+    :func:`log_stream_retry`.  The status line shown in the UI is kept
+    short on purpose: provider, error class, ``after Xs`` when timing data
+    exists, and attempt N/M.  Top-level agents and subagents both emit it;
+    the parent attributes subagent lines with its ``log_prefix``.
+
+    Args:
+        agent: Provides ``provider``, ``_emit_status`` and ``_touch_activity``.
+        error: The exception that ended the attempt.
+        attempt: Number of the attempt that failed.
+        max_attempts: Total attempts allowed.
+        mid_tool_call: Whether the drop happened during a tool call; changes
+            the wording of the status line.
+        diag: Optional per-attempt diagnostic dict from
+            :func:`stream_diag_init`.
+    """
+    kind = "drop"
+    if mid_tool_call:
+        kind = "drop mid tool-call"
+
+    log_stream_retry(
+        agent,
+        kind=kind,
+        error=error,
+        attempt=attempt,
+        max_attempts=max_attempts,
+        mid_tool_call=mid_tool_call,
+        diag=diag,
+    )
+
+    provider_label = agent.provider or "provider"
+
+    # "after Xs" lets the user tell "never connected" (0s) apart from
+    # "died after 30s of streaming" (upstream idle-kill / proxy timeout).
+    elapsed_note = ""
+    if isinstance(diag, dict):
+        try:
+            began = diag.get("started_at")
+            if began is not None:
+                seconds = max(0.0, time.time() - float(began))
+                elapsed_note = f" after {seconds:.1f}s"
+        except Exception:
+            pass
+
+    try:
+        status_line = (
+            f"⚠️ {provider_label} stream {kind} ({type(error).__name__}){elapsed_note} "
+            f"— reconnecting, retry {attempt}/{max_attempts}"
+        )
+        agent._emit_status(status_line)
+        activity_note = (
+            f"stream retry {attempt}/{max_attempts} "
+            f"after {type(error).__name__}"
+        )
+        agent._touch_activity(activity_note)
+    except Exception:
+        pass
+
+
+# Names exported by ``from agent.stream_diag import *``.
+__all__ = [
+    "STREAM_DIAG_HEADERS",
+    "stream_diag_init",
+    "stream_diag_capture_response",
+    "flatten_exception_chain",
+    "log_stream_retry",
+    "emit_stream_drop",
+]
--- a/agent/system_prompt.py
+++ b/agent/system_prompt.py
@ -0,0 +1,333 @@
+"""System-prompt assembly for :class:`AIAgent`.
+
+The agent's system prompt is built once per session and reused across all
+turns — only context compression triggers a rebuild.  This keeps the
+upstream prefix cache warm.  See ``hermes-agent-dev``'s
+``references/system-prompt-invariant.md`` for the invariants and
+``references/self-improvement-loop.md`` for how the background-review
+fork inherits the cached prompt verbatim.
+
+Three tiers are joined with ``\\n\\n``:
+
+* ``stable``   — identity (SOUL.md or DEFAULT_AGENT_IDENTITY), tool
+  guidance, computer-use guidance, nous subscription block, tool-use
+  enforcement guidance + per-model operational guidance, skills prompt,
+  alibaba model-name workaround, environment hints, platform hints.
+* ``context``  — caller-supplied ``system_message`` plus context files
+  (AGENTS.md / .cursorrules / etc.) discovered under ``TERMINAL_CWD``.
+* ``volatile`` — memory snapshot, USER.md profile, external memory
+  provider block, timestamp/session/model/provider line.
+
+Pure helpers that read the agent's state.  AIAgent keeps thin forwarders.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from typing import Any, Dict, List, Optional
+
+from agent.prompt_builder import (
+    DEFAULT_AGENT_IDENTITY,
+    GOOGLE_MODEL_OPERATIONAL_GUIDANCE,
+    HERMES_AGENT_HELP_GUIDANCE,
+    KANBAN_GUIDANCE,
+    MEMORY_GUIDANCE,
+    OPENAI_MODEL_EXECUTION_GUIDANCE,
+    PLATFORM_HINTS,
+    SESSION_SEARCH_GUIDANCE,
+    SKILLS_GUIDANCE,
+    TOOL_USE_ENFORCEMENT_GUIDANCE,
+    TOOL_USE_ENFORCEMENT_MODELS,
+)
+
+
+def _ra():
+    """Return the ``run_agent`` module, imported at call time.
+
+    ``load_soul_md``, ``build_environment_hints``,
+    ``build_context_files_prompt``, ``build_nous_subscription_prompt``,
+    ``build_skills_system_prompt`` and ``get_toolset_for_tool`` all live in
+    ``run_agent``'s namespace, and many tests replace them there with
+    ``patch("run_agent.load_soul_md", ...)``.  Binding those names directly
+    in this module would bypass such patches, so callers fetch them as
+    attributes of the module returned here on every call.
+    """
+    import run_agent as _run_agent_module
+    return _run_agent_module
+
+
+def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None) -> Dict[str, str]:
+    """Assemble the system prompt as three ordered parts.
+
+    Returns a dict with three keys:
+      * ``stable``   — identity, tool guidance, skills prompt,
+        environment hints, platform hints, model-family operational
+        guidance.
+      * ``context``  — context files (AGENTS.md, .cursorrules, etc.)
+        and caller-supplied system_message.
+      * ``volatile`` — memory snapshot, user profile, external
+        memory provider block, timestamp line.
+
+    Joined into a single string by :func:`build_system_prompt` and
+    cached on ``agent._cached_system_prompt`` for the lifetime of the
+    AIAgent.  Hermes never re-renders parts of this string mid-
+    session — that's the only way to keep upstream prompt caches
+    warm across turns.
+
+    Args:
+        agent: The agent whose configuration and state are read.
+        system_message: Optional caller-supplied text; when not ``None`` it
+            becomes the first entry of the ``context`` part.
+
+    Within each part, fragments are stripped, empty ones are dropped, and
+    the rest are joined with a blank line.
+    """
+    # Local import to avoid pulling model_tools at module load.  Tests
+    # patch ``run_agent.get_toolset_for_tool`` and similar helpers, so
+    # we resolve through ``_ra()`` to honor those patches.
+    _r = _ra()
+
+    # ── Stable tier ────────────────────────────────────────────────
+    stable_parts: List[str] = []
+
+    # Try SOUL.md as primary identity unless the caller explicitly skipped it.
+    # Some execution modes (cron) still want HERMES_HOME persona while keeping
+    # cwd project instructions disabled.
+    _soul_loaded = False
+    if agent.load_soul_identity or not agent.skip_context_files:
+        _soul_content = _r.load_soul_md()
+        if _soul_content:
+            stable_parts.append(_soul_content)
+            _soul_loaded = True
+
+    if not _soul_loaded:
+        # Fallback to hardcoded identity
+        stable_parts.append(DEFAULT_AGENT_IDENTITY)
+
+    # Pointer to the hermes-agent skill + docs for user questions about Hermes itself.
+    stable_parts.append(HERMES_AGENT_HELP_GUIDANCE)
+
+    # Tool-aware behavioral guidance: only inject when the tools are loaded
+    tool_guidance = []
+    if "memory" in agent.valid_tool_names:
+        tool_guidance.append(MEMORY_GUIDANCE)
+    if "session_search" in agent.valid_tool_names:
+        tool_guidance.append(SESSION_SEARCH_GUIDANCE)
+    if "skill_manage" in agent.valid_tool_names:
+        tool_guidance.append(SKILLS_GUIDANCE)
+    # Kanban worker/orchestrator lifecycle — only present when the
+    # dispatcher spawned this process (kanban_show check_fn gates on
+    # HERMES_KANBAN_TASK env var). Normal chat sessions never see
+    # this block.
+    if "kanban_show" in agent.valid_tool_names:
+        tool_guidance.append(KANBAN_GUIDANCE)
+    if tool_guidance:
+        # The guidance snippets share one paragraph (space-joined), unlike
+        # the other stable fragments which are separated by blank lines.
+        stable_parts.append(" ".join(tool_guidance))
+
+    # Computer-use (macOS) — goes in as its own block rather than being
+    # merged into tool_guidance because the content is multi-paragraph.
+    if "computer_use" in agent.valid_tool_names:
+        from agent.prompt_builder import COMPUTER_USE_GUIDANCE
+        stable_parts.append(COMPUTER_USE_GUIDANCE)
+
+    nous_subscription_prompt = _r.build_nous_subscription_prompt(agent.valid_tool_names)
+    if nous_subscription_prompt:
+        stable_parts.append(nous_subscription_prompt)
+    # Tool-use enforcement: tells the model to actually call tools instead
+    # of describing intended actions.  Controlled by config.yaml
+    # agent.tool_use_enforcement:
+    #   "auto" (default) — matches TOOL_USE_ENFORCEMENT_MODELS
+    #   true  — always inject (all models)
+    #   false — never inject
+    #   list  — custom model-name substrings to match
+    if agent.valid_tool_names:
+        _enforce = agent._tool_use_enforcement
+        _inject = False
+        if _enforce is True or (isinstance(_enforce, str) and _enforce.lower() in {"true", "always", "yes", "on"}):
+            _inject = True
+        elif _enforce is False or (isinstance(_enforce, str) and _enforce.lower() in {"false", "never", "no", "off"}):
+            _inject = False
+        elif isinstance(_enforce, list):
+            # Case-insensitive substring match; non-string entries are ignored.
+            model_lower = (agent.model or "").lower()
+            _inject = any(p.lower() in model_lower for p in _enforce if isinstance(p, str))
+        else:
+            # "auto" or any unrecognised value — use hardcoded defaults
+            model_lower = (agent.model or "").lower()
+            _inject = any(p in model_lower for p in TOOL_USE_ENFORCEMENT_MODELS)
+        if _inject:
+            stable_parts.append(TOOL_USE_ENFORCEMENT_GUIDANCE)
+            # The model-family blocks below are only added together with
+            # the enforcement guidance, never on their own.
+            _model_lower = (agent.model or "").lower()
+            # Google model operational guidance (conciseness, absolute
+            # paths, parallel tool calls, verify-before-edit, etc.)
+            if "gemini" in _model_lower or "gemma" in _model_lower:
+                stable_parts.append(GOOGLE_MODEL_OPERATIONAL_GUIDANCE)
+            # OpenAI GPT/Codex execution discipline (tool persistence,
+            # prerequisite checks, verification, anti-hallucination).
+            if "gpt" in _model_lower or "codex" in _model_lower:
+                stable_parts.append(OPENAI_MODEL_EXECUTION_GUIDANCE)
+
+    # The skills prompt is built only when at least one skills tool is loaded.
+    has_skills_tools = any(name in agent.valid_tool_names for name in ['skills_list', 'skill_view', 'skill_manage'])
+    if has_skills_tools:
+        avail_toolsets = {
+            toolset
+            for toolset in (
+                _r.get_toolset_for_tool(tool_name) for tool_name in agent.valid_tool_names
+            )
+            if toolset
+        }
+        skills_prompt = _r.build_skills_system_prompt(
+            available_tools=agent.valid_tool_names,
+            available_toolsets=avail_toolsets,
+        )
+    else:
+        skills_prompt = ""
+    if skills_prompt:
+        stable_parts.append(skills_prompt)
+
+    # Alibaba Coding Plan API always returns "glm-4.7" as model name regardless
+    # of the requested model. Inject explicit model identity into the system prompt
+    # so the agent can correctly report which model it is (workaround for API bug).
+    # Stable for the lifetime of an agent instance — model and provider are fixed
+    # at construction time.
+    if agent.provider == "alibaba":
+        # NOTE(review): unlike the branches above, this reads agent.model
+        # without an ``or ""`` guard — assumes model is a str here; confirm.
+        _model_short = agent.model.split("/")[-1] if "/" in agent.model else agent.model
+        stable_parts.append(
+            f"You are powered by the model named {_model_short}. "
+            f"The exact model ID is {agent.model}. "
+            f"When asked what model you are, always answer based on this information, "
+            f"not on any model name returned by the API."
+        )
+
+    # Environment hints (WSL, Termux, etc.) — tell the agent about the
+    # execution environment so it can translate paths and adapt behavior.
+    # Stable for the lifetime of the process.
+    _env_hints = _r.build_environment_hints()
+    if _env_hints:
+        stable_parts.append(_env_hints)
+
+    platform_key = (agent.platform or "").lower().strip()
+    if platform_key in PLATFORM_HINTS:
+        stable_parts.append(PLATFORM_HINTS[platform_key])
+    elif platform_key:
+        # Check plugin registry for platform-specific LLM guidance
+        try:
+            from gateway.platform_registry import platform_registry
+            _entry = platform_registry.get(platform_key)
+            if _entry and _entry.platform_hint:
+                stable_parts.append(_entry.platform_hint)
+        except Exception:
+            # Best-effort: any import or lookup failure just means no hint.
+            pass
+
+    # ── Context tier (cwd-dependent, may change between sessions) ─
+    context_parts: List[str] = []
+
+    # Note: ephemeral_system_prompt is NOT included here. It's injected at
+    # API-call time only so it stays out of the cached/stored system prompt.
+    if system_message is not None:
+        context_parts.append(system_message)
+
+    if not agent.skip_context_files:
+        # Use TERMINAL_CWD for context file discovery when set (gateway
+        # mode).  The gateway process runs from the hermes-agent install
+        # dir, so os.getcwd() would pick up the repo's AGENTS.md and
+        # other dev files — inflating token usage by ~10k for no benefit.
+        _context_cwd = os.getenv("TERMINAL_CWD") or None
+        context_files_prompt = _r.build_context_files_prompt(
+            cwd=_context_cwd, skip_soul=_soul_loaded)
+        if context_files_prompt:
+            context_parts.append(context_files_prompt)
+
+    # ── Volatile tier (memory, profile, timestamp — differs per build) ──
+    # Returned as its own part; build_system_prompt places it last, after
+    # the stable and context parts.
+    volatile_parts: List[str] = []
+
+    if agent._memory_store:
+        if agent._memory_enabled:
+            mem_block = agent._memory_store.format_for_system_prompt("memory")
+            if mem_block:
+                volatile_parts.append(mem_block)
+        # USER.md is always included when enabled.
+        if agent._user_profile_enabled:
+            user_block = agent._memory_store.format_for_system_prompt("user")
+            if user_block:
+                volatile_parts.append(user_block)
+
+    # External memory provider system prompt block (additive to built-in)
+    if agent._memory_manager:
+        try:
+            _ext_mem_block = agent._memory_manager.build_system_prompt()
+            if _ext_mem_block:
+                volatile_parts.append(_ext_mem_block)
+        except Exception:
+            # A failing external provider must not prevent prompt assembly.
+            pass
+
+    from hermes_time import now as _hermes_now
+    now = _hermes_now()
+    timestamp_line = f"Conversation started: {now.strftime('%A, %B %d, %Y %I:%M %p')}"
+    if agent.pass_session_id and agent.session_id:
+        timestamp_line += f"\nSession ID: {agent.session_id}"
+    if agent.model:
+        timestamp_line += f"\nModel: {agent.model}"
+    if agent.provider:
+        timestamp_line += f"\nProvider: {agent.provider}"
+    volatile_parts.append(timestamp_line)
+
+    # Per tier: drop empty/whitespace-only fragments, strip the rest, and
+    # separate them with a blank line.
+    return {
+        "stable":   "\n\n".join(p.strip() for p in stable_parts   if p and p.strip()),
+        "context":  "\n\n".join(p.strip() for p in context_parts  if p and p.strip()),
+        "volatile": "\n\n".join(p.strip() for p in volatile_parts if p and p.strip()),
+    }
+
+
+def build_system_prompt(agent: Any, system_message: Optional[str] = None) -> str:
+    """Build the complete system prompt string for *agent*.
+
+    Intended to run once per session, with the result cached on
+    ``agent._cached_system_prompt``; it is only rebuilt after a context
+    compression event, so the prompt stays identical across turns and
+    prefix caches keep hitting.
+
+    The three parts from :func:`build_system_prompt_parts` are emitted in
+    cache-friendly order — stable identity/guidance, then session-stable
+    context files, then volatile content (memory, USER profile,
+    timestamp) — separated by blank lines, with empty parts left out.
+    The whole string is treated as one cached block.
+
+    Args:
+        agent: The agent whose state feeds the prompt.
+        system_message: Optional caller-supplied text for the context part.
+
+    Returns:
+        The joined prompt.
+    """
+    tiers = build_system_prompt_parts(agent, system_message=system_message)
+    ordered = [tiers[key] for key in ("stable", "context", "volatile")]
+    return "\n\n".join(filter(None, ordered))
+
+
+def invalidate_system_prompt(agent: Any) -> None:
+    """Drop the cached system prompt so the next turn rebuilds it.
+
+    Used after context compression.  When the agent has a memory store, it
+    is reloaded from disk as well, so the rebuilt prompt reflects anything
+    written during this session.
+
+    Args:
+        agent: The agent whose ``_cached_system_prompt`` is cleared.
+    """
+    agent._cached_system_prompt = None
+    store = agent._memory_store
+    if not store:
+        return
+    store.load_from_disk()
+
+
+def format_tools_for_system_message(agent: Any) -> str:
+    """Serialize the agent's tool definitions in the trajectory format.
+
+    Each entry of ``agent.tools`` contributes its ``function`` name,
+    description (default ``""``) and parameters (default ``{}``), plus a
+    ``required`` key that is always ``None`` to match the example format.
+
+    Returns:
+        str: JSON array of the converted tools, or ``"[]"`` when the agent
+        has no tools.
+    """
+    if not agent.tools:
+        return "[]"
+
+    trajectory_tools = [
+        {
+            "name": spec["name"],
+            "description": spec.get("description", ""),
+            "parameters": spec.get("parameters", {}),
+            "required": None,  # Match the format in the example
+        }
+        for spec in (entry["function"] for entry in agent.tools)
+    ]
+    return json.dumps(trajectory_tools, ensure_ascii=False)
+
+
+# Names exported by ``from agent.system_prompt import *``.
+__all__ = [
+    "build_system_prompt_parts",
+    "build_system_prompt",
+    "invalidate_system_prompt",
+    "format_tools_for_system_message",
+]
--- a/agent/tool_dispatch_helpers.py
+++ b/agent/tool_dispatch_helpers.py
@ -0,0 +1,336 @@
+"""Tool-dispatch helpers — parallelism gating, multimodal envelopes, mutation tracking.
+
+Pure module-level utilities extracted from ``run_agent.py``:
+
+* ``_is_destructive_command`` — terminal-command heuristic used to gate
+  parallel batch dispatch.
+* ``_should_parallelize_tool_batch`` / ``_extract_parallel_scope_path`` /
+  ``_paths_overlap`` — the rules engine deciding when a multi-tool batch
+  can run concurrently.
+* ``_is_multimodal_tool_result`` / ``_multimodal_text_summary`` /
+  ``_append_subdir_hint_to_multimodal`` — envelope helpers for the
+  ``{"_multimodal": True, "content": [...], "text_summary": ...}`` dict
+  shape returned by tools like ``computer_use``.
+* ``_extract_file_mutation_targets`` / ``_extract_error_preview`` —
+  per-turn file-mutation verifier inputs.
+* ``_trajectory_normalize_msg`` — strip image blobs from a message for
+  trajectory saving.
+
+All helpers are stateless.  ``run_agent`` re-exports each name so existing
+``from run_agent import ...`` imports in tests and other modules keep
+working unchanged.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import re
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from agent.tool_result_classification import (
+    FILE_MUTATING_TOOL_NAMES as _FILE_MUTATING_TOOLS,
+)
+
+logger = logging.getLogger(__name__)
+
+# Tools that must never run concurrently (interactive / user-facing).
+# When any of these appear in a batch, we fall back to sequential execution.
+_NEVER_PARALLEL_TOOLS = frozenset({"clarify"})
+
+# Read-only tools with no shared mutable session state.
+# ``read_file`` is also listed in ``_PATH_SCOPED_TOOLS`` below.
+_PARALLEL_SAFE_TOOLS = frozenset({
+    "ha_get_state",
+    "ha_list_entities",
+    "ha_list_services",
+    "read_file",
+    "search_files",
+    "session_search",
+    "skill_view",
+    "skills_list",
+    "vision_analyze",
+    "web_extract",
+    "web_search",
+})
+
+# File tools can run concurrently when they target independent paths.
+_PATH_SCOPED_TOOLS = frozenset({"read_file", "write_file", "patch"})
+
+# Patterns that indicate a terminal command may modify/delete files.
+# A command word only counts at the start of the string or right after
+# whitespace, ``&&``, ``||``, ``;`` or a backtick, and (except ``sed -i``)
+# must be followed by whitespace.
+_DESTRUCTIVE_PATTERNS = re.compile(
+    r"""(?:^|\s|&&|\|\||;|`)(?:
+        rm\s|rmdir\s|
+        cp\s|install\s|
+        mv\s|
+        sed\s+-i|
+        truncate\s|
+        dd\s|
+        shred\s|
+        git\s+(?:reset|clean|checkout)\s
+    )""",
+    re.VERBOSE,
+)
+# Output redirects that overwrite files (> but not >>)
+# NOTE: purely textual — any single ``>`` whose neighbours are not ``>``
+# matches, so forms such as ``2>&1`` or a ``>`` inside quotes also count.
+_REDIRECT_OVERWRITE = re.compile(r'[^>]>[^>]|^>[^>]')
+
+
+def _is_destructive_command(cmd: str) -> bool:
+    """Heuristically decide whether a terminal command modifies/deletes files.
+
+    Args:
+        cmd: The command line to inspect.
+
+    Returns:
+        ``True`` when *cmd* matches ``_DESTRUCTIVE_PATTERNS`` or contains an
+        overwriting redirect per ``_REDIRECT_OVERWRITE``; ``False`` for an
+        empty command or when neither pattern matches.
+    """
+    if not cmd:
+        return False
+    return bool(
+        _DESTRUCTIVE_PATTERNS.search(cmd) or _REDIRECT_OVERWRITE.search(cmd)
+    )
+
+
+def _is_mcp_tool_parallel_safe(tool_name: str) -> bool:
+    """Ask the MCP layer whether *tool_name* may run in parallel.
+
+    ``tools.mcp_tool`` is imported at call time to avoid circular
+    dependencies.
+
+    Returns:
+        Whatever ``tools.mcp_tool.is_mcp_tool_parallel_safe`` returns, or
+        ``False`` when the module cannot be imported or the check raises.
+    """
+    try:
+        from tools.mcp_tool import is_mcp_tool_parallel_safe
+        verdict = is_mcp_tool_parallel_safe(tool_name)
+    except Exception:
+        return False
+    else:
+        return verdict
+
+
+def _should_parallelize_tool_batch(tool_calls) -> bool:
+    """Return True when a tool-call batch is safe to run concurrently.
+
+    Args:
+        tool_calls: Sequence of tool-call objects exposing
+            ``function.name`` and ``function.arguments`` (a JSON string).
+
+    Returns:
+        ``False`` when the batch has at most one call, contains a tool from
+        ``_NEVER_PARALLEL_TOOLS``, has arguments that are not a JSON
+        object, has a path-scoped call with no usable path or with a path
+        overlapping an earlier one, or contains a tool that is neither in
+        ``_PARALLEL_SAFE_TOOLS`` nor an MCP tool that opted into parallel
+        calls.  ``True`` otherwise.
+    """
+    if len(tool_calls) <= 1:
+        return False
+
+    if any(tc.function.name in _NEVER_PARALLEL_TOOLS for tc in tool_calls):
+        return False
+
+    reserved_paths: list[Path] = []
+    for tool_call in tool_calls:
+        tool_name = tool_call.function.name
+        raw_args = tool_call.function.arguments
+        try:
+            function_args = json.loads(raw_args)
+        except Exception:
+            # ``raw_args`` may be ``None`` or another non-string (that is
+            # one way to land here), so stringify before truncating —
+            # slicing it directly would raise from inside this handler.
+            logger.debug(
+                "Could not parse args for %s — defaulting to sequential; raw=%s",
+                tool_name,
+                str(raw_args)[:200],
+            )
+            return False
+        if not isinstance(function_args, dict):
+            logger.debug(
+                "Non-dict args for %s (%s) — defaulting to sequential",
+                tool_name,
+                type(function_args).__name__,
+            )
+            return False
+
+        # Path-scoped tools are judged purely on path independence, even if
+        # they also appear in ``_PARALLEL_SAFE_TOOLS``.
+        if tool_name in _PATH_SCOPED_TOOLS:
+            scoped_path = _extract_parallel_scope_path(tool_name, function_args)
+            if scoped_path is None:
+                return False
+            if any(_paths_overlap(scoped_path, existing) for existing in reserved_paths):
+                return False
+            reserved_paths.append(scoped_path)
+            continue
+
+        if tool_name not in _PARALLEL_SAFE_TOOLS:
+            # Check if it's an MCP tool from a server that opted into parallel calls.
+            if not _is_mcp_tool_parallel_safe(tool_name):
+                return False
+
+    return True
+
+
+def _extract_parallel_scope_path(tool_name: str, function_args: dict) -> Optional[Path]:
+    """Return the normalized absolute target path of a path-scoped tool call.
+
+    Args:
+        tool_name: Name of the tool being called.
+        function_args: Parsed arguments; the target is read from ``"path"``.
+
+    Returns:
+        ``None`` when the tool is not in ``_PATH_SCOPED_TOOLS`` or the path
+        is missing, not a string, or blank.  Otherwise the path with ``~``
+        expanded, made absolute against the current working directory if
+        relative, and normalized with ``os.path.abspath``.
+    """
+    if tool_name not in _PATH_SCOPED_TOOLS:
+        return None
+
+    candidate = function_args.get("path")
+    if not (isinstance(candidate, str) and candidate.strip()):
+        return None
+
+    target = Path(candidate).expanduser()
+    if not target.is_absolute():
+        # Avoid resolve(); the file may not exist yet.
+        target = Path.cwd() / target
+    return Path(os.path.abspath(str(target)))
+
+
+def _paths_overlap(left: Path, right: Path) -> bool:
+    """Return True when one path is equal to, or an ancestor of, the other.
+
+    The paths are compared component by component over the length of the
+    shorter one, so two paths that may refer to the same subtree overlap.
+    A path with no components never overlaps anything (such paths are not
+    expected here; callers guard against them).
+    """
+    left_parts, right_parts = left.parts, right.parts
+    if not left_parts or not right_parts:
+        return False
+    return all(a == b for a, b in zip(left_parts, right_parts))
+
+
+def _is_multimodal_tool_result(value: Any) -> bool:
+    """Tell whether *value* is a multimodal tool-result envelope.
+
+    Multimodal handlers (e.g. tools/computer_use) return a dict whose
+    `_multimodal` entry is exactly ``True`` and whose `content` entry is a
+    list of OpenAI-style content parts; a `text_summary` for string-only
+    fallbacks is optional and not checked here.
+    """
+    if not isinstance(value, dict):
+        return False
+    if value.get("_multimodal") is not True:
+        return False
+    return isinstance(value.get("content"), list)
+
+
+def _multimodal_text_summary(value: Any) -> str:
+    """Produce a plain-text view of a tool result.
+
+    For callers that need a string: logging, previews, persistence size
+    heuristics, and fallback content for providers without multipart
+    tool-message support.
+
+    Returns:
+        For a multimodal envelope: its truthy ``text_summary``, else the
+        newline-joined text parts, else ``"[multimodal tool result]"``.
+        For anything else: the value itself if it is a string, otherwise
+        its JSON dump (``default=str``), or ``str(value)`` if dumping fails.
+    """
+    if not _is_multimodal_tool_result(value):
+        if isinstance(value, str):
+            return value
+        try:
+            return json.dumps(value, default=str)
+        except Exception:
+            return str(value)
+
+    if value.get("text_summary"):
+        return str(value["text_summary"])
+
+    texts = [
+        str(part.get("text", ""))
+        for part in value.get("content") or []
+        if isinstance(part, dict) and part.get("type") == "text"
+    ]
+    return "\n".join(texts) if texts else "[multimodal tool result]"
+
+
+def _append_subdir_hint_to_multimodal(value: Dict[str, Any], hint: str) -> None:
+    """Append a subdirectory hint to a multimodal envelope, in place.
+
+    The hint is concatenated onto the first text part so the model sees
+    it.  When the envelope has no text part, a new one holding only the
+    hint is inserted at the front.  Non-text parts (e.g. images) are left
+    untouched.  A string ``text_summary`` receives the same suffix for
+    string-fallback callers.  Values that are not multimodal envelopes
+    are ignored.
+    """
+    if not _is_multimodal_tool_result(value):
+        return
+
+    content_parts = value.get("content") or []
+    first_text_part = next(
+        (
+            part
+            for part in content_parts
+            if isinstance(part, dict) and part.get("type") == "text"
+        ),
+        None,
+    )
+    if first_text_part is None:
+        content_parts.insert(0, {"type": "text", "text": hint})
+        # Re-assign: ``content_parts`` may be a fresh list when the stored
+        # content was empty.
+        value["content"] = content_parts
+    else:
+        first_text_part["text"] = str(first_text_part.get("text", "")) + hint
+
+    summary = value.get("text_summary")
+    if isinstance(summary, str):
+        value["text_summary"] = summary + hint
+
+
+def _extract_file_mutation_targets(tool_name: str, args: Dict[str, Any]) -> List[str]:
+    """List the file paths targeted by a file-mutating tool call.
+
+    * Tools outside ``_FILE_MUTATING_TOOLS`` yield an empty list.
+    * ``write_file`` and ``patch`` in replace mode (the default when
+      ``mode`` is missing or falsy) yield ``[str(args["path"])]``, or an
+      empty list when the path is missing or falsy.
+    * ``patch`` in V4A patch mode yields one entry per
+      ``*** Update File:`` / ``*** Add File:`` / ``*** Delete File:``
+      header found in ``args["patch"]``, in order of appearance, so each
+      file of a multi-file patch can be tracked separately.
+    * Any other mode yields an empty list.
+    """
+    if tool_name not in _FILE_MUTATING_TOOLS:
+        return []
+
+    # ``write_file`` has no mode switch; it always behaves like replace mode.
+    if tool_name == "write_file":
+        mode = "replace"
+    else:
+        mode = args.get("mode") or "replace"
+
+    if mode == "replace":
+        target = args.get("path")
+        return [str(target)] if target else []
+
+    if mode != "patch":
+        return []
+
+    patch_body = args.get("patch") or ""
+    if not isinstance(patch_body, str) or not patch_body:
+        return []
+
+    header_matches = re.finditer(
+        r'^\*\*\*\s+(?:Update|Add|Delete)\s+File:\s*(.+)$',
+        patch_body,
+        re.MULTILINE,
+    )
+    stripped_targets = (match.group(1).strip() for match in header_matches)
+    return [target for target in stripped_targets if target]
+
+
+def _extract_error_preview(result: Any, max_len: int = 180) -> str:
+    """Build a single-line error summary of a tool result for footer display.
+
+    The result is first rendered to text.  If that text looks like a JSON
+    object with a string ``error`` field (tool handlers return
+    ``{"success": false, "error": "..."}``), the field replaces the text;
+    otherwise the raw text is kept.  Whitespace runs are collapsed to
+    single spaces and the outcome is cut to ``max_len`` characters, with
+    an ellipsis as the final character when truncated.  ``None`` gives an
+    empty string.
+    """
+    if result is None:
+        text = ""
+    else:
+        text = _multimodal_text_summary(result)
+
+    if not isinstance(text, str):
+        try:
+            text = str(text)
+        except Exception:
+            return ""
+
+    candidate = text.strip()
+    if candidate.startswith("{"):
+        try:
+            payload = json.loads(candidate)
+        except Exception:
+            # Not valid JSON after all: keep the raw text.
+            payload = None
+        if isinstance(payload, dict) and isinstance(payload.get("error"), str):
+            text = payload["error"]
+
+    single_line = " ".join(text.split())
+    if len(single_line) <= max_len:
+        return single_line
+    return single_line[: max_len - 1] + "…"
+
+
+def _trajectory_normalize_msg(msg: Dict[str, Any]) -> Dict[str, Any]:
+    """Produce an image-free view of a message for trajectory saving.
+
+    * Non-dict input is handed back untouched.
+    * If ``content`` is a multimodal tool-result envelope, a shallow copy
+      of the message is returned with ``content`` replaced by the
+      envelope's text rendering.
+    * If ``content`` is a list, a shallow copy is returned in which every
+      dict part of type ``image``, ``image_url`` or ``input_image`` is
+      swapped for a ``[screenshot]`` text part; other parts are kept.
+    * Otherwise the original message object is returned as-is.
+    """
+    if not isinstance(msg, dict):
+        return msg
+
+    content = msg.get("content")
+    if _is_multimodal_tool_result(content):
+        return {**msg, "content": _multimodal_text_summary(content)}
+
+    if not isinstance(content, list):
+        return msg
+
+    sanitized = [
+        {"type": "text", "text": "[screenshot]"}
+        if isinstance(part, dict) and part.get("type") in {"image", "image_url", "input_image"}
+        else part
+        for part in content
+    ]
+    return {**msg, "content": sanitized}
+
+
+# Every name here starts with an underscore, so ``from <module> import *``
+# would skip all of them unless they are listed explicitly in ``__all__``.
+__all__ = [
+    "_NEVER_PARALLEL_TOOLS",
+    "_PARALLEL_SAFE_TOOLS",
+    "_PATH_SCOPED_TOOLS",
+    "_DESTRUCTIVE_PATTERNS",
+    "_REDIRECT_OVERWRITE",
+    "_is_destructive_command",
+    "_should_parallelize_tool_batch",
+    "_extract_parallel_scope_path",
+    "_paths_overlap",
+    "_is_multimodal_tool_result",
+    "_multimodal_text_summary",
+    "_append_subdir_hint_to_multimodal",
+    "_extract_file_mutation_targets",
+    "_extract_error_preview",
+    "_trajectory_normalize_msg",
+]
--- a/agent/tool_executor.py
+++ b/agent/tool_executor.py
@ -0,0 +1,920 @@
+"""Tool-call execution — sequential and concurrent dispatch.
+
+Both AIAgent methods (``_execute_tool_calls_sequential`` and
+``_execute_tool_calls_concurrent``) live here as module-level
+functions that take the parent ``AIAgent`` as their first argument.
+
+``run_agent`` keeps thin wrappers so existing call sites work; tests
+that patch ``run_agent._set_interrupt`` are honored because the
+extracted functions reach back through the ``run_agent`` module via
+``_ra()`` for that symbol.
+"""
+
+from __future__ import annotations
+
+import concurrent.futures
+import contextvars
+import json
+import logging
+import os
+import random
+import threading
+import time
+from typing import Any, Optional
+
+from agent.display import (
+    KawaiiSpinner,
+    build_tool_preview as _build_tool_preview,
+    get_cute_tool_message as _get_cute_tool_message_impl,
+    get_tool_emoji as _get_tool_emoji,
+    _detect_tool_failure,
+)
+from agent.tool_guardrails import ToolGuardrailDecision
+from agent.tool_dispatch_helpers import (
+    _is_destructive_command,
+    _is_multimodal_tool_result,
+    _multimodal_text_summary,
+    _append_subdir_hint_to_multimodal,
+)
+from tools.terminal_tool import (
+    _get_approval_callback,
+    _get_sudo_password_callback,
+    set_approval_callback as _set_approval_callback,
+    set_sudo_password_callback as _set_sudo_password_callback,
+    get_active_env,
+)
+from tools.tool_result_storage import (
+    maybe_persist_tool_result,
+    enforce_turn_budget,
+)
+
+# Module-scoped logger, named after this module's import path.
+logger = logging.getLogger(__name__)
+
+# Maximum number of concurrent worker threads for parallel tool execution.
+# Mirrors the constant in ``run_agent`` for tests/imports that look here.
+_MAX_TOOL_WORKERS = 8
+
+
+def _ra():
+    """Return the ``run_agent`` module, imported lazily at call time.
+
+    Looking symbols up through the module object on every call (rather
+    than binding them at import time) is what lets patches such as
+    ``run_agent._set_interrupt`` take effect here.
+    """
+    import run_agent as _run_agent_module
+
+    return _run_agent_module
+
+
+def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
+    """Execute multiple tool calls concurrently using a thread pool.
+
+    Results are collected in the original tool-call order and appended to
+    messages so the API sees them in the expected sequence.
+
+    Phases, in order: interrupt pre-flight, argument parsing plus
+    checkpoint/plugin/guardrail checks, start callbacks, threaded
+    execution with heartbeats and interrupt polling, per-tool result
+    handling (one ``role="tool"`` message per call), aggregate turn
+    budget enforcement, and a final /steer injection.
+
+    Args:
+        agent: The parent ``AIAgent``; its callbacks, guardrails,
+            checkpoint manager and interrupt state are used throughout.
+        assistant_message: Assistant message whose ``tool_calls`` are run.
+        messages: Conversation list, mutated in place.
+        effective_task_id: Task id forwarded to the plugin hook,
+            ``agent._invoke_tool`` and ``get_active_env``.
+        api_call_count: Not used in this function; present so the
+            signature matches ``execute_tool_calls_sequential``.
+    """
+    tool_calls = assistant_message.tool_calls
+    num_tools = len(tool_calls)
+
+    # ── Pre-flight: interrupt check ──────────────────────────────────
+    if agent._interrupt_requested:
+        print(f"{agent.log_prefix}⚡ Interrupt: skipping {num_tools} tool call(s)")
+        for tc in tool_calls:
+            messages.append({
+                "role": "tool",
+                "name": tc.function.name,
+                "content": f"[Tool execution cancelled — {tc.function.name} was skipped due to user interrupt]",
+                "tool_call_id": tc.id,
+            })
+        return
+
+    # ── Parse args + pre-execution bookkeeping ───────────────────────
+    # list of (tool_call, function_name, function_args, block_result, blocked_by_guardrail)
+    parsed_calls = []
+    for tool_call in tool_calls:
+        function_name = tool_call.function.name
+
+        # Reset nudge counters
+        if function_name == "memory":
+            agent._turns_since_memory = 0
+        elif function_name == "skill_manage":
+            agent._iters_since_skill = 0
+
+        try:
+            function_args = json.loads(tool_call.function.arguments)
+        except json.JSONDecodeError:
+            function_args = {}
+        if not isinstance(function_args, dict):
+            function_args = {}
+
+        # Checkpoint for file-mutating tools
+        if function_name in {"write_file", "patch"} and agent._checkpoint_mgr.enabled:
+            try:
+                file_path = function_args.get("path", "")
+                if file_path:
+                    work_dir = agent._checkpoint_mgr.get_working_dir_for_path(file_path)
+                    agent._checkpoint_mgr.ensure_checkpoint(work_dir, f"before {function_name}")
+            except Exception:
+                pass
+
+        # Checkpoint before destructive terminal commands
+        if function_name == "terminal" and agent._checkpoint_mgr.enabled:
+            try:
+                cmd = function_args.get("command", "")
+                if _is_destructive_command(cmd):
+                    cwd = function_args.get("workdir") or os.getenv("TERMINAL_CWD", os.getcwd())
+                    agent._checkpoint_mgr.ensure_checkpoint(
+                        cwd, f"before terminal: {cmd[:60]}"
+                    )
+            except Exception:
+                pass
+
+        block_result = None
+        blocked_by_guardrail = False
+        try:
+            from hermes_cli.plugins import get_pre_tool_call_block_message
+            block_message = get_pre_tool_call_block_message(
+                function_name, function_args, task_id=effective_task_id or "",
+            )
+        except Exception:
+            block_message = None
+
+        if block_message is not None:
+            block_result = json.dumps({"error": block_message}, ensure_ascii=False)
+        else:
+            guardrail_decision = agent._tool_guardrails.before_call(function_name, function_args)
+            if not guardrail_decision.allows_execution:
+                block_result = agent._guardrail_block_result(guardrail_decision)
+                blocked_by_guardrail = True
+
+        parsed_calls.append((tool_call, function_name, function_args, block_result, blocked_by_guardrail))
+
+    # ── Logging / callbacks ──────────────────────────────────────────
+    tool_names_str = ", ".join(name for _, name, _, _, _ in parsed_calls)
+    if not agent.quiet_mode:
+        print(f"  ⚡ Concurrent: {num_tools} tool calls — {tool_names_str}")
+        for i, (tc, name, args, block_result, blocked_by_guardrail) in enumerate(parsed_calls, 1):
+            args_str = json.dumps(args, ensure_ascii=False)
+            if agent.verbose_logging:
+                print(f"  📞 Tool {i}: {name}({list(args.keys())})")
+                print(agent._wrap_verbose("Args: ", json.dumps(args, indent=2, ensure_ascii=False)))
+            else:
+                args_preview = args_str[:agent.log_prefix_chars] + "..." if len(args_str) > agent.log_prefix_chars else args_str
+                print(f"  📞 Tool {i}: {name}({list(args.keys())}) - {args_preview}")
+
+    for tc, name, args, block_result, blocked_by_guardrail in parsed_calls:
+        if block_result is not None:
+            continue
+        if agent.tool_progress_callback:
+            try:
+                preview = _build_tool_preview(name, args)
+                agent.tool_progress_callback("tool.started", name, preview, args)
+            except Exception as cb_err:
+                logging.debug(f"Tool progress callback error: {cb_err}")
+
+    for tc, name, args, block_result, blocked_by_guardrail in parsed_calls:
+        if block_result is not None:
+            continue
+        if agent.tool_start_callback:
+            try:
+                agent.tool_start_callback(tc.id, name, args)
+            except Exception as cb_err:
+                logging.debug(f"Tool start callback error: {cb_err}")
+
+    # ── Concurrent execution ─────────────────────────────────────────
+    # Each slot holds (function_name, function_args, function_result, duration, error_flag, blocked_flag)
+    results = [None] * num_tools
+    for i, (tc, name, args, block_result, blocked_by_guardrail) in enumerate(parsed_calls):
+        if block_result is not None:
+            results[i] = (name, args, block_result, 0.0, True, True)
+
+    # Touch activity before launching workers so the gateway knows
+    # we're executing tools (not stuck).
+    agent._current_tool = tool_names_str
+    agent._touch_activity(f"executing {num_tools} tools concurrently: {tool_names_str}")
+
+    # Capture CLI callbacks from the agent thread so worker threads can
+    # register them locally.  Without this, _get_approval_callback() in
+    # terminal_tool returns None in ThreadPoolExecutor workers, causing
+    # the dangerous-command prompt to fall back to input() — which
+    # deadlocks against prompt_toolkit's raw terminal mode (#13617).
+    _parent_approval_cb = _get_approval_callback()
+    _parent_sudo_cb = _get_sudo_password_callback()
+
+    def _run_tool(index, tool_call, function_name, function_args):
+        """Worker function executed in a thread.
+
+        Stores its outcome in ``results[index]``.  Everything after the
+        worker-tid registration runs inside ``try``/``finally`` so the
+        tid, the interrupt bit and the thread-local callbacks are always
+        torn down, even if result handling raises.
+        """
+        # Register this worker tid so the agent can fan out an interrupt
+        # to it — see AIAgent.interrupt().  Must happen first thing, and
+        # must be paired with discard + clear in the finally block.
+        _worker_tid = threading.current_thread().ident
+        with agent._tool_worker_threads_lock:
+            agent._tool_worker_threads.add(_worker_tid)
+        try:
+            # Race: if the agent was interrupted between fan-out (which
+            # snapshotted an empty/earlier set) and our registration, apply
+            # the interrupt to our own tid now so is_interrupted() inside
+            # the tool returns True on the next poll.
+            if agent._interrupt_requested:
+                try:
+                    _ra()._set_interrupt(True, _worker_tid)
+                except Exception:
+                    pass
+            # Set the activity callback on THIS worker thread so
+            # _wait_for_process (terminal commands) can fire heartbeats.
+            # The callback is thread-local; the main thread's callback
+            # is invisible to worker threads.
+            try:
+                from tools.environments.base import set_activity_callback
+                set_activity_callback(agent._touch_activity)
+            except Exception:
+                pass
+            # Propagate approval/sudo callbacks to this worker thread.
+            # Mirrors cli.py run_agent() pattern (GHSA-qg5c-hvr5-hjgr).
+            if _parent_approval_cb is not None:
+                try:
+                    _set_approval_callback(_parent_approval_cb)
+                except Exception:
+                    pass
+            if _parent_sudo_cb is not None:
+                try:
+                    _set_sudo_password_callback(_parent_sudo_cb)
+                except Exception:
+                    pass
+            start = time.time()
+            try:
+                result = agent._invoke_tool(
+                    function_name,
+                    function_args,
+                    effective_task_id,
+                    tool_call.id,
+                    messages=messages,
+                    pre_tool_block_checked=True,
+                )
+            except Exception as tool_error:
+                result = f"Error executing tool '{function_name}': {tool_error}"
+                logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True)
+            duration = time.time() - start
+            is_error, _ = _detect_tool_failure(function_name, result)
+            # Publish the result before logging so a logging problem can
+            # never cost us the tool's output.
+            results[index] = (function_name, function_args, result, duration, is_error, False)
+            # Multimodal results are dicts and cannot be sliced; log their
+            # text rendering instead.  Plain strings pass through unchanged.
+            _result_text = _multimodal_text_summary(result)
+            if is_error:
+                logger.info("tool %s failed (%.2fs): %s", function_name, duration, _result_text[:200])
+            else:
+                logger.info("tool %s completed (%.2fs, %d chars)", function_name, duration, len(_result_text))
+        finally:
+            # Tear down worker-tid tracking.  Clear any interrupt bit we may
+            # have set so the next task scheduled onto this recycled tid
+            # starts with a clean slate.
+            with agent._tool_worker_threads_lock:
+                agent._tool_worker_threads.discard(_worker_tid)
+            try:
+                _ra()._set_interrupt(False, _worker_tid)
+            except Exception:
+                pass
+            # Clear thread-local callbacks so a recycled worker thread
+            # doesn't hold stale references to a disposed CLI instance.
+            try:
+                _set_approval_callback(None)
+                _set_sudo_password_callback(None)
+            except Exception:
+                pass
+
+    # Start spinner for CLI mode (skip when TUI handles tool progress)
+    spinner = None
+    if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
+        face = random.choice(KawaiiSpinner.get_waiting_faces())
+        spinner = KawaiiSpinner(f"{face} ⚡ running {num_tools} tools concurrently", spinner_type='dots', print_fn=agent._print_fn)
+        spinner.start()
+
+    try:
+        runnable_calls = [
+            (i, tc, name, args)
+            for i, (tc, name, args, block_result, blocked_by_guardrail) in enumerate(parsed_calls)
+            if block_result is None
+        ]
+        futures = []
+        # Future -> tool name.  ``futures`` only holds runnable calls, so its
+        # positions do not line up with ``parsed_calls`` once any call in
+        # the batch was blocked; look names up through this map instead.
+        _future_tool_names = {}
+        if runnable_calls:
+            max_workers = min(len(runnable_calls), _MAX_TOOL_WORKERS)
+            # NOTE(review): leaving this ``with`` block waits for tools that
+            # are still running, so an interrupt cannot return control
+            # before they finish — confirm whether that is acceptable.
+            with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+                for i, tc, name, args in runnable_calls:
+                    # Propagate ContextVars (e.g. _approval_session_key); mirrors asyncio.to_thread.
+                    ctx = contextvars.copy_context()
+                    f = executor.submit(ctx.run, _run_tool, i, tc, name, args)
+                    futures.append(f)
+                    _future_tool_names[f] = name
+
+                # Wait for all to complete with periodic heartbeats so the
+                # gateway's inactivity monitor doesn't kill us during long
+                # concurrent tool batches. Also check for user interrupts
+                # so we don't block indefinitely when the user sends /stop
+                # or a new message during concurrent tool execution.
+                _conc_start = time.time()
+                _interrupt_logged = False
+                while True:
+                    done, not_done = concurrent.futures.wait(
+                        futures, timeout=5.0,
+                    )
+                    if not not_done:
+                        break
+
+                    # Check for interrupt — the per-thread interrupt signal
+                    # already causes individual tools (terminal, execute_code)
+                    # to abort, but tools without interrupt checks (web_search,
+                    # read_file) will run to completion. Cancel any futures
+                    # that haven't started yet so we don't block on them.
+                    if agent._interrupt_requested:
+                        if not _interrupt_logged:
+                            _interrupt_logged = True
+                            agent._vprint(
+                                f"{agent.log_prefix}⚡ Interrupt: cancelling "
+                                f"{len(not_done)} pending concurrent tool(s)",
+                                force=True,
+                            )
+                        for f in not_done:
+                            f.cancel()
+                        # Give already-running tools a moment to notice the
+                        # per-thread interrupt signal and exit gracefully.
+                        concurrent.futures.wait(not_done, timeout=3.0)
+                        break
+
+                    _conc_elapsed = int(time.time() - _conc_start)
+                    # Heartbeat every ~30s (6 × 5s poll intervals)
+                    if _conc_elapsed > 0 and _conc_elapsed % 30 < 6:
+                        _still_running = [
+                            _future_tool_names[f]
+                            for f in not_done
+                        ]
+                        agent._touch_activity(
+                            f"concurrent tools running ({_conc_elapsed}s, "
+                            f"{len(not_done)} remaining: {', '.join(_still_running[:3])})"
+                        )
+    finally:
+        if spinner:
+            # Build a summary message for the spinner stop
+            completed = sum(1 for r in results if r is not None)
+            total_dur = sum(r[3] for r in results if r is not None)
+            spinner.stop(f"⚡ {completed}/{num_tools} tools completed in {total_dur:.1f}s total")
+
+    # ── Post-execution: display per-tool results ─────────────────────
+    for i, (tc, name, args, block_result, blocked_by_guardrail) in enumerate(parsed_calls):
+        r = results[i]
+        blocked = False
+        if r is None:
+            # Tool was cancelled (interrupt) or thread didn't return
+            if agent._interrupt_requested:
+                function_result = f"[Tool execution cancelled — {name} was skipped due to user interrupt]"
+            else:
+                function_result = f"Error executing tool '{name}': thread did not return a result"
+            tool_duration = 0.0
+        else:
+            function_name, function_args, function_result, tool_duration, is_error, blocked = r
+
+            if not blocked:
+                function_result = agent._append_guardrail_observation(
+                    function_name,
+                    function_args,
+                    function_result,
+                    failed=is_error,
+                )
+
+            if is_error:
+                _err_text = _multimodal_text_summary(function_result)
+                result_preview = _err_text[:200] if len(_err_text) > 200 else _err_text
+                logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
+
+            # Track file-mutation outcome for the turn-end verifier.
+            # `blocked` calls never actually ran — don't let a guardrail
+            # block count as either a failure or a success.
+            if not blocked:
+                try:
+                    agent._record_file_mutation_result(
+                        function_name, function_args, function_result, is_error,
+                    )
+                except Exception as _ver_err:
+                    logging.debug("file-mutation verifier record failed: %s", _ver_err)
+
+            if not blocked and agent.tool_progress_callback:
+                try:
+                    agent.tool_progress_callback(
+                        "tool.completed", function_name, None, None,
+                        duration=tool_duration, is_error=is_error,
+                    )
+                except Exception as cb_err:
+                    logging.debug(f"Tool progress callback error: {cb_err}")
+
+            if agent.verbose_logging:
+                logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s")
+                logging.debug(f"Tool result ({len(function_result)} chars): {function_result}")
+
+        # Print cute message per tool
+        if agent._should_emit_quiet_tool_messages():
+            cute_msg = _get_cute_tool_message_impl(name, args, tool_duration, result=function_result)
+            agent._safe_print(f"  {cute_msg}")
+        elif not agent.quiet_mode:
+            _preview_str = _multimodal_text_summary(function_result)
+            if agent.verbose_logging:
+                print(f"  ✅ Tool {i+1} completed in {tool_duration:.2f}s")
+                print(agent._wrap_verbose("Result: ", _preview_str))
+            else:
+                response_preview = _preview_str[:agent.log_prefix_chars] + "..." if len(_preview_str) > agent.log_prefix_chars else _preview_str
+                print(f"  ✅ Tool {i+1} completed in {tool_duration:.2f}s - {response_preview}")
+
+        agent._current_tool = None
+        agent._touch_activity(f"tool completed: {name} ({tool_duration:.1f}s)")
+
+        if not blocked and agent.tool_complete_callback:
+            try:
+                agent.tool_complete_callback(tc.id, name, args, function_result)
+            except Exception as cb_err:
+                logging.debug(f"Tool complete callback error: {cb_err}")
+
+        function_result = maybe_persist_tool_result(
+            content=function_result,
+            tool_name=name,
+            tool_use_id=tc.id,
+            env=get_active_env(effective_task_id),
+        ) if not _is_multimodal_tool_result(function_result) else function_result
+
+        subdir_hints = agent._subdirectory_hints.check_tool_call(name, args)
+        if subdir_hints:
+            if _is_multimodal_tool_result(function_result):
+                # Append the hint to the text summary part so the model
+                # still sees it; don't touch the image blocks.
+                _append_subdir_hint_to_multimodal(function_result, subdir_hints)
+            else:
+                function_result += subdir_hints
+
+        # Unwrap _multimodal dicts to an OpenAI-style content list so any
+        # vision-capable provider receives [{type:text},{type:image_url}]
+        # rather than a raw Python dict.  The Anthropic adapter already
+        # accepts content lists; vision-capable OpenAI-compatible servers
+        # (mlx-vlm, GPT-4o, …) accept image_url in tool messages natively.
+        # Text-only servers get a string-safe fallback here so a rejected
+        # image tool result never poisons canonical session history.
+        # String results pass through unchanged.
+        _tool_content = agent._tool_result_content_for_active_model(name, function_result)
+        tool_msg = {
+            "role": "tool",
+            "name": name,
+            "content": _tool_content,
+            "tool_call_id": tc.id,
+        }
+        messages.append(tool_msg)
+
+        # ── Per-tool /steer drain ───────────────────────────────────
+        # Same as the sequential path: drain between each collected
+        # result so the steer lands as early as possible.
+        agent._apply_pending_steer_to_tool_results(messages, 1)
+
+    # ── Per-turn aggregate budget enforcement ─────────────────────────
+    num_tools = len(parsed_calls)
+    if num_tools > 0:
+        turn_tool_msgs = messages[-num_tools:]
+        enforce_turn_budget(turn_tool_msgs, env=get_active_env(effective_task_id))
+
+    # ── /steer injection ──────────────────────────────────────────────
+    # Append any pending user steer text to the last tool result so the
+    # agent sees it on its next iteration. Runs AFTER budget enforcement
+    # so the steer marker is never truncated. See steer() for details.
+    if num_tools > 0:
+        agent._apply_pending_steer_to_tool_results(messages, num_tools)
+
+
+
+def execute_tool_calls_sequential(agent, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
+    """Execute tool calls sequentially (original behavior). Used for single calls or interactive tools."""
+    for i, tool_call in enumerate(assistant_message.tool_calls, 1):
+        # SAFETY: check interrupt BEFORE starting each tool.
+        # If the user sent "stop" during a previous tool's execution,
+        # do NOT start any more tools -- skip them all immediately.
+        if agent._interrupt_requested:
+            remaining_calls = assistant_message.tool_calls[i-1:]
+            if remaining_calls:
+                agent._vprint(f"{agent.log_prefix}⚡ Interrupt: skipping {len(remaining_calls)} tool call(s)", force=True)
+            for skipped_tc in remaining_calls:
+                skipped_name = skipped_tc.function.name
+                skip_msg = {
+                    "role": "tool",
+                    "name": skipped_name,
+                    "content": f"[Tool execution cancelled — {skipped_name} was skipped due to user interrupt]",
+                    "tool_call_id": skipped_tc.id,
+                }
+                messages.append(skip_msg)
+            break
+
+        function_name = tool_call.function.name
+
+        try:
+            function_args = json.loads(tool_call.function.arguments)
+        except json.JSONDecodeError as e:
+            logging.warning(f"Unexpected JSON error after validation: {e}")
+            function_args = {}
+        if not isinstance(function_args, dict):
+            function_args = {}
+
+        # Check plugin hooks for a block directive before executing.
+        _block_msg: Optional[str] = None
+        try:
+            from hermes_cli.plugins import get_pre_tool_call_block_message
+            _block_msg = get_pre_tool_call_block_message(
+                function_name, function_args, task_id=effective_task_id or "",
+            )
+        except Exception:
+            pass
+
+        _guardrail_block_decision: ToolGuardrailDecision | None = None
+        if _block_msg is None:
+            guardrail_decision = agent._tool_guardrails.before_call(function_name, function_args)
+            if not guardrail_decision.allows_execution:
+                _guardrail_block_decision = guardrail_decision
+
+        _execution_blocked = _block_msg is not None or _guardrail_block_decision is not None
+
+        if _execution_blocked:
+            # Tool blocked by plugin or guardrail policy — skip counters,
+            # callbacks, checkpointing, activity mutation, and real execution.
+            pass
+        # Reset nudge counters when the relevant tool is actually used
+        elif function_name == "memory":
+            agent._turns_since_memory = 0
+        elif function_name == "skill_manage":
+            agent._iters_since_skill = 0
+
+        if not agent.quiet_mode:
+            args_str = json.dumps(function_args, ensure_ascii=False)
+            if agent.verbose_logging:
+                print(f"  📞 Tool {i}: {function_name}({list(function_args.keys())})")
+                print(agent._wrap_verbose("Args: ", json.dumps(function_args, indent=2, ensure_ascii=False)))
+            else:
+                args_preview = args_str[:agent.log_prefix_chars] + "..." if len(args_str) > agent.log_prefix_chars else args_str
+                print(f"  📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}")
+
+        if not _execution_blocked:
+            agent._current_tool = function_name
+            agent._touch_activity(f"executing tool: {function_name}")
+
+        # Set activity callback for long-running tool execution (terminal
+        # commands, etc.) so the gateway's inactivity monitor doesn't kill
+        # the agent while a command is running.
+        if not _execution_blocked:
+            try:
+                from tools.environments.base import set_activity_callback
+                set_activity_callback(agent._touch_activity)
+            except Exception:
+                pass
+
+        if not _execution_blocked and agent.tool_progress_callback:
+            try:
+                preview = _build_tool_preview(function_name, function_args)
+                agent.tool_progress_callback("tool.started", function_name, preview, function_args)
+            except Exception as cb_err:
+                logging.debug(f"Tool progress callback error: {cb_err}")
+
+        if not _execution_blocked and agent.tool_start_callback:
+            try:
+                agent.tool_start_callback(tool_call.id, function_name, function_args)
+            except Exception as cb_err:
+                logging.debug(f"Tool start callback error: {cb_err}")
+
+        # Checkpoint: snapshot working dir before file-mutating tools
+        if not _execution_blocked and function_name in {"write_file", "patch"} and agent._checkpoint_mgr.enabled:
+            try:
+                file_path = function_args.get("path", "")
+                if file_path:
+                    work_dir = agent._checkpoint_mgr.get_working_dir_for_path(file_path)
+                    agent._checkpoint_mgr.ensure_checkpoint(
+                        work_dir, f"before {function_name}"
+                    )
+            except Exception:
+                pass  # never block tool execution
+
+        # Checkpoint before destructive terminal commands
+        if not _execution_blocked and function_name == "terminal" and agent._checkpoint_mgr.enabled:
+            try:
+                cmd = function_args.get("command", "")
+                if _is_destructive_command(cmd):
+                    cwd = function_args.get("workdir") or os.getenv("TERMINAL_CWD", os.getcwd())
+                    agent._checkpoint_mgr.ensure_checkpoint(
+                        cwd, f"before terminal: {cmd[:60]}"
+                    )
+            except Exception:
+                pass  # never block tool execution
+
+        tool_start_time = time.time()
+
+        if _block_msg is not None:
+            # Tool blocked by plugin policy — return error without executing.
+            function_result = json.dumps({"error": _block_msg}, ensure_ascii=False)
+            tool_duration = 0.0
+        elif _guardrail_block_decision is not None:
+            # Tool blocked by tool-loop guardrail — synthesize exactly one
+            # tool result for the original tool_call_id without executing.
+            function_result = agent._guardrail_block_result(_guardrail_block_decision)
+            tool_duration = 0.0
+        elif function_name == "todo":
+            from tools.todo_tool import todo_tool as _todo_tool
+            function_result = _todo_tool(
+                todos=function_args.get("todos"),
+                merge=function_args.get("merge", False),
+                store=agent._todo_store,
+            )
+            tool_duration = time.time() - tool_start_time
+            if agent._should_emit_quiet_tool_messages():
+                agent._vprint(f"  {_get_cute_tool_message_impl('todo', function_args, tool_duration, result=function_result)}")
+        elif function_name == "session_search":
+            session_db = agent._get_session_db_for_recall()
+            if not session_db:
+                from hermes_state import format_session_db_unavailable
+                function_result = json.dumps({"success": False, "error": format_session_db_unavailable()})
+            else:
+                from tools.session_search_tool import session_search as _session_search
+                function_result = _session_search(
+                    query=function_args.get("query", ""),
+                    role_filter=function_args.get("role_filter"),
+                    limit=function_args.get("limit", 3),
+                    db=session_db,
+                    current_session_id=agent.session_id,
+                )
+            tool_duration = time.time() - tool_start_time
+            if agent._should_emit_quiet_tool_messages():
+                agent._vprint(f"  {_get_cute_tool_message_impl('session_search', function_args, tool_duration, result=function_result)}")
+        elif function_name == "memory":
+            target = function_args.get("target", "memory")
+            from tools.memory_tool import memory_tool as _memory_tool
+            function_result = _memory_tool(
+                action=function_args.get("action"),
+                target=target,
+                content=function_args.get("content"),
+                old_text=function_args.get("old_text"),
+                store=agent._memory_store,
+            )
+            # Bridge: notify external memory provider of built-in memory writes
+            if agent._memory_manager and function_args.get("action") in {"add", "replace"}:
+                try:
+                    agent._memory_manager.on_memory_write(
+                        function_args.get("action", ""),
+                        target,
+                        function_args.get("content", ""),
+                        metadata=agent._build_memory_write_metadata(
+                            task_id=effective_task_id,
+                            tool_call_id=getattr(tool_call, "id", None),
+                        ),
+                    )
+                except Exception:
+                    pass
+            tool_duration = time.time() - tool_start_time
+            if agent._should_emit_quiet_tool_messages():
+                agent._vprint(f"  {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}")
+        elif function_name == "clarify":
+            from tools.clarify_tool import clarify_tool as _clarify_tool
+            function_result = _clarify_tool(
+                question=function_args.get("question", ""),
+                choices=function_args.get("choices"),
+                callback=agent.clarify_callback,
+            )
+            tool_duration = time.time() - tool_start_time
+            if agent._should_emit_quiet_tool_messages():
+                agent._vprint(f"  {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}")
+        elif function_name == "delegate_task":
+            tasks_arg = function_args.get("tasks")
+            if tasks_arg and isinstance(tasks_arg, list):
+                spinner_label = f"🔀 delegating {len(tasks_arg)} tasks"
+            else:
+                goal_preview = (function_args.get("goal") or "")[:30]
+                spinner_label = f"🔀 {goal_preview}" if goal_preview else "🔀 delegating"
+            spinner = None
+            if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
+                face = random.choice(KawaiiSpinner.get_waiting_faces())
+                spinner = KawaiiSpinner(f"{face} {spinner_label}", spinner_type='dots', print_fn=agent._print_fn)
+                spinner.start()
+            agent._delegate_spinner = spinner
+            _delegate_result = None
+            try:
+                function_result = agent._dispatch_delegate_task(function_args)
+                _delegate_result = function_result
+            finally:
+                agent._delegate_spinner = None
+                tool_duration = time.time() - tool_start_time
+                cute_msg = _get_cute_tool_message_impl('delegate_task', function_args, tool_duration, result=_delegate_result)
+                if spinner:
+                    spinner.stop(cute_msg)
+                elif agent._should_emit_quiet_tool_messages():
+                    agent._vprint(f"  {cute_msg}")
+        elif agent._context_engine_tool_names and function_name in agent._context_engine_tool_names:
+            # Context engine tools (lcm_grep, lcm_describe, lcm_expand, etc.)
+            spinner = None
+            if agent._should_emit_quiet_tool_messages():
+                face = random.choice(KawaiiSpinner.get_waiting_faces())
+                emoji = _get_tool_emoji(function_name)
+                preview = _build_tool_preview(function_name, function_args) or function_name
+                spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
+                spinner.start()
+            _ce_result = None
+            try:
+                function_result = agent.context_compressor.handle_tool_call(function_name, function_args, messages=messages)
+                _ce_result = function_result
+            except Exception as tool_error:
+                function_result = json.dumps({"error": f"Context engine tool '{function_name}' failed: {tool_error}"})
+                logger.error("context_engine.handle_tool_call raised for %s: %s", function_name, tool_error, exc_info=True)
+            finally:
+                tool_duration = time.time() - tool_start_time
+                cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_ce_result)
+                if spinner:
+                    spinner.stop(cute_msg)
+                elif agent._should_emit_quiet_tool_messages():
+                    agent._vprint(f"  {cute_msg}")
+        elif agent._memory_manager and agent._memory_manager.has_tool(function_name):
+            # Memory provider tools (hindsight_retain, honcho_search, etc.)
+            # These are not in the tool registry — route through MemoryManager.
+            spinner = None
+            if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
+                face = random.choice(KawaiiSpinner.get_waiting_faces())
+                emoji = _get_tool_emoji(function_name)
+                preview = _build_tool_preview(function_name, function_args) or function_name
+                spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
+                spinner.start()
+            _mem_result = None
+            try:
+                function_result = agent._memory_manager.handle_tool_call(function_name, function_args)
+                _mem_result = function_result
+            except Exception as tool_error:
+                function_result = json.dumps({"error": f"Memory tool '{function_name}' failed: {tool_error}"})
+                logger.error("memory_manager.handle_tool_call raised for %s: %s", function_name, tool_error, exc_info=True)
+            finally:
+                tool_duration = time.time() - tool_start_time
+                cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_mem_result)
+                if spinner:
+                    spinner.stop(cute_msg)
+                elif agent._should_emit_quiet_tool_messages():
+                    agent._vprint(f"  {cute_msg}")
+        elif agent.quiet_mode:
+            spinner = None
+            if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
+                face = random.choice(KawaiiSpinner.get_waiting_faces())
+                emoji = _get_tool_emoji(function_name)
+                preview = _build_tool_preview(function_name, function_args) or function_name
+                spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
+                spinner.start()
+            _spinner_result = None
+            try:
+                function_result = _ra().handle_function_call(
+                    function_name, function_args, effective_task_id,
+                    tool_call_id=tool_call.id,
+                    session_id=agent.session_id or "",
+                    enabled_tools=list(agent.valid_tool_names) if agent.valid_tool_names else None,
+                    skip_pre_tool_call_hook=True,
+                )
+                _spinner_result = function_result
+            except Exception as tool_error:
+                function_result = f"Error executing tool '{function_name}': {tool_error}"
+                logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
+            finally:
+                tool_duration = time.time() - tool_start_time
+                cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_spinner_result)
+                if spinner:
+                    spinner.stop(cute_msg)
+                elif agent._should_emit_quiet_tool_messages():
+                    agent._vprint(f"  {cute_msg}")
+        else:
+            try:
+                function_result = _ra().handle_function_call(
+                    function_name, function_args, effective_task_id,
+                    tool_call_id=tool_call.id,
+                    session_id=agent.session_id or "",
+                    enabled_tools=list(agent.valid_tool_names) if agent.valid_tool_names else None,
+                    skip_pre_tool_call_hook=True,
+                )
+            except Exception as tool_error:
+                function_result = f"Error executing tool '{function_name}': {tool_error}"
+                logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
+            tool_duration = time.time() - tool_start_time
+
+        if isinstance(function_result, str):
+            result_preview = function_result if agent.verbose_logging else (
+                function_result[:200] if len(function_result) > 200 else function_result
+            )
+            _result_len = len(function_result)
+        else:
+            # Multimodal dict result (_multimodal=True) — not sliceable as string
+            result_preview = function_result
+            _result_len = len(str(function_result))
+
+        # Log tool errors to the persistent error log so [error] tags
+        # in the UI always have a corresponding detailed entry on disk.
+        _is_error_result, _ = _detect_tool_failure(function_name, function_result)
+        if not _execution_blocked:
+            function_result = agent._append_guardrail_observation(
+                function_name,
+                function_args,
+                function_result,
+                failed=_is_error_result,
+            )
+            result_preview = function_result if agent.verbose_logging else (
+                function_result[:200] if len(function_result) > 200 else function_result
+            )
+        if _is_error_result:
+            logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
+        else:
+            logger.info("tool %s completed (%.2fs, %d chars)", function_name, tool_duration, _result_len)
+
+        # Track file-mutation outcome for the turn-end verifier.  See
+        # the concurrent path for the rationale; both paths must feed
+        # the same state so the footer reflects every tool call in the
+        # turn, not just the parallel ones.
+        if not _execution_blocked:
+            try:
+                agent._record_file_mutation_result(
+                    function_name, function_args, function_result, _is_error_result,
+                )
+            except Exception as _ver_err:
+                logging.debug("file-mutation verifier record failed: %s", _ver_err)
+
+        if not _execution_blocked and agent.tool_progress_callback:
+            try:
+                agent.tool_progress_callback(
+                    "tool.completed", function_name, None, None,
+                    duration=tool_duration, is_error=_is_error_result,
+                )
+            except Exception as cb_err:
+                logging.debug(f"Tool progress callback error: {cb_err}")
+
+        agent._current_tool = None
+        agent._touch_activity(f"tool completed: {function_name} ({tool_duration:.1f}s)")
+
+        if agent.verbose_logging:
+            logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s")
+            _log_result = _multimodal_text_summary(function_result)
+            logging.debug(f"Tool result ({len(_log_result)} chars): {_log_result}")
+
+        if not _execution_blocked and agent.tool_complete_callback:
+            try:
+                agent.tool_complete_callback(tool_call.id, function_name, function_args, function_result)
+            except Exception as cb_err:
+                logging.debug(f"Tool complete callback error: {cb_err}")
+
+        function_result = maybe_persist_tool_result(
+            content=function_result,
+            tool_name=function_name,
+            tool_use_id=tool_call.id,
+            env=get_active_env(effective_task_id),
+        ) if not _is_multimodal_tool_result(function_result) else function_result
+
+        # Discover subdirectory context files from tool arguments
+        subdir_hints = agent._subdirectory_hints.check_tool_call(function_name, function_args)
+        if subdir_hints:
+            if _is_multimodal_tool_result(function_result):
+                _append_subdir_hint_to_multimodal(function_result, subdir_hints)
+            else:
+                function_result += subdir_hints
+
+        # Unwrap _multimodal dicts to an OpenAI-style content list
+        # (see concurrent path for rationale). String results pass through.
+        _tool_content = agent._tool_result_content_for_active_model(function_name, function_result)
+        tool_msg = {
+            "role": "tool",
+            "name": function_name,
+            "content": _tool_content,
+            "tool_call_id": tool_call.id
+        }
+        messages.append(tool_msg)
+
+        # ── Per-tool /steer drain ───────────────────────────────────
+        # Drain pending steer BETWEEN individual tool calls so the
+        # injection lands as soon as a tool finishes — not after the
+        # entire batch.  The model sees it on the next API iteration.
+        agent._apply_pending_steer_to_tool_results(messages, 1)
+
+        if not agent.quiet_mode:
+            if agent.verbose_logging:
+                print(f"  ✅ Tool {i} completed in {tool_duration:.2f}s")
+                print(agent._wrap_verbose("Result: ", function_result))
+            else:
+                _fr_str = function_result if isinstance(function_result, str) else str(function_result)
+                response_preview = _fr_str[:agent.log_prefix_chars] + "..." if len(_fr_str) > agent.log_prefix_chars else _fr_str
+                print(f"  ✅ Tool {i} completed in {tool_duration:.2f}s - {response_preview}")
+
+        if agent._interrupt_requested and i < len(assistant_message.tool_calls):
+            remaining = len(assistant_message.tool_calls) - i
+            agent._vprint(f"{agent.log_prefix}⚡ Interrupt: skipping {remaining} remaining tool call(s)", force=True)
+            for skipped_tc in assistant_message.tool_calls[i:]:
+                skipped_name = skipped_tc.function.name
+                skip_msg = {
+                    "role": "tool",
+                    "name": skipped_name,
+                    "content": f"[Tool execution skipped — {skipped_name} was not started. User sent a new message]",
+                    "tool_call_id": skipped_tc.id
+                }
+                messages.append(skip_msg)
+            break
+
+        if agent.tool_delay > 0 and i < len(assistant_message.tool_calls):
+            time.sleep(agent.tool_delay)
+
+    # ── Per-turn aggregate budget enforcement ─────────────────────────
+    num_tools_seq = len(assistant_message.tool_calls)
+    if num_tools_seq > 0:
+        enforce_turn_budget(messages[-num_tools_seq:], env=get_active_env(effective_task_id))
+
+    # ── /steer injection ──────────────────────────────────────────────
+    # See execute_tool_calls_concurrent for the rationale. Same hook,
+    # applied to sequential execution as well.
+    if num_tools_seq > 0:
+        agent._apply_pending_steer_to_tool_results(messages, num_tools_seq)
+
+
+
+
+__all__ = [
+    "execute_tool_calls_concurrent",
+    "execute_tool_calls_sequential",
+]
--- a/agent/transports/codex_app_server.py
+++ b/agent/transports/codex_app_server.py
@ -74,12 +74,43 @@ class CodexAppServerClient:
        env: Optional[dict[str, str]] = None,
    ) -> None:
        self._codex_bin = codex_bin
-        cmd = [codex_bin, "app-server"] + list(extra_args or [])
        spawn_env = os.environ.copy()
        if env:
            spawn_env.update(env)
        if codex_home:
            spawn_env["CODEX_HOME"] = codex_home
+
+        app_server_args = list(extra_args or [])
+        # Kanban workers must be able to write their handoff/status back to
+        # the board DB, which lives outside the per-task workspace. Keep the
+        # Codex sandbox on, but add the Kanban root as the only extra writable
+        # root. Without this, codex-runtime workers finish their actual work
+        # but crash/block when kanban_complete/kanban_block writes SQLite.
+        if spawn_env.get("HERMES_KANBAN_TASK"):
+            kanban_db = spawn_env.get("HERMES_KANBAN_DB")
+            kanban_root = (
+                os.path.dirname(kanban_db)
+                if kanban_db
+                else spawn_env.get(
+                    "HERMES_KANBAN_ROOT",
+                    os.path.join(
+                        spawn_env.get("HERMES_HOME", os.path.expanduser("~/.hermes")),
+                        "kanban",
+                    ),
+                )
+            )
+            app_server_args.extend(
+                [
+                    "-c",
+                    'sandbox_mode="workspace-write"',
+                    "-c",
+                    f'sandbox_workspace_write.writable_roots=["{kanban_root}"]',
+                    "-c",
+                    "sandbox_workspace_write.network_access=false",
+                ]
+            )
+
+        cmd = [codex_bin, "app-server"] + app_server_args
        # Codex emits tracing to stderr; default WARN keeps it quiet for users.
        spawn_env.setdefault("RUST_LOG", "warn")

--- a/agent/transports/codex_app_server_session.py
+++ b/agent/transports/codex_app_server_session.py
@ -404,7 +404,7 @@ class CodexAppServerSession:
            return result

        result.turn_id = (ts.get("turn") or {}).get("id")
-        deadline = time.time() + turn_timeout
+        deadline = time.monotonic() + turn_timeout
        turn_complete = False
        # Post-tool watchdog state. last_tool_completion_at is set whenever
        # a tool-shaped item completes; if no further notification arrives
@ -412,7 +412,7 @@ class CodexAppServerSession:
        # fast-fail and retire the session.
        last_tool_completion_at: Optional[float] = None

-        while time.time() < deadline and not turn_complete:
+        while time.monotonic() < deadline and not turn_complete:
            if self._interrupt_event.is_set():
                self._issue_interrupt(result.turn_id)
                result.interrupted = True
@ -440,7 +440,7 @@ class CodexAppServerSession:
            # up on this turn instead of waiting for the outer deadline.
            if (
                last_tool_completion_at is not None
-                and (time.time() - last_tool_completion_at)
+                and (time.monotonic() - last_tool_completion_at)
                    > post_tool_quiet_timeout
            ):
                self._issue_interrupt(result.turn_id)
@ -471,7 +471,7 @@ class CodexAppServerSession:
                        result.projected_messages.extend(proj.messages)
                    if proj.is_tool_iteration:
                        result.tool_iterations += 1
-                        last_tool_completion_at = time.time()
+                        last_tool_completion_at = time.monotonic()
                    if proj.final_text is not None:
                        result.final_text = proj.final_text
                        if _has_turn_aborted_marker(proj.final_text):
@ -514,7 +514,7 @@ class CodexAppServerSession:
                result.tool_iterations += 1
                # Arm/refresh the post-tool quiet watchdog whenever a
                # tool-shaped item completes.
-                last_tool_completion_at = time.time()
+                last_tool_completion_at = time.monotonic()
            else:
                # Any non-tool projected activity (assistant message,
                # status update, etc.) means codex is still producing
@ -541,7 +541,7 @@ class CodexAppServerSession:
                turn_status = (
                    (note.get("params") or {}).get("turn") or {}
                ).get("status")
-                if turn_status and turn_status not in ("completed", "interrupted"):
+                if turn_status and turn_status not in {"completed", "interrupted"}:
                    err_obj = (
                        (note.get("params") or {}).get("turn") or {}
                    ).get("error")
@ -775,9 +775,9 @@ def _approval_choice_to_codex_decision(choice: str) -> str:
    (verified against codex-rs/app-server-protocol/src/protocol/v2/item.rs
    on codex 0.130.0).
    """
-    if choice in ("once",):
+    if choice in {"once",}:
        return "accept"
-    if choice in ("session", "always"):
+    if choice in {"session", "always"}:
        return "acceptForSession"
    return "decline"

--- a/apps/dashboard/src/components/ChatSidebar.tsx
+++ b/apps/dashboard/src/components/ChatSidebar.tsx
@ -30,6 +30,7 @@ import { Card } from "@/components/ui/card";
 import { ModelPickerDialog } from "@/components/ModelPickerDialog";
 import { ToolCall, type ToolEntry } from "@/components/ToolCall";
 import { GatewayClient, type ConnectionState } from "@/lib/gatewayClient";
+import { HERMES_BASE_PATH } from "@/lib/api";

 import { cn } from "@/lib/utils";
 import { AlertCircle, ChevronDown, RefreshCw } from "lucide-react";
@ -160,7 +161,7 @@ export function ChatSidebar({ channel, className }: ChatSidebarProps) {
    const proto = window.location.protocol === "https:" ? "wss:" : "ws:";
    const qs = new URLSearchParams({ token, channel });
    const ws = new WebSocket(
-      `${proto}//${window.location.host}/api/events?${qs.toString()}`,
+      `${proto}//${window.location.host}${HERMES_BASE_PATH}/api/events?${qs.toString()}`,
    );

    // `unmounting` suppresses the banner during cleanup — `ws.close()`
--- a/apps/dashboard/src/lib/gatewayClient.ts
+++ b/apps/dashboard/src/lib/gatewayClient.ts
@ -5,6 +5,8 @@ import {
  type GatewayEventName,
 } from "@hermes/shared";

+import { HERMES_BASE_PATH } from "@/lib/api";
+
 export type { ConnectionState, GatewayEvent, GatewayEventName };

 /**
@ -24,7 +26,7 @@ export class GatewayClient extends JsonRpcGatewayClient {

    const scheme = location.protocol === "https:" ? "wss:" : "ws:";
    await super.connect(
-      `${scheme}//${location.host}/api/ws?token=${encodeURIComponent(resolved)}`,
+      `${scheme}//${location.host}${HERMES_BASE_PATH}/api/ws?token=${encodeURIComponent(resolved)}`,
    );
  }
 }
--- a/apps/dashboard/src/pages/ChatPage.tsx
+++ b/apps/dashboard/src/pages/ChatPage.tsx
@ -24,6 +24,7 @@ import { Terminal } from "@xterm/xterm";
 import "@xterm/xterm/css/xterm.css";
 import { Button } from "@nous-research/ui/ui/components/button";
 import { Typography } from "@/components/NouiTypography";
+import { HERMES_BASE_PATH } from "@/lib/api";
 import { cn } from "@/lib/utils";
 import { Copy, PanelRight, X } from "lucide-react";
 import { useCallback, useEffect, useMemo, useRef, useState } from "react";
@ -44,7 +45,7 @@ function buildWsUrl(
  const proto = window.location.protocol === "https:" ? "wss:" : "ws:";
  const qs = new URLSearchParams({ token, channel });
  if (resume) qs.set("resume", resume);
-  return `${proto}//${window.location.host}/api/pty?${qs.toString()}`;
+  return `${proto}//${window.location.host}${HERMES_BASE_PATH}/api/pty?${qs.toString()}`;
 }

 // Channel id ties this chat tab's PTY child (publisher) to its sidebar
@ -286,6 +287,17 @@ export default function ChatPage({ isActive = true }: { isActive?: boolean }) {
      fontWeight: "400",
      fontWeightBold: "700",
      macOptionIsMeta: true,
+      // Hold Option on macOS to force native text selection even when the
+      // inner Hermes TUI has enabled xterm mouse-events mode (CSI ?1000h
+      // family); on Linux/Windows xterm.js already uses Shift for this.
+      // Without it, click-and-drag in the chat canvas selects nothing and
+      // Cmd+C falls back to copying the entire visible buffer, which is
+      // rarely what the user wants. See #25720.
+      macOptionClickForcesSelection: true,
+      // Right-click selects the word under the pointer. xterm.js default
+      // is false; enabling it gives users a single-action selection
+      // path on top of the modifier-based bypass above.
+      rightClickSelectsWord: true,
      // Single-scroll-system experiment:
      // let the inner Hermes TUI own transcript history/scroll behavior.
      // The outer browser xterm should act as a display/input bridge only.
--- a/cli.py
+++ b/cli.py
@ -1396,7 +1396,7 @@ def _detect_light_mode() -> bool:
            last = cfgbg.split(";")[-1] if ";" in cfgbg else cfgbg
            if last.isdigit():
                bg = int(last)
-                if bg in (7, 15):
+                if bg in {7, 15}:
                    result = True
                    _LIGHT_MODE_CACHE = result
                    return result
@ -2412,6 +2412,7 @@ def _looks_like_slash_command(text: str) -> bool:

 from agent.skill_commands import (
    scan_skill_commands,
+    get_skill_commands,
    build_skill_invocation_message,
    build_preloaded_skills_prompt,
 )
@ -2824,6 +2825,11 @@ class HermesCLI:
        # turn (which would make Ctrl+C feel like it did nothing).
        self._last_turn_interrupted = False
        self._should_exit = False
+        # /exit --delete: when True, the current session's SQLite history and
+        # on-disk transcripts are deleted during shutdown. Set by
+        # process_command() when the user runs /exit --delete or /quit --delete.
+        # Ported from google-gemini/gemini-cli#19332.
+        self._delete_session_on_exit = False
        self._last_ctrl_c_time = 0
        self._clarify_state = None
        self._clarify_freetext = False
@ -7653,6 +7659,16 @@ class HermesCLI:
        canonical = _cmd_def.name if _cmd_def else _base_word
        
        if canonical in {"quit", "exit"}:
+            # Parse --delete flag: /exit --delete also removes the current
+            # session's transcripts + SQLite history. Ported from
+            # google-gemini/gemini-cli#19332.
+            _rest = cmd_original.split(None, 1)
+            _args = (_rest[1] if len(_rest) > 1 else "").strip().lower()
+            if _args in {"--delete", "-d"}:
+                self._delete_session_on_exit = True
+            elif _args:
+                _cprint(f"  {_DIM}✗ Unknown argument: {_escape(_args)}. Use /exit --delete to also remove session history.{_RST}")
+                return True
            return False
        elif canonical == "help":
            self.show_help()
@ -9598,12 +9614,18 @@ class HermesCLI:
        prompt caching intact.
        """
        try:
-            from agent.skill_commands import reload_skills
+            from agent.skill_commands import reload_skills, get_skill_commands

            if not self._command_running:
                print("🔄 Reloading skills...")

            result = reload_skills()
+
+            # Sync cli.py's module-level _skill_commands so all consumers
+            # (help display, command dispatch, Tab-completion lambda) see the
+            # updated dict without needing to restart the session.
+            global _skill_commands
+            _skill_commands = get_skill_commands()
            added = result.get("added", [])      # [{"name", "description"}, ...]
            removed = result.get("removed", [])  # [{"name", "description"}, ...]
            total = result.get("total", 0)
@ -12609,7 +12631,7 @@ class HermesCLI:


        _completer = SlashCommandCompleter(
-            skill_commands_provider=lambda: _skill_commands,
+            skill_commands_provider=lambda: get_skill_commands(),
            command_filter=cli_ref._command_available,
        )
        input_area = TextArea(
@ -13777,7 +13799,7 @@ class HermesCLI:
            if _errno == errno.EIO:
                pass  # suppress broken-stdout I/O errors on interrupt (#13710)
            elif (
-                _errno in (errno.EINVAL, errno.EBADF)
+                _errno in {errno.EINVAL, errno.EBADF}
                or "is not registered" in _msg
                or "Bad file descriptor" in _msg
                or "Invalid argument" in _msg
@ -13824,6 +13846,19 @@ class HermesCLI:
                    self._session_db.end_session(self.agent.session_id, "cli_close")
                except (Exception, KeyboardInterrupt) as e:
                    logger.debug("Could not close session in DB: %s", e)
+                # /exit --delete: also remove the current session's transcripts
+                # and SQLite history. Ported from google-gemini/gemini-cli#19332.
+                if getattr(self, '_delete_session_on_exit', False):
+                    try:
+                        from hermes_constants import get_hermes_home as _ghh
+                        _sessions_dir = _ghh() / "sessions"
+                        _sid = self.agent.session_id
+                        if self._session_db.delete_session(_sid, sessions_dir=_sessions_dir):
+                            _cprint(f"  {_DIM}✓ Session {_escape(_sid)} deleted{_RST}")
+                        else:
+                            _cprint(f"  {_DIM}✗ Session {_escape(_sid)} not found for deletion{_RST}")
+                    except (Exception, KeyboardInterrupt) as e:
+                        logger.debug("Could not delete session on exit: %s", e)
            # Plugin hook: on_session_end — safety net for interrupted exits.
            # run_conversation() already fires this per-turn on normal completion,
            # so only fire here if the agent was mid-turn (_agent_running) when
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@ -1802,7 +1802,12 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
                for job in parallel_jobs:
                    _ctx = contextvars.copy_context()
                    _futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
-                _results.extend(f.result() for f in _futures)
+                for f in concurrent.futures.as_completed(_futures, timeout=600):
+                    try:
+                        _results.append(f.result())
+                    except Exception as exc:
+                        logger.error("Parallel cron job future failed: %s", exc)
+                        _results.append(False)

        # Best-effort sweep of MCP stdio subprocesses that survived their
        # session teardown during this tick.  Runs AFTER every job has
--- a/gateway/platforms/api_server.py
+++ b/gateway/platforms/api_server.py
@ -71,6 +71,35 @@ def _coerce_port(value: Any, default: int = DEFAULT_PORT) -> int:
        return default


+_TRUE_REQUEST_BOOL_STRINGS = frozenset({"1", "true", "yes", "on"})
+_FALSE_REQUEST_BOOL_STRINGS = frozenset({"0", "false", "no", "off"})
+
+
+def _coerce_request_bool(value: Any, default: bool = False) -> bool:
+    """Interpret a loosely-typed request flag as a real ``bool``.
+
+    Clients are expected to send JSON booleans, but some OpenAI-compatible
+    frontends and middleware serialize flags such as ``stream`` as strings.
+    Plain truthiness would then treat ``"false"`` as enabled, so the value is
+    decoded explicitly instead:
+
+    * ``bool``        -> returned unchanged
+    * ``str``         -> matched case-insensitively, after stripping
+                         whitespace, against the true/false spellings
+                         above; any other text yields ``default``
+    * ``int``/``float`` -> ``bool(value)``
+    * anything else (including ``None``) -> ``default``
+    """
+    # Must come before the numeric test: ``bool`` is a subclass of ``int``.
+    if isinstance(value, bool):
+        return value
+    if isinstance(value, str):
+        token = value.strip().lower()
+        if token in _TRUE_REQUEST_BOOL_STRINGS:
+            return True
+        return False if token in _FALSE_REQUEST_BOOL_STRINGS else default
+    if isinstance(value, (int, float)):
+        return bool(value)
+    # ``None``, containers and any other type carry no usable signal.
+    return default
+
+
 def _normalize_chat_content(
    content: Any, *, _max_depth: int = 10, _depth: int = 0,
 ) -> str:
@ -481,7 +510,12 @@ else:
    body_limit_middleware = None  # type: ignore[assignment]

 _SECURITY_HEADERS = {
+    "Content-Security-Policy": "default-src 'none'; frame-ancestors 'none'",
+    "Permissions-Policy": "camera=(), microphone=(), geolocation=()",
+    "Strict-Transport-Security": "max-age=31536000; includeSubDomains",
    "X-Content-Type-Options": "nosniff",
+    "X-Frame-Options": "DENY",
+    "X-XSS-Protection": "0",
    "Referrer-Policy": "no-referrer",
 }

@ -1005,7 +1039,7 @@ class APIServerAdapter(BasePlatformAdapter):
                status=400,
            )

-        stream = body.get("stream", False)
+        stream = _coerce_request_bool(body.get("stream"), default=False)

        # Extract system message (becomes ephemeral system prompt layered ON TOP of core)
        system_prompt = None
@ -2082,7 +2116,7 @@ class APIServerAdapter(BasePlatformAdapter):
        instructions = body.get("instructions")
        previous_response_id = body.get("previous_response_id")
        conversation = body.get("conversation")
-        store = body.get("store", True)
+        store = _coerce_request_bool(body.get("store"), default=True)

        # conversation and previous_response_id are mutually exclusive
        if conversation and previous_response_id:
@ -2165,7 +2199,7 @@ class APIServerAdapter(BasePlatformAdapter):
        # groups the entire conversation under one session entry.
        session_id = stored_session_id or str(uuid.uuid4())

-        stream = bool(body.get("stream", False))
+        stream = _coerce_request_bool(body.get("stream"), default=False)
        if stream:
            # Streaming branch — emit OpenAI Responses SSE events as the
            # agent runs so frontends can render text deltas and tool
@ -3228,7 +3262,10 @@ class APIServerAdapter(BasePlatformAdapter):
                status=409,
            )

-        resolve_all = bool(body.get("all") or body.get("resolve_all"))
+        resolve_all = (
+            _coerce_request_bool(body.get("all"), default=False)
+            or _coerce_request_bool(body.get("resolve_all"), default=False)
+        )
        try:
            from tools.approval import resolve_gateway_approval

--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@ -2014,6 +2014,13 @@ class BasePlatformAdapter(ABC):
            text = f"{caption}\n{text}"
        return await self.send(chat_id=chat_id, content=text, reply_to=reply_to, metadata=metadata)

+    def prepare_tts_text(self, text: str) -> str:
+        """Return the text that should be handed to the TTS engine.
+
+        The base implementation deletes the markdown punctuation characters
+        ``* _ ` # [ ] ( )``, keeps the first 4000 characters of what remains,
+        and finally trims surrounding whitespace.  Override to filter tool
+        output, code, etc.
+        """
+        without_markup = re.sub(r'[*_`#\[\]()]', '', text)
+        # Clip before stripping, so the result can be shorter than 4000.
+        clipped = without_markup[:4000]
+        return clipped.strip()
+
    async def play_tts(
        self,
        chat_id: str,
@ -3144,7 +3151,7 @@ class BasePlatformAdapter(ABC):
                        from tools.tts_tool import text_to_speech_tool, check_tts_requirements
                        if check_tts_requirements():
                            import json as _json
-                            speech_text = re.sub(r'[*_`#\[\]()]', '', text_content)[:4000].strip()
+                            speech_text = self.prepare_tts_text(text_content)
                            if not speech_text:
                                raise ValueError("Empty text after markdown cleanup")
                            tts_result_str = await asyncio.to_thread(
--- a/gateway/platforms/discord.py
+++ b/gateway/platforms/discord.py
@ -3639,18 +3639,18 @@ class DiscordAdapter(BasePlatformAdapter):
        configured = self.config.extra.get("thread_require_mention")
        if configured is not None:
            if isinstance(configured, str):
-                return configured.lower() not in ("false", "0", "no", "off")
+                return configured.lower() not in {"false", "0", "no", "off"}
            return bool(configured)
-        return os.getenv("DISCORD_THREAD_REQUIRE_MENTION", "false").lower() in ("true", "1", "yes", "on")
+        return os.getenv("DISCORD_THREAD_REQUIRE_MENTION", "false").lower() in {"true", "1", "yes", "on"}

    def _discord_history_backfill(self) -> bool:
        """Return whether history backfill is enabled for shared sessions."""
+        # Resolution order: an explicit ``history_backfill`` entry in
+        # ``config.extra`` wins; otherwise fall back to the
+        # DISCORD_HISTORY_BACKFILL environment variable (default "true").
        configured = self.config.extra.get("history_backfill")
        if configured is not None:
            if isinstance(configured, str):
-                return configured.lower() not in ("false", "0", "no", "off")
+                return configured.lower() not in {"false", "0", "no", "off"}
            return bool(configured)
-        return os.getenv("DISCORD_HISTORY_BACKFILL", "true").lower() in ("true", "1", "yes")
+        # "on" is accepted here too: the config-string branch above already
+        # treats "on" as enabled, and the DISCORD_THREAD_REQUIRE_MENTION
+        # fallback accepts it.  Without it, DISCORD_HISTORY_BACKFILL=on
+        # would silently *disable* a feature whose default is enabled.
+        return os.getenv("DISCORD_HISTORY_BACKFILL", "true").lower() in {"true", "1", "yes", "on"}

    def _discord_history_backfill_limit(self) -> int:
        """Return the max number of messages to scan backwards for context.
@ -3737,7 +3737,7 @@ class DiscordAdapter(BasePlatformAdapter):
                    break

                # Skip system messages (pins, joins, thread renames, etc.)
-                if msg.type not in (discord.MessageType.default, discord.MessageType.reply):
+                if msg.type not in {discord.MessageType.default, discord.MessageType.reply}:
                    continue

                # Respect DISCORD_ALLOW_BOTS for other bots.
--- a/gateway/platforms/helpers.py
+++ b/gateway/platforms/helpers.py
@ -168,8 +168,8 @@ class TextBatchAggregator:
 # Pre-compiled regexes for performance
 _RE_BOLD = re.compile(r"\*\*(.+?)\*\*", re.DOTALL)
 _RE_ITALIC_STAR = re.compile(r"\*(.+?)\*", re.DOTALL)
-_RE_BOLD_UNDER = re.compile(r"__(.+?)__", re.DOTALL)
-_RE_ITALIC_UNDER = re.compile(r"_(.+?)_", re.DOTALL)
+_RE_BOLD_UNDER = re.compile(r"\b__(?![\s_])(.+?)(?<![\s_])__\b", re.DOTALL)
+_RE_ITALIC_UNDER = re.compile(r"\b_(?![\s_])(.+?)(?<![\s_])_\b", re.DOTALL)
 _RE_CODE_BLOCK = re.compile(r"```[a-zA-Z0-9_+-]*\n?")
 _RE_INLINE_CODE = re.compile(r"`(.+?)`")
 _RE_HEADING = re.compile(r"^#{1,6}\s+", re.MULTILINE)
--- a/gateway/platforms/matrix.py
+++ b/gateway/platforms/matrix.py
@ -348,6 +348,17 @@ class MatrixAdapter(BasePlatformAdapter):
        self._sync_task: Optional[asyncio.Task] = None
        self._closing = False
        self._startup_ts: float = 0.0
+        # Clock-skew detection: count grace-check drops that happen well
+        # after startup (i.e. not initial-sync backfill).  If the host's
+        # system clock is set ahead of real time, the startup grace check
+        # `event_ts < startup_ts - 5` silently drops every live message.
+        # See #12614 — the symptom is "bot joins rooms but never replies".
+        # Drops only count when their skew matches the first sampled drop
+        # (within 60s), so varied-age backfill from freshly-invited rooms
+        # doesn't trip the heuristic.
+        self._late_grace_drops: int = 0
+        self._late_grace_skew: float = 0.0
+        self._clock_skew_warned: bool = False

        # Cache: room_id → bool (is DM)
        self._dm_rooms: Dict[str, bool] = {}
@ -842,6 +853,11 @@ class MatrixAdapter(BasePlatformAdapter):

        # Initial sync to catch up, then start background sync.
        self._startup_ts = time.time()
+        # Reset clock-skew detector for each connect cycle so a reconnect
+        # after the user fixes NTP doesn't inherit stale counters.
+        self._late_grace_drops = 0
+        self._late_grace_skew = 0.0
+        self._clock_skew_warned = False
        self._closing = False

        try:
@ -1542,6 +1558,49 @@ class MatrixAdapter(BasePlatformAdapter):
        )
        event_ts = raw_ts / 1000.0 if raw_ts else 0.0
        if event_ts and event_ts < self._startup_ts - _STARTUP_GRACE_SECONDS:
+            # If we are well past startup but events are still being dropped
+            # by the grace check, the host clock is probably set ahead of
+            # real time — every live event then looks "older than startup".
+            # Warn once so users can fix NTP instead of chasing a ghost.
+            # See #12614 (Schnurzel700, April 2026).
+            #
+            # Filter out backfill (events legitimately old) by requiring:
+            #  - we are >30s past startup (initial-sync replay window closed)
+            #  - the skew is *consistent* across consecutive drops, which is
+            #    the signature of a constant clock offset rather than a
+            #    variable-age room history.  Backfill from a freshly invited
+            #    room can deliver events spanning hours/days — those skews
+            #    will be all over the place and reset the counter.
+            if not self._clock_skew_warned and (
+                time.time() - self._startup_ts > 30
+            ):
+                skew = self._startup_ts - event_ts
+                # Sanity bound: malformed events with negative or absurd
+                # timestamps shouldn't count.
+                if 5 < skew < 86400:
+                    if self._late_grace_drops == 0:
+                        self._late_grace_skew = skew
+                        self._late_grace_drops = 1
+                    elif abs(skew - self._late_grace_skew) < 60:
+                        # Consistent offset → likely real clock skew.
+                        self._late_grace_drops += 1
+                    else:
+                        # Varied skew → likely backfill, restart sampling.
+                        self._late_grace_skew = skew
+                        self._late_grace_drops = 1
+                    if self._late_grace_drops >= 3:
+                        logger.warning(
+                            "Matrix: dropped %d consecutive live events as "
+                            "'too old' more than 30s after startup (skew "
+                            "≈ %.0fs). The host system clock is likely set "
+                            "ahead of real time, which causes the startup "
+                            "grace filter to silently discard every incoming "
+                            "message. Run `timedatectl set-ntp true` (or "
+                            "sync NTP) and restart the bot.",
+                            self._late_grace_drops,
+                            skew,
+                        )
+                        self._clock_skew_warned = True
            return

        # Extract content from the event.
--- a/gateway/platforms/slack.py
+++ b/gateway/platforms/slack.py
@ -482,7 +482,7 @@ class SlackAdapter(BasePlatformAdapter):
            "text": text,
        }
        try:
-            async with aiohttp.ClientSession() as session:
+            async with aiohttp.ClientSession(trust_env=True) as session:
                async with session.post(
                    ctx["response_url"],
                    json=payload,
--- a/gateway/platforms/sms.py
+++ b/gateway/platforms/sms.py
@ -128,6 +128,7 @@ class SmsAdapter(BasePlatformAdapter):
        await site.start()
        self._http_session = aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(total=30),
+            trust_env=True,
        )
        self._running = True

@ -169,6 +170,7 @@ class SmsAdapter(BasePlatformAdapter):

        session = self._http_session or aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(total=30),
+            trust_env=True,
        )
        try:
            for chunk in chunks:
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@ -1663,7 +1663,17 @@ class TelegramAdapter(BasePlatformAdapter):
                                continue
                        raise
                message_ids.append(str(msg.message_id))
-            
+
+            # Re-trigger typing indicator after sending a message.
+            # Telegram clears the typing state when a new message is delivered,
+            # so without this the "...typing" bubble disappears mid-response
+            # (especially noticeable when the agent sends intermediate progress
+            # messages like "Checking:" before running tools).
+            try:
+                await self.send_typing(chat_id, metadata=metadata)
+            except Exception:
+                pass  # Typing failures are non-fatal
+
            return SendResult(
                success=True,
                message_id=message_ids[0] if message_ids else None,
--- a/gateway/run.py
+++ b/gateway/run.py
@ -4763,11 +4763,106 @@ class GatewayRunner:
                            pass
            return False

+        # Auto-decompose: turn fresh triage tasks into ready workgraphs
+        # before the dispatcher fans out workers. Gated by
+        # ``kanban.auto_decompose`` (default True). Capped by
+        # ``kanban.auto_decompose_per_tick`` (default 3) so a bulk-load
+        # of triage tasks doesn't burst-spend the aux LLM in one tick;
+        # remainder defers to subsequent ticks.
+        auto_decompose_enabled = bool(kanban_cfg.get("auto_decompose", True))
+        try:
+            auto_decompose_per_tick = int(
+                kanban_cfg.get("auto_decompose_per_tick", 3) or 3
+            )
+        except (TypeError, ValueError):
+            auto_decompose_per_tick = 3
+        if auto_decompose_per_tick < 1:
+            auto_decompose_per_tick = 1
+
+        def _auto_decompose_tick() -> int:
+            """Run the auto-decomposer for up to N triage tasks across all
+            boards. Returns the number of triage tasks that were
+            successfully decomposed or specified this tick.
+
+            N is ``auto_decompose_per_tick`` from the enclosing scope.  The
+            budget is spent per *attempt*: a task whose ``decompose_task``
+            call raises, or whose outcome is not ``ok``, still uses up one
+            slot.  Only outcomes with ``ok`` set are counted in the return
+            value.  Returns 0 without touching any board when the
+            ``kanban_decompose`` module cannot be imported.
+            """
+            try:
+                from hermes_cli import kanban_decompose as _decomp
+            except Exception as exc:  # pragma: no cover
+                logger.warning(
+                    "kanban auto-decompose: import failed (%s); skipping", exc,
+                )
+                return 0
+            try:
+                boards = _kb.list_boards(include_archived=False)
+            except Exception:
+                # Board listing failed — fall back to the default board only.
+                boards = [_kb.read_board_metadata(_kb.DEFAULT_BOARD)]
+            attempted = 0
+            successes = 0
+            for b in boards:
+                slug = b.get("slug") or _kb.DEFAULT_BOARD
+                # Budget is shared across boards, not per board.
+                if attempted >= auto_decompose_per_tick:
+                    break
+                # Pin this board for the duration of the call — same
+                # pattern as the dashboard specify endpoint. The
+                # decomposer module connects with no board kwarg and
+                # relies on the env var.
+                # NOTE(review): os.environ is process-wide and this function
+                # is run through asyncio.to_thread — confirm nothing else
+                # reads HERMES_KANBAN_BOARD while a board is pinned here.
+                prev_env = os.environ.get("HERMES_KANBAN_BOARD")
+                try:
+                    os.environ["HERMES_KANBAN_BOARD"] = slug
+                    try:
+                        triage_ids = _decomp.list_triage_ids()
+                    except Exception as exc:
+                        logger.debug(
+                            "kanban auto-decompose: list_triage_ids failed on board %s (%s)",
+                            slug, exc,
+                        )
+                        triage_ids = []
+                    for tid in triage_ids:
+                        if attempted >= auto_decompose_per_tick:
+                            break
+                        # Count the attempt before the call so that a task
+                        # which crashes below still consumes budget.
+                        attempted += 1
+                        try:
+                            outcome = _decomp.decompose_task(
+                                tid, author="auto-decomposer",
+                            )
+                        except Exception:
+                            logger.exception(
+                                "kanban auto-decompose: decompose_task crashed on %s",
+                                tid,
+                            )
+                            continue
+                        if outcome.ok:
+                            successes += 1
+                            if outcome.fanout and outcome.child_ids:
+                                logger.info(
+                                    "kanban auto-decompose [%s]: %s → %d children",
+                                    slug, tid, len(outcome.child_ids),
+                                )
+                            else:
+                                logger.info(
+                                    "kanban auto-decompose [%s]: %s → single task (no fanout)",
+                                    slug, tid,
+                                )
+                        else:
+                            # Common no-op reasons (no aux client configured) shouldn't
+                            # spam logs every tick. Log at debug.
+                            logger.debug(
+                                "kanban auto-decompose [%s]: %s skipped: %s",
+                                slug, tid, outcome.reason,
+                            )
+                finally:
+                    # Restore the caller's board selection: remove the
+                    # variable if it was unset before, else put back the
+                    # previous value.
+                    if prev_env is None:
+                        os.environ.pop("HERMES_KANBAN_BOARD", None)
+                    else:
+                        os.environ["HERMES_KANBAN_BOARD"] = prev_env
+            return successes
+
        logger.info(
            "kanban dispatcher: embedded in gateway (interval=%.1fs)", interval
        )
        while self._running:
            try:
+                if auto_decompose_enabled:
+                    await asyncio.to_thread(_auto_decompose_tick)
                results = await asyncio.to_thread(_tick_once)
                any_spawned = False
                for slug, res in (results or []):
@ -8845,7 +8940,7 @@ class GatewayRunner:
                lines.append("Failed/paused: (none)")
            return "\n".join(lines)

-        if action in ("pause", "resume"):
+        if action in {"pause", "resume"}:
            if not target:
                return f"Usage: /platform {action} <name>"
            platform = _resolve_platform(target)
@ -8953,13 +9048,15 @@ class GatewayRunner:
            logger.debug("Failed to write restart dedup marker: %s", e)

        active_agents = self._running_agent_count()
-        # When running under a service manager (systemd/launchd), use the
-        # service restart path: exit with code 75 so the service manager
-        # restarts us.  The detached subprocess approach (setsid + bash)
-        # doesn't work under systemd because KillMode=mixed kills all
-        # processes in the cgroup, including the detached helper.
+        # When running under a service manager (systemd/launchd) or inside a
+        # Docker/Podman container, use the service restart path: exit with
+        # code 75 so the service manager / container restart policy restarts
+        # us.  The detached subprocess approach (setsid + bash) doesn't work
+        # under systemd (KillMode=mixed kills the cgroup) or Docker (tini
+        # exits when the gateway dies, taking the detached helper with it).
        _under_service = bool(os.environ.get("INVOCATION_ID"))  # systemd sets this
-        if _under_service:
+        _in_container = os.path.exists("/.dockerenv") or os.path.exists("/run/.containerenv")
+        if _under_service or _in_container:
            self.request_restart(detached=False, via_service=True)
        else:
            self.request_restart(detached=True, via_service=False)
@ -12528,6 +12625,12 @@ class GatewayRunner:
            and getattr(source, "chat_type", None) == "dm"
        ):
            metadata["telegram_dm_topic_reply_fallback"] = True
+            # Telegram DM topic lanes need direct_messages_topic_id in metadata
+            # so synthetic/queued messages (goal continuations, status notices)
+            # route to the correct topic even when reply anchor is unavailable.
+            tid = str(thread_id)
+            if tid and tid not in {"", "1"}:
+                metadata["direct_messages_topic_id"] = tid
            anchor = reply_to_message_id or getattr(source, "message_id", None)
            if anchor is not None:
                metadata["telegram_reply_to_message_id"] = str(anchor)
@ -12813,7 +12916,11 @@ class GatewayRunner:
                update_cmd = (
                    f"PYTHONUNBUFFERED=1 {hermes_cmd_str} update --gateway"
                    f" > {shlex.quote(str(output_path))} 2>&1; "
-                    f"status=$?; printf '%s' \"$status\" > {shlex.quote(str(exit_code_path))}"
+                    # Avoid `status=$?`: `status` is a read-only special parameter
+                    # in zsh, and this command string is copied/reused in macOS/zsh
+                    # operator wrappers. Keep the template zsh-safe even though this
+                    # specific subprocess currently runs under bash.
+                    f"rc=$?; printf '%s' \"$rc\" > {shlex.quote(str(exit_code_path))}"
                )
                setsid_bin = shutil.which("setsid")
                if setsid_bin:
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
--- a/hermes_cli/codex_runtime_switch.py
+++ b/hermes_cli/codex_runtime_switch.py
@ -48,9 +48,9 @@ def parse_args(arg_string: str) -> tuple[Optional[str], list[str]]:
    if not raw:
        return None, []
    # Accept human-friendly synonyms
-    if raw in ("on", "codex", "enable"):
+    if raw in {"on", "codex", "enable"}:
        return "codex_app_server", []
-    if raw in ("off", "default", "disable", "hermes"):
+    if raw in {"off", "default", "disable", "hermes"}:
        return "auto", []
    if raw in VALID_RUNTIMES:
        return raw, []
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@ -123,7 +123,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("model", "Switch model for this session", "Configuration",
               aliases=("provider",), args_hint="[model] [--provider name] [--global]"),
    CommandDef("codex-runtime", "Toggle codex app-server runtime for OpenAI/Codex models",
-               "Configuration", args_hint="[auto|codex_app_server]"),
+               "Configuration", aliases=("codex_runtime",),
+               args_hint="[auto|codex_app_server]"),
    CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info",
               cli_only=True),

--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@ -926,6 +926,31 @@ DEFAULT_CONFIG = {
            "timeout": 120,
            "extra_body": {},
        },
+        # Kanban decomposer — decomposes a triage task into a graph of
+        # child tasks routed to specialist profiles by description.
+        # Invoked by ``hermes kanban decompose`` and the kanban
+        # auto-decompose dispatcher tick. Returns a JSON task graph;
+        # uses more tokens than the specifier so allow more headroom.
+        "kanban_decomposer": {
+            "provider": "auto",
+            "model": "",
+            "base_url": "",
+            "api_key": "",
+            "timeout": 180,
+            "extra_body": {},
+        },
+        # Profile describer — auto-generates a 1-2 sentence description
+        # of what a profile is good at. Invoked by
+        # ``hermes profile describe <name> --auto`` and the dashboard's
+        # auto-generate button. Short, cheap call.
+        "profile_describer": {
+            "provider": "auto",
+            "model": "",
+            "base_url": "",
+            "api_key": "",
+            "timeout": 60,
+            "extra_body": {},
+        },
        # Curator — skill-usage review fork. Timeout is generous because the
        # review pass can take several minutes on reasoning models (umbrella
        # building over hundreds of candidate skills). "auto" = use main chat
@ -1473,6 +1498,25 @@ DEFAULT_CONFIG = {
        # same task/profile (spawn_failed, timed_out, or crashed). Reassignment
        # resets the streak for the new profile.
        "failure_limit": 2,
+        # Profile that decomposes tasks in the Triage column. When unset,
+        # falls back to the default profile (the one `hermes` launches with
+        # no -p flag). Set this to a dedicated 'orchestrator' profile if you
+        # want decomposition to use a different model/skills from your main
+        # working profile.
+        "orchestrator_profile": "",
+        # Where a child task lands if the orchestrator can't match an
+        # assignee to any installed profile. When unset, falls back to the
+        # default profile. A task never ends up with assignee=None.
+        "default_assignee": "",
+        # When true, the kanban dispatcher auto-runs the decomposer on
+        # tasks that land in Triage (every dispatcher tick). When false,
+        # decomposition is manual via `hermes kanban decompose <id>` or
+        # the dashboard's Decompose button.
+        "auto_decompose": True,
+        # Max triage tasks to decompose per dispatcher tick. Prevents a
+        # large bulk-load of triage tasks from spending a burst of aux
+        # LLM calls in one tick. Excess tasks defer to the next tick.
+        "auto_decompose_per_tick": 3,
    },

    # execute_code settings — controls the tool used for programmatic tool calls.
@ -2913,6 +2957,7 @@ def _normalize_custom_provider_entry(
        "api_mode", "transport", "model", "default_model", "models",
        "context_length", "rate_limit_delay",
        "request_timeout_seconds", "stale_timeout_seconds",
+        "discover_models",
    }
    for camel, snake in _CAMEL_ALIASES.items():
        if camel in entry and snake not in entry:
@ -3003,6 +3048,10 @@ def _normalize_custom_provider_entry(
    if isinstance(rate_limit_delay, (int, float)) and rate_limit_delay >= 0:
        normalized["rate_limit_delay"] = rate_limit_delay

+    discover_models = entry.get("discover_models")
+    if isinstance(discover_models, bool):
+        normalized["discover_models"] = discover_models
+
    return normalized


--- a/hermes_cli/dep_ensure.py
+++ b/hermes_cli/dep_ensure.py
@ -91,7 +91,7 @@ def ensure_dependency(dep: str, interactive: bool = True) -> bool:
            reply = input(f"{desc} is not installed. Install now? [Y/n] ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            return False
-        if reply not in ("", "y", "yes"):
+        if reply not in {"", "y", "yes"}:
            return False

    result = subprocess.run(
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@ -160,19 +160,25 @@ def _has_healthy_oauth_fallback_for_apikey_provider(provider_label: str) -> bool
    still show a failed API-key connectivity row, but it should not promote
    that direct-key problem into the final blocking summary.
    """
-    try:
-        from hermes_cli.auth import (
-            get_gemini_oauth_auth_status,
-            get_minimax_oauth_auth_status,
-        )
-    except Exception:
-        return False
-
    normalized = (provider_label or "").strip().lower()
    if normalized in {"google / gemini", "gemini"}:
-        return bool((get_gemini_oauth_auth_status() or {}).get("logged_in"))
+        try:
+            from hermes_cli.auth import get_gemini_oauth_auth_status
+            return bool((get_gemini_oauth_auth_status() or {}).get("logged_in"))
+        except Exception:
+            return False
    if normalized == "minimax":
-        return bool((get_minimax_oauth_auth_status() or {}).get("logged_in"))
+        try:
+            from hermes_cli.auth import get_minimax_oauth_auth_status
+            return bool((get_minimax_oauth_auth_status() or {}).get("logged_in"))
+        except Exception:
+            return False
+    if normalized == "xai":
+        try:
+            from hermes_cli.auth import get_xai_oauth_auth_status
+            return bool((get_xai_oauth_auth_status() or {}).get("logged_in"))
+        except Exception:
+            return False
    return False


@ -645,31 +651,41 @@ def run_doctor(args):

            # Check credentials for the configured provider.
            # Limit to API-key providers in PROVIDER_REGISTRY — other provider
-            # types (OAuth, SDK, openrouter/anthropic/custom/auto) have their
-            # own env-var checks elsewhere in doctor, and get_auth_status()
-            # returns a bare {logged_in: False} for anything it doesn't
-            # explicitly dispatch, which would produce false positives.
-            if runtime_provider and runtime_provider not in {"auto", "custom", "openrouter"}:
+            # types (OAuth, SDK, anthropic/custom/auto) have their own env-var
+            # checks elsewhere in doctor, and get_auth_status() returns a bare
+            # {logged_in: False} for anything it doesn't explicitly dispatch,
+            # which would produce false positives.
+            if runtime_provider and runtime_provider not in ("auto", "custom"):
                try:
-                    from hermes_cli.auth import PROVIDER_REGISTRY, get_auth_status
-                    pconfig = PROVIDER_REGISTRY.get(runtime_provider)
-                    if pconfig and getattr(pconfig, "auth_type", "") == "api_key":
-                        status = get_auth_status(runtime_provider) or {}
+                    if runtime_provider == "openrouter":
+                        from hermes_cli.config import get_env_value
+
                        configured = bool(
-                            status.get("configured")
-                            or status.get("logged_in")
-                            or status.get("api_key")
+                            str(get_env_value("OPENROUTER_API_KEY") or "").strip()
+                            or str(get_env_value("OPENAI_API_KEY") or "").strip()
                        )
-                        if not configured:
-                            check_fail(
-                                f"model.provider '{runtime_provider}' is set but no API key is configured",
-                                "(check ~/.hermes/.env or run 'hermes setup')",
-                            )
-                            issues.append(
-                                f"No credentials found for provider '{runtime_provider}'. "
-                                f"Run 'hermes setup' or set the provider's API key in {_DHH}/.env, "
-                                f"or switch providers with 'hermes config set model.provider <name>'"
+                    else:
+                        from hermes_cli.auth import PROVIDER_REGISTRY, get_auth_status
+
+                        pconfig = PROVIDER_REGISTRY.get(runtime_provider)
+                        configured = True
+                        if pconfig and getattr(pconfig, "auth_type", "") == "api_key":
+                            status = get_auth_status(runtime_provider) or {}
+                            configured = bool(
+                                status.get("configured")
+                                or status.get("logged_in")
+                                or status.get("api_key")
                            )
+                    if not configured:
+                        check_fail(
+                            f"model.provider '{runtime_provider}' is set but no API key is configured",
+                            "(check ~/.hermes/.env or run 'hermes setup')",
+                        )
+                        issues.append(
+                            f"No credentials found for provider '{runtime_provider}'. "
+                            f"Run 'hermes setup' or set the provider's API key in {_DHH}/.env, "
+                            f"or switch providers with 'hermes config set model.provider <name>'"
+                        )
                except Exception:
                    pass

@ -817,6 +833,20 @@ def run_doctor(args):
    except Exception as e:
        check_warn("Auth provider status", f"(could not check: {e})")

+    # xAI OAuth — separate try/except so an import failure here cannot
+    # disrupt the already-printed Nous/Codex/Gemini/MiniMax rows above.
+    try:
+        from hermes_cli.auth import get_xai_oauth_auth_status
+        xai_oauth_status = get_xai_oauth_auth_status() or {}
+        if xai_oauth_status.get("logged_in"):
+            check_ok("xAI OAuth", "(logged in)")
+        else:
+            check_warn("xAI OAuth", "(not logged in)")
+            if xai_oauth_status.get("error"):
+                check_info(xai_oauth_status["error"])
+    except Exception:
+        pass
+
    if _safe_which("codex"):
        check_ok("codex CLI")
    else:
@ -1073,10 +1103,20 @@ def run_doctor(args):
    if terminal_env == "ssh":
        ssh_host = os.getenv("TERMINAL_SSH_HOST")
        if ssh_host:
+            ssh_user = os.getenv("TERMINAL_SSH_USER")
+            ssh_port = os.getenv("TERMINAL_SSH_PORT")
+            ssh_key = os.getenv("TERMINAL_SSH_KEY")
+            target = f"{ssh_user}@{ssh_host}" if ssh_user else ssh_host
+            cmd = ["ssh", "-o", "ConnectTimeout=5", "-o", "BatchMode=yes"]
+            if ssh_port:
+                cmd += ["-p", ssh_port]
+            if ssh_key:
+                cmd += ["-i", os.path.expanduser(ssh_key)]
+            cmd += [target, "echo ok"]
            # Try to connect
            try:
                result = subprocess.run(
-                    ["ssh", "-o", "ConnectTimeout=5", "-o", "BatchMode=yes", ssh_host, "echo ok"],
+                    cmd,
                    capture_output=True,
                    text=True,
                    timeout=15
@ -1474,6 +1514,15 @@ def run_doctor(args):
            }
            if base_url_host_matches(base, "api.kimi.com"):
                headers["User-Agent"] = "claude-code/0.1.0"
+            # Google's Generative Language API (generativelanguage.googleapis.com)
+            # rejects ``Authorization: Bearer <api-key>`` with 401
+            # ``ACCESS_TOKEN_TYPE_UNSUPPORTED`` — that header is reserved for
+            # OAuth 2 access tokens, not plain API keys. Plain keys use
+            # ``x-goog-api-key`` (or ``?key=``). Without this, a perfectly valid
+            # GOOGLE_API_KEY/GEMINI_API_KEY always shows red in ``hermes doctor``.
+            if url and base_url_host_matches(url, "generativelanguage.googleapis.com"):
+                headers.pop("Authorization", None)
+                headers["x-goog-api-key"] = key
            r = httpx.get(url, headers=headers, timeout=10)
            if (
                pname == "Alibaba/DashScope"
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@ -2110,24 +2110,30 @@ def _build_service_path_dirs(project_root: Path | None = None) -> list[str]:
    if project_root is None:
        project_root = PROJECT_ROOT

+    def _is_dir(path: Path) -> bool:
+        try:
+            return path.is_dir()
+        except OSError:
+            return False
+
    candidates = []

    venv_bin = project_root / "venv" / "bin"
-    if venv_bin.is_dir():
+    if _is_dir(venv_bin):
        candidates.append(str(venv_bin))
    elif sys.prefix != sys.base_prefix:
        candidates.append(str(Path(sys.prefix) / "bin"))

    node_bin = project_root / "node_modules" / ".bin"
-    if node_bin.is_dir():
+    if _is_dir(node_bin):
        candidates.append(str(node_bin))

    hermes_home = get_hermes_home()
    hermes_node = hermes_home / "node" / "bin"
-    if hermes_node.is_dir():
+    if _is_dir(hermes_node):
        candidates.append(str(hermes_node))
    hermes_nm = hermes_home / "node_modules" / ".bin"
-    if hermes_nm.is_dir():
+    if _is_dir(hermes_nm):
        candidates.append(str(hermes_nm))

    return candidates
--- a/hermes_cli/goals.py
+++ b/hermes_cli/goals.py
@ -34,6 +34,7 @@ import logging
 import re
 import time
 from dataclasses import dataclass, field, asdict
+from datetime import datetime, timezone
 from typing import Any, Dict, List, Optional, Tuple

 logger = logging.getLogger(__name__)
@ -110,6 +111,7 @@ JUDGE_SYSTEM_PROMPT = (
 JUDGE_USER_PROMPT_TEMPLATE = (
    "Goal:\n{goal}\n\n"
    "Agent's most recent response:\n{response}\n\n"
+    "Current time: {current_time}\n\n"
    "Is the goal satisfied?"
 )

@ -120,6 +122,7 @@ JUDGE_USER_PROMPT_WITH_SUBGOALS_TEMPLATE = (
    "Additional criteria the user added mid-loop (all must also be "
    "satisfied for the goal to be DONE):\n{subgoals_block}\n\n"
    "Agent's most recent response:\n{response}\n\n"
+    "Current time: {current_time}\n\n"
    "Decision: For each numbered criterion above, find concrete "
    "evidence in the agent's response that the criterion is "
    "satisfied. Do not accept generic phrases like 'all requirements "
@ -415,6 +418,7 @@ def judge_goal(

    # Build the prompt — pick the with-subgoals variant when applicable.
    clean_subgoals = [s.strip() for s in (subgoals or []) if s and s.strip()]
+    current_time = datetime.now(tz=timezone.utc).astimezone().strftime("%Y-%m-%d %H:%M:%S %Z")
    if clean_subgoals:
        subgoals_block = "\n".join(
            f"- {i}. {text}" for i, text in enumerate(clean_subgoals, start=1)
@ -423,11 +427,13 @@ def judge_goal(
            goal=_truncate(goal, 2000),
            subgoals_block=_truncate(subgoals_block, 2000),
            response=_truncate(last_response, _JUDGE_RESPONSE_SNIPPET_CHARS),
+            current_time=current_time,
        )
    else:
        prompt = JUDGE_USER_PROMPT_TEMPLATE.format(
            goal=_truncate(goal, 2000),
            response=_truncate(last_response, _JUDGE_RESPONSE_SNIPPET_CHARS),
+            current_time=current_time,
        )

    try:
--- a/hermes_cli/kanban.py
+++ b/hermes_cli/kanban.py
@ -610,6 +610,43 @@ def build_parser(parent_subparsers: argparse._SubParsersAction) -> argparse.Argu
        help="Emit one JSON object per task on stdout",
    )

+    # --- decompose --- (triage → fan-out via auxiliary LLM + orchestrator)
+    p_decompose = sub.add_parser(
+        "decompose",
+        help="Decompose a triage-column task into a graph of child tasks "
+             "routed to specialist profiles by description. Falls back to "
+             "specify-style single-task promotion when the task doesn't "
+             "benefit from fan-out. Uses auxiliary.kanban_decomposer.",
+    )
+    p_decompose.add_argument(
+        "task_id",
+        nargs="?",
+        default=None,
+        help="Task id to decompose (required unless --all is given)",
+    )
+    p_decompose.add_argument(
+        "--all",
+        dest="all_triage",
+        action="store_true",
+        help="Decompose every task currently in the triage column",
+    )
+    p_decompose.add_argument(
+        "--tenant",
+        default=None,
+        help="When used with --all, restrict the sweep to this tenant",
+    )
+    p_decompose.add_argument(
+        "--author",
+        default=None,
+        help="Author name recorded on the audit comment "
+             "(default: $HERMES_PROFILE or 'decomposer')",
+    )
+    p_decompose.add_argument(
+        "--json",
+        action="store_true",
+        help="Emit one JSON object per task on stdout",
+    )
+
    # --- gc ---
    p_gc = sub.add_parser(
        "gc", help="Garbage-collect archived-task workspaces, old events, and old logs",
@ -740,6 +777,7 @@ def kanban_command(args: argparse.Namespace) -> int:
        "notify-unsubscribe": _cmd_notify_unsubscribe,
        "context":  _cmd_context,
        "specify":  _cmd_specify,
+        "decompose":  _cmd_decompose,
        "gc":       _cmd_gc,
    }
    handler = handlers.get(action)
@ -2115,6 +2153,87 @@ def _cmd_specify(args: argparse.Namespace) -> int:
    return 0 if (ok_count > 0 or not ids) else 1


+def _cmd_decompose(args: argparse.Namespace) -> int:
+    """Handle ``kanban decompose`` for a single task id or an ``--all`` sweep.
+
+    Delegates the actual work to ``hermes_cli.kanban_decompose`` and only
+    deals with argument checking and output formatting here.
+
+    Returns:
+        2 for argument errors (both a task id and ``--all``, or neither);
+        0 when the sweep found nothing to do; otherwise 0 when the single
+        task succeeded / at least one swept task succeeded, else 1.
+    """
+    from hermes_cli import kanban_decompose as decomp
+
+    sweep_all = bool(getattr(args, "all_triage", False))
+    tenant = getattr(args, "tenant", None)
+    author = getattr(args, "author", None) or _profile_author()
+    emit_json = bool(getattr(args, "json", False))
+
+    # Argument errors first: exactly one of <task_id> / --all is required.
+    if args.task_id and sweep_all:
+        print(
+            "kanban: pass either a task id OR --all, not both",
+            file=sys.stderr,
+        )
+        return 2
+    if not args.task_id and not sweep_all:
+        print(
+            "kanban: decompose requires a task id or --all",
+            file=sys.stderr,
+        )
+        return 2
+
+    if sweep_all:
+        task_ids = decomp.list_triage_ids(tenant=tenant)
+        if not task_ids:
+            # Nothing in triage is a successful no-op, not a failure.
+            if emit_json:
+                print(json.dumps({"decomposed": 0, "total": 0}))
+            else:
+                scope = f" for tenant {tenant!r}" if tenant else ""
+                print("No triage tasks" + scope + ".")
+            return 0
+    else:
+        task_ids = [args.task_id]
+
+    succeeded = 0
+    for tid in task_ids:
+        outcome = decomp.decompose_task(tid, author=author)
+        if outcome.ok:
+            succeeded += 1
+
+        if emit_json:
+            record = {
+                "task_id": outcome.task_id,
+                "ok": outcome.ok,
+                "reason": outcome.reason,
+                "fanout": outcome.fanout,
+                "child_ids": outcome.child_ids,
+                "new_title": outcome.new_title,
+            }
+            print(json.dumps(record))
+            continue
+
+        if not outcome.ok:
+            print(
+                f"kanban: decompose {outcome.task_id}: {outcome.reason}",
+                file=sys.stderr,
+            )
+            continue
+
+        if outcome.fanout and outcome.child_ids:
+            child_summary = ", ".join(outcome.child_ids)
+            print(
+                f"Decomposed {outcome.task_id} → {len(outcome.child_ids)} "
+                f"children ({child_summary}); root promoted to todo"
+            )
+            continue
+
+        # ok without children: the task was promoted as a single work item.
+        if outcome.new_title:
+            title_suffix = f" — retitled: {outcome.new_title!r}"
+        else:
+            title_suffix = ""
+        print(
+            f"Specified {outcome.task_id} → todo "
+            f"(no fanout){title_suffix}"
+        )
+
+    if sweep_all:
+        # An empty sweep already returned above, so ``task_ids`` is
+        # non-empty here: success means at least one task went through.
+        return 0 if succeeded > 0 else 1
+    return 0 if succeeded == 1 else 1
+
+
 def _cmd_gc(args: argparse.Namespace) -> int:
    """Remove scratch workspaces of archived tasks, prune old events, and
    delete old worker logs."""
--- a/hermes_cli/kanban_db.py
+++ b/hermes_cli/kanban_db.py
@ -93,6 +93,7 @@ from toolsets import get_toolset_names
 VALID_STATUSES = {"triage", "todo", "ready", "running", "blocked", "done", "archived"}
 VALID_WORKSPACE_KINDS = {"scratch", "worktree", "dir"}
 KNOWN_TOOLSET_NAMES = frozenset(name.casefold() for name in get_toolset_names())
+_IS_WINDOWS = sys.platform == "win32"

 # A running task's claim is valid for 15 minutes; after that the next
 # dispatcher tick reclaims it.  Workers that outlive this window should call
@ -2776,6 +2777,180 @@ def specify_triage_task(
    return True


+def _decompose_graph_has_cycle(children: list[dict]) -> bool:
+    """Return True when the sibling ``parents`` indices form a dependency cycle.
+
+    Expects ``children`` to be shape-validated already (every ``parents``
+    entry is an in-range int). Repeatedly removes the children whose
+    remaining parents have all been removed; if a pass removes nothing
+    while children are still pending, those children wait on each other.
+    """
+    pending = {
+        idx: set(child.get("parents") or [])
+        for idx, child in enumerate(children)
+    }
+    while pending:
+        runnable = [idx for idx, deps in pending.items() if not deps]
+        if not runnable:
+            return True
+        for idx in runnable:
+            del pending[idx]
+        for deps in pending.values():
+            deps.difference_update(runnable)
+    return False
+
+
+def decompose_triage_task(
+    conn: sqlite3.Connection,
+    task_id: str,
+    *,
+    root_assignee: Optional[str],
+    children: list[dict],
+    author: Optional[str] = None,
+) -> Optional[list[str]]:
+    """Fan a triage task out into child tasks and promote the root to ``todo``.
+
+    The root task stays alive and is linked in ``task_links`` as a
+    dependent (``child_id``) of every created task, so it waits on the
+    whole graph. Once the children are done the root's assignee
+    (typically the orchestrator profile) is expected to wake back up to
+    judge completion or spawn more work.
+
+    ``children`` is a list of dicts, each shaped like::
+
+        {
+            "title": "...",
+            "body": "...",                     # optional
+            "assignee": "profile-name",        # optional, None -> default fallback
+            "parents": [0, 2],                 # indices into this same children list
+        }
+
+    Returns the list of created child task ids (in input order) on
+    success. Returns ``None`` (without writing anything) when:
+      - ``children`` is empty
+      - The sibling ``parents`` indices form a cycle
+      - The root task does not exist
+      - The root task is not in ``triage``
+
+    Raises ``ValueError`` when an entry of ``children`` is malformed
+    (not a dict, missing/blank title, ``parents`` not a list, or a parent
+    index that is not an in-range int or points at the child itself).
+    Shape validation and the cycle check run before the transaction, so
+    bad input never touches the DB; the inserts themselves share one
+    ``write_txn``.
+    """
+    if not children:
+        return None
+    if root_assignee is not None:
+        root_assignee = _canonical_assignee(root_assignee)
+
+    # Pre-validate the children list shape outside the txn. Cheap checks
+    # that don't need DB access. Bad input aborts before we touch the DB.
+    for idx, child in enumerate(children):
+        if not isinstance(child, dict):
+            raise ValueError(f"child[{idx}] is not a dict")
+        title = child.get("title")
+        if not isinstance(title, str) or not title.strip():
+            raise ValueError(f"child[{idx}].title is required")
+        parents_idx = child.get("parents") or []
+        if not isinstance(parents_idx, list):
+            raise ValueError(f"child[{idx}].parents must be a list")
+        for p in parents_idx:
+            # bool is a subclass of int; a JSON ``true`` must not be
+            # silently accepted as index 1.
+            if (
+                isinstance(p, bool)
+                or not isinstance(p, int)
+                or p < 0
+                or p >= len(children)
+            ):
+                raise ValueError(
+                    f"child[{idx}].parents[{p}] is not a valid index into children"
+                )
+            if p == idx:
+                raise ValueError(f"child[{idx}] cannot list itself as a parent")
+
+    # A cycle among siblings (e.g. 0 -> 1 -> 0) would leave those children
+    # and the root waiting on each other forever. Refuse before writing.
+    if _decompose_graph_has_cycle(children):
+        return None
+
+    # We do the full decomposition in a SINGLE write_txn so it's
+    # atomic: either every child is created AND the root flips to
+    # ``todo``, or nothing changes. We deliberately do NOT call any
+    # kb helper that opens its own write_txn (create_task, link_tasks,
+    # add_comment) from inside this block — see architecture.md
+    # write_txn pitfalls. Instead we inline the INSERTs and
+    # _append_event calls.
+    now = int(time.time())
+    child_ids: list[str] = []
+    with write_txn(conn):
+        root_row = conn.execute(
+            "SELECT id, status, tenant FROM tasks WHERE id = ?", (task_id,)
+        ).fetchone()
+        if root_row is None:
+            return None
+        if root_row["status"] != "triage":
+            return None
+        tenant = root_row["tenant"]
+
+        # Create children. Status is 'todo' regardless of parents — we
+        # link them under the root AFTER creation so the dispatcher
+        # sees a coherent state, and recompute_ready() at the end
+        # promotes parent-free children to 'ready'.
+        for child in children:
+            new_id = _new_task_id()
+            title = child["title"].strip()
+            body = child.get("body")
+            assignee = _canonical_assignee(child.get("assignee"))
+            conn.execute(
+                "INSERT INTO tasks "
+                "(id, title, body, assignee, status, workspace_kind, "
+                " tenant, created_at, created_by) "
+                "VALUES (?, ?, ?, ?, 'todo', 'scratch', ?, ?, ?)",
+                (
+                    new_id,
+                    title,
+                    body if isinstance(body, str) else None,
+                    assignee,
+                    tenant,
+                    now,
+                    (author or "decomposer"),
+                ),
+            )
+            _append_event(
+                conn, new_id, "created",
+                {"by": author or "decomposer", "from_decompose_of": task_id},
+            )
+            child_ids.append(new_id)
+
+        # Link children to their sibling parents (within the decomposed graph).
+        for idx, child in enumerate(children):
+            for p_idx in child.get("parents") or []:
+                parent_id = child_ids[p_idx]
+                child_id = child_ids[idx]
+                conn.execute(
+                    "INSERT OR IGNORE INTO task_links (parent_id, child_id) "
+                    "VALUES (?, ?)",
+                    (parent_id, child_id),
+                )
+                _append_event(
+                    conn, child_id, "linked",
+                    {"parent": parent_id, "child": child_id},
+                )
+
+        # Link the ROOT task as a child of every created task — i.e. the
+        # root waits for the whole graph. Simpler than computing leaves:
+        # link root under every child. Cycle-free because the root is
+        # only ever a child here, never a parent of children.
+        for cid in child_ids:
+            conn.execute(
+                "INSERT OR IGNORE INTO task_links (parent_id, child_id) "
+                "VALUES (?, ?)",
+                (cid, task_id),
+            )
+
+        # Flip the root: triage -> todo, set assignee to the orchestrator.
+        sets = ["status = 'todo'"]
+        params: list[Any] = []
+        if root_assignee is not None:
+            sets.append("assignee = ?")
+            params.append(root_assignee)
+        params.append(task_id)
+        conn.execute(
+            f"UPDATE tasks SET {', '.join(sets)} WHERE id = ?",
+            tuple(params),
+        )
+
+        # Audit comment + event on the root so the timeline shows the fan-out.
+        if author and author.strip():
+            conn.execute(
+                "INSERT INTO task_comments (task_id, author, body, created_at) "
+                "VALUES (?, ?, ?, ?)",
+                (
+                    task_id,
+                    author.strip(),
+                    "Decomposed into "
+                    + ", ".join(child_ids)
+                    + ". Root will wake when all children complete.",
+                    now,
+                ),
+            )
+        _append_event(
+            conn, task_id, "decomposed",
+            {
+                "child_ids": child_ids,
+                "root_assignee": root_assignee,
+            },
+        )
+
+    # Outside the write_txn: promote parent-free children to 'ready'
+    # so the dispatcher picks them up on its next tick. Same pattern
+    # specify_triage_task uses.
+    recompute_ready(conn)
+    return child_ids
+
+
 def archive_task(conn: sqlite3.Connection, task_id: str) -> bool:
    with write_txn(conn):
        cur = conn.execute(
@ -4024,6 +4199,7 @@ def _default_spawn(
            stderr=subprocess.STDOUT,
            env=env,
            start_new_session=True,
+            creationflags=subprocess.CREATE_NO_WINDOW if _IS_WINDOWS else 0,
        )
    except FileNotFoundError:
        log_f.close()
--- a/hermes_cli/kanban_decompose.py
+++ b/hermes_cli/kanban_decompose.py
@ -0,0 +1,440 @@
+"""Kanban decomposer — fan a triage task out into a graph of child tasks.
+
+Invoked by ``hermes kanban decompose [task_id | --all]`` and the
+auto-decompose path in the gateway dispatcher loop. Reads the user's
+profile roster (with descriptions) and asks the auxiliary LLM to
+return a task graph in JSON. Then atomically creates the children,
+links them under the root, and flips the root ``triage -> todo``.
+
+The root task stays alive and is linked as a dependent of every child
+task, so when the whole graph completes the root wakes back up — its
+assignee (the orchestrator profile) gets a chance to judge completion
+and add more tasks if the work isn't done yet.
+
+Design notes
+------------
+
+* Mirrors the shape of ``hermes_cli/kanban_specify.py``: lazy aux
+  client import inside the function, lenient response parse, never
+  raises on expected failure modes.
+
+* The system prompt sees the *configured* profile roster — names plus
+  descriptions plus the default fallback. Profiles without a
+  description are still listed (with a note) so the orchestrator can
+  match on name as a fallback, but the user has an obvious incentive
+  to describe them.
+
+* ``fanout=false`` collapses to the same effect as ``kanban specify``:
+  we tighten the body and flip ``triage -> todo`` as a single task,
+  no children created. This makes ``decompose`` a strict superset of
+  ``specify`` from the user's perspective.
+
+* If the LLM picks an assignee that doesn't exist as a profile, we
+  rewrite it to the configured ``default_assignee`` (or the default
+  profile if unset). A child task NEVER ends up with ``assignee=None``.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import re
+from dataclasses import dataclass
+from typing import Optional
+
+from hermes_cli import kanban_db as kb
+from hermes_cli import profiles as profiles_mod
+
+logger = logging.getLogger(__name__)
+
+
+_SYSTEM_PROMPT = """You are the Kanban decomposer for the Hermes Agent board.
+
+A user dropped a rough idea into the Triage column. Your job is to break it
+into a small graph of concrete child tasks and route each one to the best-
+matching profile from the available roster.
+
+You will be given:
+  - The original task title and body
+  - The list of available profiles (each with name + description)
+  - The fallback "default_assignee" used when no profile fits
+
+Output a single JSON object with this exact shape:
+
+  {
+    "fanout": true,
+    "rationale": "<one sentence on why this decomposition>",
+    "tasks": [
+      {
+        "title": "<concrete task title, imperative voice, <= 80 chars>",
+        "body":  "<detailed spec for the worker on this child task>",
+        "assignee": "<profile name from the roster, or null for default>",
+        "parents": [<int>, ...]
+      },
+      ...
+    ]
+  }
+
+Rules:
+  - "parents" is a list of INDICES (0-based) into this same "tasks" list,
+    expressing actual data dependencies. Tasks with no parents run in
+    PARALLEL. Tasks with parents wait until every parent completes.
+  - Prefer parallelism. If two tasks can be done independently, give
+    them no parents so the dispatcher fans them out at once.
+  - Use 2-6 tasks for normal work. Don't create 20 tiny tasks. Don't
+    cram everything into 1 task.
+  - Pick assignees from the roster by matching the task to the profile's
+    DESCRIPTION (not just the name). When nothing matches well, use null
+    and the system will route to the default_assignee.
+  - Each child task body is what a fresh worker will read with no other
+    context — be specific about goal, approach, and acceptance criteria.
+
+When the task is genuinely a single unit of work (no useful decomposition),
+return:
+
+  {
+    "fanout": false,
+    "rationale": "<one sentence>",
+    "title": "<tightened title>",
+    "body":  "<concrete spec for a single worker>"
+  }
+
+In that case the task stays as one work item, just with a tightened spec.
+
+No preamble, no closing remarks, no code fences. Output only the JSON object.
+"""
+
+
+_USER_TEMPLATE = """Task id: {task_id}
+Title: {title}
+Body:
+{body}
+
+Available profiles (assignees you may pick from):
+{roster}
+
+Default assignee (used when no profile fits a task): {default_assignee}
+"""
+
+
+_FENCE_RE = re.compile(r"^```(?:json)?\s*|\s*```$", re.MULTILINE)
+
+
+@dataclass
+class DecomposeOutcome:
+    """Outcome record returned for one decomposition attempt."""
+
+    # Id of the task the attempt was made on.
+    task_id: str
+    # True when the attempt succeeded.
+    ok: bool
+    # Human-readable explanation; callers print it when ``ok`` is False.
+    reason: str = ""
+    # True when the task was split into child tasks.
+    fanout: bool = False
+    # Ids of the created child tasks, or None when there are none.
+    child_ids: Optional[list[str]] = None
+    # Replacement title for the task, or None when it was not retitled.
+    new_title: Optional[str] = None
+
+
+def _truncate(text: str, limit: int) -> str:
+    """Return ``text`` shortened to at most ``limit`` characters.
+
+    Text that already fits is returned unchanged. Longer text is cut to
+    ``limit - 1`` characters plus a trailing ellipsis. A non-positive
+    ``limit`` yields an empty string; without that guard the negative
+    slice bound would keep almost the whole text and overshoot the limit.
+    """
+    if limit <= 0:
+        return ""
+    if len(text) <= limit:
+        return text
+    return text[: limit - 1] + "…"
+
+
+def _extract_json_blob(raw: str) -> Optional[dict]:
+    """Leniently pull the first JSON object out of an LLM reply.
+
+    Code fences are stripped, then the object that starts at the first
+    ``{`` is decoded. Anything after that object is ignored, so closing
+    remarks that happen to contain ``}`` no longer make the whole reply
+    unparseable. Only the first ``{`` is tried: a truncated outer object
+    must fail rather than fall through to one of its nested objects.
+
+    Returns the decoded dict, or ``None`` when ``raw`` is empty, holds no
+    ``{``, or the text at the first ``{`` is not a valid JSON object.
+    """
+    if not raw:
+        return None
+    stripped = _FENCE_RE.sub("", raw.strip())
+    start = stripped.find("{")
+    if start == -1:
+        return None
+    try:
+        # raw_decode stops at the end of the first complete JSON value
+        # instead of requiring the rest of the string to be empty.
+        val, _end = json.JSONDecoder().raw_decode(stripped, start)
+    except ValueError:  # json.JSONDecodeError is a ValueError subclass
+        return None
+    if not isinstance(val, dict):
+        return None
+    return val
+
+
+def _profile_author() -> str:
+    """Pick the author name for audit records.
+
+    Uses the first non-empty value of ``$HERMES_PROFILE`` then ``$USER``,
+    and falls back to ``"decomposer"``. Mirror of
+    ``hermes_cli.kanban._profile_author``.
+    """
+    for var_name in ("HERMES_PROFILE", "USER"):
+        candidate = os.environ.get(var_name)
+        if candidate:
+            return candidate
+    return "decomposer"
+
+
+def _load_config() -> dict:
+    """Best-effort config load: any import or load failure yields ``{}``.
+
+    The import is done lazily inside the function; a falsy result from
+    ``load_config()`` is also normalised to an empty dict.
+    """
+    try:
+        from hermes_cli.config import load_config
+    except Exception:
+        return {}
+    try:
+        loaded = load_config()
+    except Exception:
+        return {}
+    return loaded if loaded else {}
+
+
+def _resolve_kanban_profile(cfg: dict, key: str) -> str:
+    """Return the profile named by ``kanban.<key>``, else the active profile.
+
+    The configured name is used only when it is a non-blank string and
+    ``profiles_mod.profile_exists`` confirms it. Anything else falls
+    back to the active profile name, and finally to ``"default"``.
+    A null ``kanban`` section or a non-string setting is treated as
+    "unset" rather than raising. Never raises.
+    """
+    kanban_cfg = cfg.get("kanban") if isinstance(cfg, dict) else None
+    raw = kanban_cfg.get(key) if isinstance(kanban_cfg, dict) else None
+    explicit = raw.strip() if isinstance(raw, str) else ""
+    if explicit:
+        try:
+            if profiles_mod.profile_exists(explicit):
+                return explicit
+        except Exception as exc:
+            # Best-effort lookup: fall through to the active profile.
+            logger.debug(
+                "decompose: profile lookup for kanban.%s=%r failed: %s",
+                key, explicit, exc,
+            )
+    try:
+        return profiles_mod.get_active_profile_name() or "default"
+    except Exception:
+        return "default"
+
+
+def _resolve_orchestrator_profile(cfg: dict) -> str:
+    """Resolve which profile owns decomposition.
+
+    Falls back to the active default profile when ``kanban.orchestrator_profile``
+    is unset, so a task is never stranded for lack of an orchestrator.
+    """
+    return _resolve_kanban_profile(cfg, "orchestrator_profile")
+
+
+def _resolve_default_assignee(cfg: dict) -> str:
+    """Resolve which profile catches child tasks the orchestrator can't route.
+
+    Falls back to the active default profile when ``kanban.default_assignee``
+    is unset or does not name an existing profile.
+    """
+    return _resolve_kanban_profile(cfg, "default_assignee")
+
+
+def _build_roster() -> tuple[list[dict], set[str]]:
+    """Collect the profile roster shown to the LLM plus the set of valid names.
+
+    Returns ``(entries, names)``. Each entry is a dict with ``name``,
+    ``description`` and ``has_description``; a profile with a blank
+    description gets a placeholder text naming the profile. ``names``
+    holds every listed profile name so callers can check an assignee
+    against it. If listing profiles fails, a warning is logged and both
+    collections come back empty.
+    """
+    try:
+        profiles = profiles_mod.list_profiles()
+    except Exception as exc:
+        logger.warning("decompose: failed to list profiles: %s", exc)
+        return [], set()
+
+    entries: list[dict] = []
+    names: set[str] = set()
+    for profile in profiles:
+        text = (profile.description or "").strip()
+        if text:
+            shown = text
+        else:
+            shown = f"(no description; profile named {profile.name!r})"
+        entries.append(
+            {
+                "name": profile.name,
+                "description": shown,
+                "has_description": bool(text),
+            }
+        )
+        names.add(profile.name)
+    return entries, names
+
+
+def _format_roster(roster: list[dict]) -> str:
+    """Render the roster as one indented bullet line per profile.
+
+    Entries whose ``has_description`` is falsy get a warning marker after
+    the name. An empty roster renders as a single explanatory line.
+    """
+    if not roster:
+        return "  (no profiles installed — decomposer cannot route work)"
+
+    def _bullet(item: dict) -> str:
+        marker = "" if item["has_description"] else " ⚠ undescribed"
+        return f"  - {item['name']}{marker}: {item['description']}"
+
+    return "\n".join(_bullet(item) for item in roster)
+
+
+def decompose_task(
+    task_id: str,
+    *,
+    author: Optional[str] = None,
+    timeout: Optional[int] = None,
+) -> DecomposeOutcome:
+    """Decompose a triage task into a graph of child tasks.
+
+    Returns an outcome describing what happened. Never raises for
+    expected failure modes (task not in triage, no aux client
+    configured, API error, malformed response, decomposer returned
+    fanout=true with empty task list) — those surface via ``ok=False``.
+
+    ``author`` is recorded as the audit author of the board change;
+    when omitted, ``_profile_author()`` is used. ``timeout`` is
+    forwarded to the chat-completion call and falls back to 180 when
+    unset (or zero).
+
+    When the model answers ``fanout=false`` the task is promoted as a
+    single spec via ``kb.specify_triage_task``. Otherwise each child is
+    validated (title required; missing, blank, or unknown assignees are
+    routed to the configured default assignee; parent indices are
+    cleaned) and the graph is handed to ``kb.decompose_triage_task``.
+    """
+    with kb.connect() as conn:
+        task = kb.get_task(conn, task_id)
+    if task is None:
+        return DecomposeOutcome(task_id, False, "unknown task id")
+    if task.status != "triage":
+        return DecomposeOutcome(
+            task_id, False, f"task is not in triage (status={task.status!r})"
+        )
+
+    cfg = _load_config()
+    orchestrator = _resolve_orchestrator_profile(cfg)
+    default_assignee = _resolve_default_assignee(cfg)
+    roster, valid_names = _build_roster()
+
+    # Lazy import: a missing/broken auxiliary client is an expected
+    # failure mode and must surface as ok=False, not as an ImportError.
+    try:
+        from agent.auxiliary_client import (  # type: ignore
+            get_auxiliary_extra_body,
+            get_text_auxiliary_client,
+        )
+    except Exception as exc:
+        logger.debug("decompose: auxiliary client import failed: %s", exc)
+        return DecomposeOutcome(task_id, False, "auxiliary client unavailable")
+
+    try:
+        client, model = get_text_auxiliary_client("kanban_decomposer")
+    except Exception as exc:
+        logger.debug("decompose: get_text_auxiliary_client failed: %s", exc)
+        return DecomposeOutcome(task_id, False, "auxiliary client unavailable")
+
+    if client is None or not model:
+        return DecomposeOutcome(task_id, False, "no auxiliary client configured")
+
+    user_msg = _USER_TEMPLATE.format(
+        task_id=task.id,
+        title=_truncate(task.title or "", 400),
+        body=_truncate(task.body or "(no body)", 4000),
+        roster=_format_roster(roster),
+        default_assignee=default_assignee,
+    )
+
+    try:
+        resp = client.chat.completions.create(
+            model=model,
+            messages=[
+                {"role": "system", "content": _SYSTEM_PROMPT},
+                {"role": "user", "content": user_msg},
+            ],
+            temperature=0.3,
+            max_tokens=4000,
+            timeout=timeout or 180,
+            extra_body=get_auxiliary_extra_body() or None,
+        )
+    except Exception as exc:
+        logger.info(
+            "decompose: API call failed for %s (%s)", task_id, exc,
+        )
+        return DecomposeOutcome(task_id, False, f"LLM error: {type(exc).__name__}")
+
+    # A response with an unexpected shape is treated like an empty reply
+    # and rejected by the JSON check below.
+    try:
+        raw = resp.choices[0].message.content or ""
+    except Exception:
+        raw = ""
+
+    parsed = _extract_json_blob(raw)
+    if parsed is None:
+        return DecomposeOutcome(task_id, False, "LLM returned malformed JSON")
+
+    fanout = bool(parsed.get("fanout"))
+    audit_author = author or _profile_author()
+
+    if not fanout:
+        # Fall back to single-task spec promotion (same effect as specify).
+        new_title = parsed.get("title")
+        new_body = parsed.get("body")
+        title_val = new_title.strip() if isinstance(new_title, str) and new_title.strip() else None
+        body_val = new_body if isinstance(new_body, str) and new_body.strip() else None
+        if title_val is None and body_val is None:
+            return DecomposeOutcome(
+                task_id, False, "decomposer returned fanout=false with no title/body",
+            )
+        with kb.connect() as conn:
+            ok = kb.specify_triage_task(
+                conn,
+                task_id,
+                title=title_val,
+                body=body_val,
+                author=audit_author,
+            )
+        if not ok:
+            return DecomposeOutcome(
+                task_id, False, "task moved out of triage before promotion",
+            )
+        return DecomposeOutcome(
+            task_id, True, "single task (no fanout)",
+            fanout=False, new_title=title_val,
+        )
+
+    raw_tasks = parsed.get("tasks") or []
+    if not isinstance(raw_tasks, list) or not raw_tasks:
+        return DecomposeOutcome(
+            task_id, False, "decomposer returned fanout=true with empty tasks list",
+        )
+
+    # Rewrite invalid assignees to the default fallback. Never leave a
+    # task with assignee=None — the user explicitly does not want that.
+    children: list[dict] = []
+    for idx, entry in enumerate(raw_tasks):
+        if not isinstance(entry, dict):
+            return DecomposeOutcome(
+                task_id, False, f"tasks[{idx}] is not an object",
+            )
+        title = entry.get("title")
+        if not isinstance(title, str) or not title.strip():
+            return DecomposeOutcome(
+                task_id, False, f"tasks[{idx}].title is missing or empty",
+            )
+        body = entry.get("body")
+        if not isinstance(body, str):
+            body = ""
+        assignee = entry.get("assignee")
+        # Validate the trimmed name so stray whitespace from the model
+        # does not turn a valid pick into a fallback.
+        candidate = assignee.strip() if isinstance(assignee, str) else ""
+        if not candidate:
+            chosen = default_assignee
+        elif candidate not in valid_names:
+            logger.info(
+                "decompose: task %s child %d picked unknown assignee %r — "
+                "routing to default_assignee %r",
+                task_id, idx, assignee, default_assignee,
+            )
+            chosen = default_assignee
+        else:
+            chosen = candidate
+        parents = entry.get("parents") or []
+        if not isinstance(parents, list):
+            parents = []
+        # Clean parent indices: keep only in-range ints other than the
+        # task itself. ``bool`` is an ``int`` subclass, so a JSON ``true``
+        # would otherwise be read as index 1. Duplicates are dropped
+        # while preserving first-seen order.
+        clean_parents: list[int] = []
+        for p in parents:
+            if isinstance(p, bool) or not isinstance(p, int):
+                continue
+            if 0 <= p < len(raw_tasks) and p != idx and p not in clean_parents:
+                clean_parents.append(p)
+        children.append({
+            "title": title.strip()[:200],
+            "body": body.strip(),
+            "assignee": chosen,
+            "parents": clean_parents,
+        })
+
+    try:
+        with kb.connect() as conn:
+            child_ids = kb.decompose_triage_task(
+                conn,
+                task_id,
+                root_assignee=orchestrator,
+                children=children,
+                author=audit_author,
+            )
+    except ValueError as exc:
+        return DecomposeOutcome(task_id, False, f"DB rejected graph: {exc}")
+    except Exception as exc:
+        logger.exception("decompose: DB error on task %s", task_id)
+        return DecomposeOutcome(task_id, False, f"DB error: {type(exc).__name__}")
+
+    if child_ids is None:
+        return DecomposeOutcome(
+            task_id, False, "task moved out of triage before decomposition",
+        )
+
+    return DecomposeOutcome(
+        task_id, True, f"decomposed into {len(child_ids)} children",
+        fanout=True, child_ids=child_ids,
+    )
+
+
+def list_triage_ids(*, tenant: Optional[str] = None) -> list[str]:
+    """List the ids of tasks whose status is ``triage``.
+
+    ``tenant`` is passed through to ``kb.list_tasks`` unchanged; at most
+    1000 rows are requested.
+    """
+    with kb.connect() as conn:
+        triage_tasks = kb.list_tasks(conn, status="triage", tenant=tenant, limit=1000)
+    ids: list[str] = []
+    for task in triage_tasks:
+        ids.append(task.id)
+    return ids
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@ -9082,6 +9082,7 @@ def cmd_profile(args):
                clone_config=clone,
                no_alias=no_alias,
                no_skills=no_skills,
+                description=getattr(args, "description", None),
            )
            print(f"\nProfile '{name}' created at {profile_dir}")

@ -9181,6 +9182,107 @@ def cmd_profile(args):
            print(f"Error: {e}")
            sys.exit(1)

+    elif action == "describe":
+        # Read or write a profile's description. The description is
+        # consumed by the kanban decomposer to route tasks based on
+        # role instead of name alone.
+        from hermes_cli import profiles as _profiles_mod
+
+        all_flag = bool(getattr(args, "all_missing", False))
+        auto_flag = bool(getattr(args, "auto", False))
+        overwrite_flag = bool(getattr(args, "overwrite", False))
+        text_value = getattr(args, "text", None)
+        name = getattr(args, "profile_name", None)
+
+        if all_flag and not auto_flag:
+            print("profile describe: --all requires --auto", file=sys.stderr)
+            sys.exit(2)
+        if all_flag and (text_value or name):
+            print(
+                "profile describe: --all is mutually exclusive with a profile name / --text",
+                file=sys.stderr,
+            )
+            sys.exit(2)
+        if not all_flag and not name:
+            print("profile describe: profile name is required (or --all --auto)", file=sys.stderr)
+            sys.exit(2)
+        if text_value and auto_flag:
+            print(
+                "profile describe: --text is mutually exclusive with --auto",
+                file=sys.stderr,
+            )
+            sys.exit(2)
+
+        # Show current description if no operation requested.
+        if name and not text_value and not auto_flag:
+            try:
+                if _profiles_mod.normalize_profile_name(name) == "default":
+                    from hermes_constants import get_hermes_home as _hh
+                    profile_dir = Path(_hh())
+                else:
+                    profile_dir = _profiles_mod.get_profile_dir(name)
+            except Exception as exc:
+                print(f"Error: {exc}", file=sys.stderr)
+                sys.exit(1)
+            if not profile_dir.is_dir():
+                print(f"Error: profile '{name}' not found", file=sys.stderr)
+                sys.exit(1)
+            meta = _profiles_mod.read_profile_meta(profile_dir)
+            desc = meta.get("description") or ""
+            if not desc:
+                print(f"(no description set for '{name}')")
+            else:
+                tag = "[auto] " if meta.get("description_auto") else ""
+                print(f"{tag}{desc}")
+            sys.exit(0)
+
+        # --text path: just write the user-authored description.
+        if text_value:
+            try:
+                if _profiles_mod.normalize_profile_name(name) == "default":
+                    from hermes_constants import get_hermes_home as _hh
+                    profile_dir = Path(_hh())
+                else:
+                    profile_dir = _profiles_mod.get_profile_dir(name)
+                _profiles_mod.write_profile_meta(
+                    profile_dir,
+                    description=text_value,
+                    description_auto=False,
+                )
+                print(f"Description updated for '{name}'.")
+            except Exception as exc:
+                print(f"Error: {exc}", file=sys.stderr)
+                sys.exit(1)
+            sys.exit(0)
+
+        # --auto path: invoke the LLM describer.
+        from hermes_cli import profile_describer as _pd
+
+        if all_flag:
+            targets = _pd.list_describable_profiles(missing_only=True)
+            if not targets:
+                print("All profiles already have descriptions.")
+                sys.exit(0)
+        else:
+            targets = [name]
+
+        ok_count = 0
+        fail_count = 0
+        for tgt in targets:
+            outcome = _pd.describe_profile(tgt, overwrite=overwrite_flag)
+            if outcome.ok:
+                ok_count += 1
+                print(f"Described '{outcome.profile_name}': {outcome.description}")
+            else:
+                fail_count += 1
+                print(
+                    f"profile describe {outcome.profile_name}: {outcome.reason}",
+                    file=sys.stderr,
+                )
+        if not all_flag:
+            sys.exit(0 if ok_count == 1 else 1)
+        sys.exit(0 if ok_count > 0 else 1)
+
    elif action == "show":
        name = args.profile_name
        from hermes_cli.profiles import (
@ -9684,8 +9786,8 @@ _BUILTIN_SUBCOMMANDS = frozenset(
        "config", "cron", "curator", "dashboard", "debug", "doctor",
        "dump", "fallback", "gateway", "hooks", "import", "insights",
        "kanban", "login", "logout", "logs", "lsp", "mcp", "memory",
-        "model", "pairing", "plugins", "postinstall", "profile", "proxy", "send",
-        "sessions", "setup",
+        "model", "pairing", "plugins", "postinstall", "profile", "proxy",
+        "send", "sessions", "setup",
        "skills", "slack", "status", "tools", "uninstall", "update",
        "version", "webhook", "whatsapp", "chat",
        # Help-ish invocations — plugin commands not being listed in
@ -12076,6 +12178,13 @@ Examples:
        action="store_true",
        help="Create an empty profile with no bundled skills (opts out of `hermes update` skill sync)",
    )
+    profile_create.add_argument(
+        "--description",
+        default=None,
+        help="One- or two-sentence description of what this profile is good at. "
+             "Used by the kanban decomposer to route tasks based on role instead "
+             "of profile name alone. Skip and add later via `hermes profile describe`.",
+    )

    profile_delete = profile_subparsers.add_parser("delete", help="Delete a profile")
    profile_delete.add_argument("profile_name", help="Profile to delete")
@ -12083,6 +12192,40 @@ Examples:
        "-y", "--yes", action="store_true", help="Skip confirmation prompt"
    )

+    profile_describe = profile_subparsers.add_parser(
+        "describe",
+        help="Read or set a profile's description (used by the kanban orchestrator)",
+    )
+    profile_describe.add_argument(
+        "profile_name",
+        nargs="?",
+        default=None,
+        help="Profile to describe (omit + use --all --auto to sweep)",
+    )
+    profile_describe.add_argument(
+        "--text",
+        default=None,
+        help="Set description to this exact text (overwrites any existing description)",
+    )
+    profile_describe.add_argument(
+        "--auto",
+        action="store_true",
+        help="Auto-generate description via the auxiliary LLM "
+             "(uses auxiliary.profile_describer)",
+    )
+    profile_describe.add_argument(
+        "--overwrite",
+        action="store_true",
+        help="With --auto, replace user-authored descriptions too (default: only "
+             "fill in missing or previously-auto descriptions)",
+    )
+    profile_describe.add_argument(
+        "--all",
+        dest="all_missing",
+        action="store_true",
+        help="With --auto, run on every profile missing a description",
+    )
+
    profile_show = profile_subparsers.add_parser("show", help="Show profile details")
    profile_show.add_argument("profile_name", help="Profile to show")

--- a/hermes_cli/model_switch.py
+++ b/hermes_cli/model_switch.py
@ -1688,7 +1688,26 @@ def list_authenticated_providers(
                continue
            # Live model discovery from custom provider endpoints (matches
            # Section 3 behavior for user ``providers:`` entries).
-            if api_url and api_key:
+            # May also probe when no api_key is set (e.g. local llama.cpp /
+            # Ollama servers) — the /models endpoint often works without
+            # auth.  The CLI's _model_flow_named_custom always probes; the
+            # Telegram/Discord picker follows the narrower policy below.
+            # Live-discovery policy:
+            # - With an api_key, the user has explicitly opted into the
+            #   endpoint and live /models is the source of truth — replace
+            #   the (possibly partial) ``models:`` subset configured for
+            #   context-length overrides with the full live catalog.
+            #   This is the Bifrost / aggregator-gateway case.
+            # - Without an api_key but with an explicit ``models:`` list
+            #   (or top-level ``model:``), the user is narrowing a public
+            #   endpoint to a specific subset (e.g. ollama.com /v1/models
+            #   returns 35 models but the user only wants 4). Preserve the
+            #   explicit list and skip live discovery.
+            # - Without an api_key AND no explicit models, fall through to
+            #   live discovery so bare-endpoint custom providers (local
+            #   llama.cpp / Ollama servers) still appear populated.
+            should_probe = bool(api_url) and (bool(api_key) or not grp["models"])
+            if should_probe:
                try:
                    from hermes_cli.models import fetch_api_models

--- a/hermes_cli/plugins.py
+++ b/hermes_cli/plugins.py
@ -608,6 +608,38 @@ class PluginContext:
            self.manifest.name, provider.name,
        )

+    # -- browser provider registration ---------------------------------------
+
+    def register_browser_provider(self, provider) -> None:
+        """Add a cloud browser backend to the browser provider registry.
+
+        ``provider`` must be an instance of
+        :class:`agent.browser_provider.BrowserProvider`; anything else is
+        logged as a warning and ignored. Its ``provider.name`` is what
+        ``browser.cloud_provider`` in ``config.yaml`` matches against
+        when routing cloud-mode ``browser_*`` tool calls.
+
+        Intended to mirror :meth:`register_web_search_provider` — same
+        registration shape, same gating, same logging. The registry
+        filled in here is the one consulted by the browser subsystem's
+        dispatcher (:func:`tools.browser_tool._get_cloud_provider`).
+        """
+        from agent.browser_provider import BrowserProvider
+        from agent.browser_registry import register_provider as _add_to_registry
+
+        if isinstance(provider, BrowserProvider):
+            _add_to_registry(provider)
+            logger.info(
+                "Plugin '%s' registered browser provider: %s",
+                self.manifest.name, provider.name,
+            )
+            return
+
+        # Wrong type: report which plugin misbehaved and register nothing.
+        logger.warning(
+            "Plugin '%s' tried to register a browser provider that does "
+            "not inherit from BrowserProvider. Ignoring.",
+            self.manifest.name,
+        )
+
    # -- platform adapter registration ---------------------------------------

    def register_platform(
--- a/hermes_cli/profile_describer.py
+++ b/hermes_cli/profile_describer.py
@ -0,0 +1,299 @@
+"""Profile describer — auto-generate ``description`` for a profile.
+
+Used by ``hermes profile describe <name> --auto`` and the dashboard's
+"auto-generate description" button. Reads the profile's installed
+skill names, model+provider, and name (memory is deliberately not
+read — see the design notes below), then asks the auxiliary LLM to
+produce a 1-2 sentence description of what the profile is good at.
+
+Result is written to ``<profile_dir>/profile.yaml`` with
+``description_auto: true`` so the dashboard can surface a "review"
+badge. User can edit afterward to confirm.
+
+Design notes
+------------
+- Mirrors the shape of ``hermes_cli/kanban_specify.py``: lazy aux
+  client import inside the function, lenient response parse, never
+  raises on expected failure modes.
+- Reads at most ``MAX_SKILLS_FOR_PROMPT`` skill names to keep the
+  prompt bounded. No skill body — names + categories are enough
+  signal and avoid blowing context on profiles with 100+ skills.
+- Memory is intentionally NOT read here. Memories are personal and
+  the orchestrator routes work to a *role* not a *biography*. If we
+  find later that memory adds signal we can wire it; for now,
+  skills + name + model is plenty.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import re
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Optional
+
+from hermes_cli import profiles as profiles_mod
+
+logger = logging.getLogger(__name__)
+
+# Cap on how many skill names we feed the LLM. Profiles with 200+
+# skills (uncommon but possible) would blow context otherwise. The cap
+# applies to the whole list, not per category — see _collect_skills.
+MAX_SKILLS_FOR_PROMPT = 60
+
+
+_SYSTEM_PROMPT = """You are a profile-describer for the Hermes Agent kanban board.
+
+A user runs multiple "profiles" — distinct agent identities, each with their
+own skills, model, and configuration. The kanban board's orchestrator routes
+work to whichever profile best fits each task. To do that well, every
+profile needs a short, concrete description of what it's good at.
+
+You are given a profile's:
+  - Name
+  - Model / provider
+  - List of installed skill names (a strong signal of role / domain)
+
+Produce a single JSON object with exactly one key:
+
+  {
+    "description": "<1-2 sentence description, plain prose, no preamble>"
+  }
+
+Rules:
+  - The description is what an orchestrator will read to decide whether to
+    route a task here. Lead with the profile's strongest capability.
+  - Stay concrete. Bad: "an AI agent that helps users."
+                  Good: "Reads and modifies Python codebases — runs tests,
+                         refactors functions, opens GitHub PRs."
+  - 1-2 sentences, <= 280 characters total.
+  - Never invent capabilities the skills don't suggest.
+  - Never write "Hermes Agent profile" or other meta-narration.
+  - No code fences, no preamble, no closing remarks. Output only JSON.
+"""
+
+
+_USER_TEMPLATE = """Profile name: {name}
+Default model: {model}
+Provider: {provider}
+Installed skill count: {skill_count}
+Notable skills (up to {skill_cap}):
+{skill_list}
+"""
+
+
+# Matches a Markdown code-fence opener (optionally tagged ``json``) at the
+# start of a line, or a fence closer at the end of a line. Used to unwrap
+# fenced model replies before JSON parsing.
+_FENCE_RE = re.compile(r"^```(?:json)?\s*|\s*```$", re.MULTILINE)
+
+
+@dataclass
+class DescribeOutcome:
+    """Result of describing a single profile."""
+
+    # Normalized profile name the outcome refers to.
+    profile_name: str
+    # True only when a description was generated and written.
+    ok: bool
+    # Short status text: the failure reason, or "described" on success.
+    reason: str = ""
+    # The description that was written; stays None on failure.
+    description: Optional[str] = None
+
+
+def _collect_skills(profile_dir: Path) -> list[str]:
+    """Return a stable, capped list of skill names for the prompt.
+
+    Format: ``category/skill_name`` where category is the top-level
+    subdir under ``skills/`` (e.g. ``devops``, ``research``) and
+    ``skill_name`` is the directory holding ``SKILL.md``. Skills that
+    live directly under ``skills/`` show as bare ``skill_name``.
+
+    Anything under a ``.hub`` or ``.git`` directory inside ``skills/``
+    is ignored. The sorted result is capped at
+    ``MAX_SKILLS_FOR_PROMPT`` entries by evenly-spaced sampling.
+    Returns an empty list when the profile has no ``skills/`` directory.
+    """
+    skills_dir = profile_dir / "skills"
+    if not skills_dir.is_dir():
+        return []
+    names: list[str] = []
+    for md in skills_dir.rglob("SKILL.md"):
+        try:
+            rel = md.relative_to(skills_dir)
+        except ValueError:
+            continue
+        parts = rel.parts[:-1]  # drop SKILL.md filename
+        if not parts:
+            continue
+        # Match on path components relative to skills/ rather than a
+        # "/.hub/" substring of the absolute path: this works with any
+        # path separator and ignores directories above the profile.
+        if ".hub" in parts or ".git" in parts:
+            continue
+        # parts[-1] is the skill dir name; parts[0] is the top-level category
+        if len(parts) == 1:
+            names.append(parts[0])
+        else:
+            names.append(f"{parts[0]}/{parts[-1]}")
+    names.sort()
+    # Keep within prompt budget. Skills earlier in alphabet aren't more
+    # important — we'll let the LLM see a sample. Pick evenly-spaced
+    # entries instead of just the head so a profile with skills A..Z
+    # doesn't get described as "starts with A".
+    if len(names) <= MAX_SKILLS_FOR_PROMPT:
+        return names
+    step = len(names) / MAX_SKILLS_FOR_PROMPT
+    return [names[int(i * step)] for i in range(MAX_SKILLS_FOR_PROMPT)]
+
+
+def _extract_json_blob(raw: str) -> Optional[dict]:
+    """Pull the outermost ``{...}`` JSON object out of a model reply.
+
+    Code fences are removed first, then the text from the first ``{``
+    to the last ``}`` is parsed. Returns ``None`` for empty input, when
+    no usable brace pair is found, when parsing fails, or when the
+    parsed value is not a dict.
+    """
+    if not raw:
+        return None
+    text = _FENCE_RE.sub("", raw.strip())
+    start, end = text.find("{"), text.rfind("}")
+    # A missing "}" gives end == -1, which is covered by ``end <= start``.
+    if start < 0 or end <= start:
+        return None
+    try:
+        decoded = json.loads(text[start : end + 1])
+    except ValueError:  # json.JSONDecodeError is a ValueError subclass
+        return None
+    return decoded if isinstance(decoded, dict) else None
+
+
+def describe_profile(
+    profile_name: str,
+    *,
+    overwrite: bool = False,
+    timeout: Optional[int] = None,
+) -> DescribeOutcome:
+    """Auto-generate a description for one profile.
+
+    Returns an outcome describing what happened. Never raises for
+    expected failure modes (profile missing, no aux client configured,
+    API error, malformed response) — those surface via ``ok=False`` so
+    a sweep can continue past individual failures.
+
+    ``overwrite`` controls whether an existing user-authored description
+    is replaced. By default we refuse to overwrite a description with
+    ``description_auto: false`` to protect curated text. Auto-generated
+    descriptions (``description_auto: true``) are always replaceable.
+
+    ``timeout`` is forwarded to the chat-completion call and falls back
+    to 60 when unset (or zero).
+
+    On success the description (at most 280 characters) is written via
+    ``write_profile_meta`` with ``description_auto=True``.
+    """
+    canon = profiles_mod.normalize_profile_name(profile_name)
+    if not profiles_mod.profile_exists(canon):
+        # Special case: "default" exists as a virtual profile name
+        # mapped to the default home dir. profile_exists() handles it.
+        return DescribeOutcome(canon, False, "profile not found")
+
+    try:
+        if canon == "default":
+            from hermes_constants import get_hermes_home  # type: ignore
+            profile_dir = Path(get_hermes_home())
+        else:
+            profile_dir = profiles_mod.get_profile_dir(canon)
+    except Exception as exc:
+        return DescribeOutcome(canon, False, f"cannot resolve profile dir: {exc}")
+
+    # Honor curated descriptions unless --overwrite.
+    existing = profiles_mod.read_profile_meta(profile_dir)
+    if existing.get("description") and not existing.get("description_auto") and not overwrite:
+        return DescribeOutcome(
+            canon,
+            False,
+            "profile already has a user-authored description "
+            "(use --overwrite to replace)",
+        )
+
+    skill_names = _collect_skills(profile_dir)
+    skill_list = "\n".join(f"  - {n}" for n in skill_names) or "  (no skills installed)"
+
+    # Total number of SKILL.md files, not just the sampled names above.
+    # Exclusions are matched on path components relative to skills/, so
+    # the filter does not depend on the platform's path separator or on
+    # directories above the profile.
+    skills_dir = profile_dir / "skills"
+    skill_count = 0
+    if skills_dir.is_dir():
+        for md in skills_dir.rglob("SKILL.md"):
+            rel_dirs = md.relative_to(skills_dir).parts[:-1]
+            if ".hub" not in rel_dirs and ".git" not in rel_dirs:
+                skill_count += 1
+
+    # Read model + provider from the profile's config.
+    try:
+        model, provider = profiles_mod._read_config_model(profile_dir)
+    except Exception:
+        model, provider = None, None
+
+    # Lazy import: a missing/broken auxiliary client is an expected
+    # failure mode and must surface as ok=False, not as an ImportError.
+    try:
+        from agent.auxiliary_client import (  # type: ignore
+            get_auxiliary_extra_body,
+            get_text_auxiliary_client,
+        )
+    except Exception as exc:
+        logger.debug("describe: auxiliary client import failed: %s", exc)
+        return DescribeOutcome(canon, False, "auxiliary client unavailable")
+
+    try:
+        client, aux_model = get_text_auxiliary_client("profile_describer")
+    except Exception as exc:
+        logger.debug("describe: get_text_auxiliary_client failed: %s", exc)
+        return DescribeOutcome(canon, False, "auxiliary client unavailable")
+
+    if client is None or not aux_model:
+        return DescribeOutcome(canon, False, "no auxiliary client configured")
+
+    user_msg = _USER_TEMPLATE.format(
+        name=canon,
+        model=(model or "(unset)"),
+        provider=(provider or "(unset)"),
+        skill_count=skill_count,
+        skill_cap=MAX_SKILLS_FOR_PROMPT,
+        skill_list=skill_list,
+    )
+
+    try:
+        resp = client.chat.completions.create(
+            model=aux_model,
+            messages=[
+                {"role": "system", "content": _SYSTEM_PROMPT},
+                {"role": "user", "content": user_msg},
+            ],
+            temperature=0.3,
+            max_tokens=400,
+            timeout=timeout or 60,
+            extra_body=get_auxiliary_extra_body() or None,
+        )
+    except Exception as exc:
+        logger.info("describe: API call failed for %s (%s)", canon, exc)
+        return DescribeOutcome(canon, False, f"LLM error: {type(exc).__name__}")
+
+    # A response with an unexpected shape is treated like an empty reply.
+    try:
+        raw = resp.choices[0].message.content or ""
+    except Exception:
+        raw = ""
+
+    parsed = _extract_json_blob(raw)
+    if parsed is None:
+        # Fall back: take the raw text trimmed to one paragraph.
+        text = raw.strip().split("\n\n", 1)[0]
+        if not text:
+            return DescribeOutcome(canon, False, "LLM returned an empty response")
+        description = text[:280]
+    else:
+        val = parsed.get("description")
+        if not isinstance(val, str) or not val.strip():
+            return DescribeOutcome(
+                canon, False, "LLM response missing 'description' field"
+            )
+        description = val.strip()[:280]
+
+    try:
+        profiles_mod.write_profile_meta(
+            profile_dir,
+            description=description,
+            description_auto=True,
+        )
+    except Exception as exc:
+        return DescribeOutcome(canon, False, f"failed to write profile.yaml: {exc}")
+
+    return DescribeOutcome(canon, True, "described", description=description)
+
+
+def list_describable_profiles(*, missing_only: bool = True) -> list[str]:
+    """Return the names of profiles eligible for auto-description.
+
+    With ``missing_only=True`` (default) a profile is skipped only when
+    it has a non-blank description that is not flagged
+    ``description_auto`` — so profiles with no description *and*
+    profiles whose description was auto-generated are both returned.
+    ``missing_only=False`` returns every profile.
+    """
+    def _is_curated(profile) -> bool:
+        # Non-blank text that is not marked as auto-generated.
+        return bool((profile.description or "").strip()) and not profile.description_auto
+
+    return [
+        profile.name
+        for profile in profiles_mod.list_profiles()
+        if not (missing_only and _is_curated(profile))
+    ]
--- a/hermes_cli/profiles.py
+++ b/hermes_cli/profiles.py
@ -412,6 +412,17 @@ class ProfileInfo:
    distribution_name: Optional[str] = None
    distribution_version: Optional[str] = None
    distribution_source: Optional[str] = None
+    # Free-form description (1-2 sentences) of what this profile is good
+    # at. Persisted in ``<profile_dir>/profile.yaml``. Empty when the
+    # user has not described the profile (legacy profiles, fresh
+    # installs). Surfaced to the kanban decomposer so it can route work
+    # to the right profile based on role rather than name alone.
+    description: str = ""
+    # When True, ``description`` was auto-generated by the LLM
+    # describer and has not been confirmed by the user. The dashboard
+    # surfaces a "review" badge in this case so the user can edit or
+    # accept.
+    description_auto: bool = False


 def _read_distribution_meta(profile_dir: Path) -> tuple:
@ -479,6 +490,82 @@ def _count_skills(profile_dir: Path) -> int:
    return count


+# ---------------------------------------------------------------------------
+# profile.yaml — per-profile metadata (description, role, etc.)
+# ---------------------------------------------------------------------------
+#
+# We keep this file deliberately tiny and separate from the profile's
+# ``config.yaml``. ``config.yaml`` is the user-facing Hermes config
+# (~5000 lines of defaults); ``profile.yaml`` is metadata ABOUT the
+# profile itself (its role, who described it). Mixing them makes both
+# harder to read.
+#
+# Missing file -> empty defaults; never an error. The kanban decomposer
+# tolerates empty descriptions and just falls back to the profile name.
+
+
+def _profile_yaml_path(profile_dir: Path) -> Path:
+    """Return the location of ``profile.yaml`` inside *profile_dir*."""
+    return profile_dir.joinpath("profile.yaml")
+
+
+def read_profile_meta(profile_dir: Path) -> dict:
+    """Load description metadata from ``<profile_dir>/profile.yaml``.
+
+    The result always has exactly two keys: ``description`` (stripped
+    string) and ``description_auto`` (bool). The defaults
+    ``{"description": "", "description_auto": False}`` are returned when
+    the file is missing, cannot be opened or parsed, or does not hold a
+    mapping. Errors while reading are swallowed so that one corrupt
+    profile.yaml cannot break ``hermes profile list``.
+    """
+    fallback = {"description": "", "description_auto": False}
+    meta_path = _profile_yaml_path(profile_dir)
+    if not meta_path.is_file():
+        return fallback
+    try:
+        import yaml
+        with open(meta_path, "r", encoding="utf-8") as handle:
+            loaded = yaml.safe_load(handle) or {}
+    except Exception:
+        return fallback
+    if not isinstance(loaded, dict):
+        return fallback
+    description = str(loaded.get("description") or "").strip()
+    auto_flag = bool(loaded.get("description_auto", False))
+    return {"description": description, "description_auto": auto_flag}
+
+
+def write_profile_meta(
+    profile_dir: Path,
+    *,
+    description: Optional[str] = None,
+    description_auto: Optional[bool] = None,
+) -> None:
+    """Update ``<profile_dir>/profile.yaml`` in place.
+
+    Only the explicitly passed fields are overwritten; unspecified
+    fields (and any other keys already in the file) preserve existing
+    values. Creates the file if missing. If the existing file cannot be
+    read or does not hold a mapping, it is replaced by a file containing
+    just the passed fields.
+
+    Raises ``FileNotFoundError`` when the profile directory itself does
+    not exist.
+    """
+    if not profile_dir.is_dir():
+        raise FileNotFoundError(f"profile directory does not exist: {profile_dir}")
+    import yaml
+    path = _profile_yaml_path(profile_dir)
+    existing: dict = {}
+    if path.is_file():
+        try:
+            with open(path, "r", encoding="utf-8") as f:
+                loaded = yaml.safe_load(f) or {}
+            if isinstance(loaded, dict):
+                existing = loaded
+        except Exception:
+            # Deliberately lenient: an unreadable file is overwritten.
+            existing = {}
+    if description is not None:
+        existing["description"] = description.strip()
+    if description_auto is not None:
+        existing["description_auto"] = bool(description_auto)
+    with open(path, "w", encoding="utf-8") as f:
+        # allow_unicode keeps non-ASCII text (em-dashes, accents) readable
+        # in the file instead of ``\uXXXX`` escapes; the file is UTF-8 and
+        # is meant to be reviewed and edited by the user.
+        yaml.safe_dump(
+            existing,
+            f,
+            sort_keys=False,
+            default_flow_style=False,
+            allow_unicode=True,
+        )
+
+
 # ---------------------------------------------------------------------------
 # CRUD operations
 # ---------------------------------------------------------------------------
@ -493,6 +580,7 @@ def list_profiles() -> List[ProfileInfo]:
    if default_home.is_dir():
        model, provider = _read_config_model(default_home)
        dist_name, dist_version, dist_source = _read_distribution_meta(default_home)
+        meta = read_profile_meta(default_home)
        profiles.append(ProfileInfo(
            name="default",
            path=default_home,
@ -505,6 +593,8 @@ def list_profiles() -> List[ProfileInfo]:
            distribution_name=dist_name,
            distribution_version=dist_version,
            distribution_source=dist_source,
+            description=meta.get("description", ""),
+            description_auto=meta.get("description_auto", False),
        ))

    # Named profiles
@ -519,6 +609,7 @@ def list_profiles() -> List[ProfileInfo]:
            model, provider = _read_config_model(entry)
            alias_path = wrapper_dir / name
            dist_name, dist_version, dist_source = _read_distribution_meta(entry)
+            meta = read_profile_meta(entry)
            profiles.append(ProfileInfo(
                name=name,
                path=entry,
@ -532,6 +623,8 @@ def list_profiles() -> List[ProfileInfo]:
                distribution_name=dist_name,
                distribution_version=dist_version,
                distribution_source=dist_source,
+                description=meta.get("description", ""),
+                description_auto=meta.get("description_auto", False),
            ))

    return profiles
@ -544,6 +637,7 @@ def create_profile(
    clone_config: bool = False,
    no_alias: bool = False,
    no_skills: bool = False,
+    description: Optional[str] = None,
 ) -> Path:
    """Create a new profile directory.

@ -667,6 +761,19 @@ def create_profile(
        except OSError:
            pass  # best-effort — the feature still works via the empty skills/ dir

+    # Persist description if the caller provided one. Done last so a
+    # partial-create failure doesn't strand a description file in an
+    # incomplete profile.
+    if description and description.strip():
+        try:
+            write_profile_meta(
+                profile_dir,
+                description=description.strip(),
+                description_auto=False,
+            )
+        except Exception:
+            pass  # non-fatal — user can describe later with `hermes profile describe`
+
    return profile_dir


--- a/hermes_cli/proxy/adapters/base.py
+++ b/hermes_cli/proxy/adapters/base.py
@ -81,6 +81,21 @@ class UpstreamAdapter(ABC):
              refresh fails. The proxy will return 401 to the client.
        """

+    def get_retry_credential(
+        self,
+        *,
+        failed_credential: UpstreamCredential,
+        status_code: int,
+    ) -> Optional[UpstreamCredential]:
+        """Offer a replacement credential once the upstream rejects one.
+
+        This base implementation never retries: both arguments are ignored
+        and ``None`` is returned. Adapters whose provider has a one-shot
+        fallback path (for example dropping from a preferred token type to
+        a legacy bearer after the first request is rejected) override it.
+        """
+        del failed_credential, status_code  # deliberately unused here
+        return None
+
    def describe(self) -> str:
        """One-line status summary for ``proxy status``."""
        try:
--- a/hermes_cli/proxy/adapters/nous_portal.py
+++ b/hermes_cli/proxy/adapters/nous_portal.py
@ -1,12 +1,13 @@
 """Nous Portal upstream adapter.

-Reads the user's Nous OAuth state from ``~/.hermes/auth.json``, refreshes
-the access token and mints a fresh agent key when needed, and exposes the
-upstream base URL plus minted bearer for the proxy server to forward to.
+Reads the user's Nous OAuth state from ``~/.hermes/auth.json`` through the
+shared runtime resolver, refreshes the access token and resolves the
+``agent_key`` compatibility credential when needed, then exposes the upstream
+base URL plus bearer for the proxy server to forward to.

-The minted ``agent_key`` (not the OAuth ``access_token``) is what
-``inference-api.nousresearch.com`` accepts as a bearer. The refresh helper
-already handles both — see :func:`hermes_cli.auth.refresh_nous_oauth_from_state`.
+The ``agent_key`` field may hold either a NAS invoke JWT or the legacy
+opaque session key. The refresh helper handles both — see
+:func:`hermes_cli.auth.resolve_nous_runtime_credentials`.
 """

 from __future__ import annotations
@ -16,11 +17,18 @@ import threading
 from typing import Any, Dict, FrozenSet, Optional

 from hermes_cli.auth import (
+    AuthError,
    DEFAULT_NOUS_INFERENCE_URL,
+    NOUS_INFERENCE_AUTH_MODE_AUTO,
+    NOUS_INFERENCE_AUTH_MODE_LEGACY,
    _load_auth_store,
+    _auth_store_lock,
+    _is_terminal_nous_refresh_error,
+    _quarantine_nous_oauth_state,
+    _quarantine_nous_pool_entries,
    _save_auth_store,
    _write_shared_nous_state,
-    refresh_nous_oauth_from_state,
+    resolve_nous_runtime_credentials,
 )
 from hermes_cli.proxy.adapters.base import UpstreamAdapter, UpstreamCredential

@ -43,9 +51,8 @@ class NousPortalAdapter(UpstreamAdapter):
    """Proxy upstream for the Nous Portal inference API."""

    def __init__(self) -> None:
-        # Lock guards _load → refresh → _save against parallel proxy requests
-        # racing to refresh expired tokens. Refresh itself is HTTP, so we
-        # hold the lock across the network call (brief; OAuth refresh is fast).
+        # Serialize proxy requests in this process; cross-process token refresh
+        # and persistence are handled by resolve_nous_runtime_credentials().
        self._lock = threading.Lock()

    @property
@ -72,6 +79,26 @@ class NousPortalAdapter(UpstreamAdapter):
        )

    def get_credential(self) -> UpstreamCredential:
+        return self._get_credential(
+            inference_auth_mode=NOUS_INFERENCE_AUTH_MODE_AUTO,
+        )
+
+    def get_retry_credential(
+        self,
+        *,
+        failed_credential: UpstreamCredential,
+        status_code: int,
+    ) -> Optional[UpstreamCredential]:
+        """Resolve a legacy-mode credential after a rejected bearer.
+
+        Returns ``None`` (no retry) unless the upstream answered 401 and
+        the rejected bearer contains exactly two ``.`` characters. When
+        both hold, the credential is resolved again with
+        ``NOUS_INFERENCE_AUTH_MODE_LEGACY``.
+        """
+        # The status check runs first, so the bearer is only inspected for
+        # 401 responses. Two dots is presumably the three-segment JWT shape.
+        unauthorized = status_code == 401
+        if not unauthorized or failed_credential.bearer.count(".") != 2:
+            return None
+        logger.info("proxy: Nous upstream rejected bearer; retrying with legacy session key")
+        legacy_mode = NOUS_INFERENCE_AUTH_MODE_LEGACY
+        return self._get_credential(inference_auth_mode=legacy_mode)
+
+    def _get_credential(self, *, inference_auth_mode: str) -> UpstreamCredential:
        with self._lock:
            state = self._read_state()
            if state is None:
@ -80,28 +107,43 @@ class NousPortalAdapter(UpstreamAdapter):
                )

            try:
-                refreshed = refresh_nous_oauth_from_state(state)
+                refreshed = resolve_nous_runtime_credentials(
+                    inference_auth_mode=inference_auth_mode,
+                )
+            except AuthError as exc:
+                if _is_terminal_nous_refresh_error(exc):
+                    _quarantine_nous_oauth_state(
+                        state,
+                        exc,
+                        reason="proxy_refresh_failure",
+                    )
+                    self._save_state(
+                        state,
+                        quarantine_error=exc,
+                        quarantine_reason="proxy_refresh_failure",
+                    )
+                raise RuntimeError(
+                    f"Failed to refresh Nous Portal credentials: {exc}"
+                ) from exc
            except Exception as exc:
                raise RuntimeError(
                    f"Failed to refresh Nous Portal credentials: {exc}"
                ) from exc

-            self._save_state(refreshed)
-
-            agent_key = refreshed.get("agent_key")
+            agent_key = refreshed.get("api_key")
            if not agent_key:
                raise RuntimeError(
                    "Nous Portal refresh did not return a usable agent_key. "
                    "Try `hermes login nous` to re-authenticate."
                )

-            base_url = refreshed.get("inference_base_url") or DEFAULT_NOUS_INFERENCE_URL
+            base_url = refreshed.get("base_url") or DEFAULT_NOUS_INFERENCE_URL
            base_url = base_url.rstrip("/")

            return UpstreamCredential(
                bearer=agent_key,
                base_url=base_url,
-                expires_at=refreshed.get("agent_key_expires_at"),
+                expires_at=refreshed.get("expires_at"),
            )

    # ------------------------------------------------------------------
@ -111,7 +153,8 @@ class NousPortalAdapter(UpstreamAdapter):

    def _read_state(self) -> Optional[Dict[str, Any]]:
        try:
-            store = _load_auth_store()
+            with _auth_store_lock():
+                store = _load_auth_store()
        except Exception as exc:
            logger.warning("proxy: failed to load auth store: %s", exc)
            return None
@ -121,17 +164,28 @@ class NousPortalAdapter(UpstreamAdapter):
            return None
        return dict(state)  # copy so the refresh helper can mutate freely

-    def _save_state(self, state: Dict[str, Any]) -> None:
+    def _save_state(
+        self,
+        state: Dict[str, Any],
+        *,
+        quarantine_error: Optional[AuthError] = None,
+        quarantine_reason: Optional[str] = None,
+    ) -> None:
        try:
-            store = _load_auth_store()
-            providers = store.setdefault("providers", {})
-            providers["nous"] = state
-            _save_auth_store(store)
+            with _auth_store_lock():
+                store = _load_auth_store()
+                if quarantine_error is not None and quarantine_reason:
+                    _quarantine_nous_pool_entries(
+                        store,
+                        quarantine_error,
+                        reason=quarantine_reason,
+                    )
+                providers = store.setdefault("providers", {})
+                providers["nous"] = state
+                _save_auth_store(store)
            _write_shared_nous_state(state)
        except Exception as exc:
-            # Best effort — we still return the fresh credential. The next
-            # request just won't see cached state, which means another refresh.
-            logger.warning("proxy: failed to persist refreshed Nous state: %s", exc)
+            logger.warning("proxy: failed to persist Nous quarantine state: %s", exc)


 __all__ = ["NousPortalAdapter"]
--- a/hermes_cli/proxy/cli.py
+++ b/hermes_cli/proxy/cli.py
@ -114,7 +114,7 @@ def cmd_proxy(args: Any) -> int:
        return cmd_proxy_start(args)
    if sub == "status":
        return cmd_proxy_status(args)
-    if sub in ("providers", "list"):
+    if sub in {"providers", "list"}:
        return cmd_proxy_list_providers(args)
    # No subcommand → print short help.
    print(
--- a/hermes_cli/proxy/server.py
+++ b/hermes_cli/proxy/server.py
@ -26,7 +26,7 @@ except ImportError:
    web = None  # type: ignore[assignment]
    AIOHTTP_AVAILABLE = False

-from hermes_cli.proxy.adapters.base import UpstreamAdapter
+from hermes_cli.proxy.adapters.base import UpstreamAdapter, UpstreamCredential

 logger = logging.getLogger(__name__)

@ -76,7 +76,7 @@ def _filter_response_headers(headers) -> dict:
        if key.lower() in _HOP_BY_HOP_HEADERS:
            continue
        # aiohttp recomputes Content-Encoding/Content-Length on stream — let it.
-        if key.lower() in ("content-encoding", "content-length"):
+        if key.lower() in {"content-encoding", "content-length"}:
            continue
        out[key] = value
    return out
@ -136,50 +136,93 @@ def create_app(adapter: UpstreamAdapter) -> "web.Application":
            logger.warning("proxy: credential resolution failed: %s", exc)
            return _json_error(401, str(exc), code="upstream_auth_failed")

-        upstream_url = f"{cred.base_url.rstrip('/')}{rel_path}"
-        # Preserve query string verbatim.
-        if request.query_string:
-            upstream_url = f"{upstream_url}?{request.query_string}"
-
        # Forward body verbatim. Read into memory once — request bodies for
        # chat/completions/embeddings are small (<1MB typically). If we ever
        # need to forward large multipart uploads we'll switch to streaming
        # the request body too.
        body = await request.read()

-        fwd_headers = _filter_request_headers(request.headers)
-        fwd_headers["Authorization"] = f"{cred.token_type} {cred.bearer}"
-
-        logger.debug(
-            "proxy: forwarding %s %s -> %s (body=%d bytes)",
-            request.method, rel_path, upstream_url, len(body),
-        )
-
-        # Use a per-request session so connection state doesn't leak between
-        # clients. Could be optimized to a shared session later.
        timeout = aiohttp.ClientTimeout(total=None, sock_connect=15, sock_read=300)
-        try:
-            session = aiohttp.ClientSession(timeout=timeout)
-        except Exception as exc:  # pragma: no cover - aiohttp setup issue
-            return _json_error(500, f"proxy session init failed: {exc}")

-        try:
-            upstream_resp = await session.request(
-                request.method,
-                upstream_url,
-                data=body if body else None,
-                headers=fwd_headers,
-                allow_redirects=False,
+        async def _send_upstream(active_cred: UpstreamCredential):
+            upstream_url = f"{active_cred.base_url.rstrip('/')}{rel_path}"
+            # Preserve query string verbatim.
+            if request.query_string:
+                upstream_url = f"{upstream_url}?{request.query_string}"
+
+            fwd_headers = _filter_request_headers(request.headers)
+            fwd_headers["Authorization"] = f"{active_cred.token_type} {active_cred.bearer}"
+
+            logger.debug(
+                "proxy: forwarding %s %s -> %s (body=%d bytes)",
+                request.method, rel_path, upstream_url, len(body),
            )
-        except aiohttp.ClientError as exc:
-            await session.close()
-            logger.warning("proxy: upstream connection failed: %s", exc)
-            return _json_error(502, f"upstream connection failed: {exc}",
-                               code="upstream_unreachable")
-        except asyncio.TimeoutError:
-            await session.close()
-            return _json_error(504, "upstream request timed out",
-                               code="upstream_timeout")
+
+            try:
+                session = aiohttp.ClientSession(timeout=timeout)
+            except Exception as exc:  # pragma: no cover - aiohttp setup issue
+                raise RuntimeError(f"proxy session init failed: {exc}") from exc
+
+            try:
+                upstream_resp = await session.request(
+                    request.method,
+                    upstream_url,
+                    data=body if body else None,
+                    headers=fwd_headers,
+                    allow_redirects=False,
+                )
+            except Exception:
+                await session.close()
+                raise
+            return session, upstream_resp
+
+        async def _open_upstream(active_cred: UpstreamCredential):
+            try:
+                return await _send_upstream(active_cred)
+            except RuntimeError as exc:
+                return _json_error(500, str(exc)), None
+            except aiohttp.ClientError as exc:
+                logger.warning("proxy: upstream connection failed: %s", exc)
+                return (
+                    _json_error(
+                        502,
+                        f"upstream connection failed: {exc}",
+                        code="upstream_unreachable",
+                    ),
+                    None,
+                )
+            except asyncio.TimeoutError:
+                return (
+                    _json_error(
+                        504,
+                        "upstream request timed out",
+                        code="upstream_timeout",
+                    ),
+                    None,
+                )
+
+        session_or_response, upstream_resp = await _open_upstream(cred)
+        if upstream_resp is None:
+            return session_or_response
+        session = session_or_response
+
+        if upstream_resp.status == 401:
+            try:
+                retry_cred = adapter.get_retry_credential(
+                    failed_credential=cred,
+                    status_code=upstream_resp.status,
+                )
+            except Exception as exc:
+                logger.warning("proxy: retry credential resolution failed: %s", exc)
+                retry_cred = None
+
+            if retry_cred is not None:
+                upstream_resp.release()
+                await session.close()
+                session_or_response, upstream_resp = await _open_upstream(retry_cred)
+                if upstream_resp is None:
+                    return session_or_response
+                session = session_or_response

        # Stream response back. Headers first, then chunked body.
        resp = web.StreamResponse(
--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@ -209,7 +209,7 @@ def _maybe_apply_codex_app_server_runtime(
    Returns the (possibly-rewritten) api_mode."""
    if not model_cfg:
        return api_mode
-    if provider not in ("openai", "openai-codex"):
+    if provider not in {"openai", "openai-codex"}:
        return api_mode
    runtime = str(model_cfg.get("openai_runtime") or "").strip().lower()
    if runtime == "codex_app_server":
@ -875,10 +875,9 @@ def _resolve_explicit_runtime(
            explicit_base_url
            or str(state.get("inference_base_url") or auth_mod.DEFAULT_NOUS_INFERENCE_URL).strip().rstrip("/")
        )
-        # Only use agent_key for inference — access_token is an OAuth token for the
-        # portal API (minting keys, refreshing tokens), not for the inference API.
-        # Falling back to access_token sends an OAuth bearer token to the inference
-        # endpoint, which returns 404 because it is not a valid inference credential.
+        # Only use the agent_key compatibility field for inference. It may be
+        # either a NAS invoke JWT or a legacy opaque session key; raw OAuth
+        # access_token fallback is handled by resolve_nous_runtime_credentials().
        api_key = explicit_api_key or str(state.get("agent_key") or "").strip()
        expires_at = state.get("agent_key_expires_at") or state.get("expires_at")
        if not api_key:
@ -1069,17 +1068,19 @@ def resolve_runtime_provider(
                getattr(entry, "runtime_api_key", None)
                or getattr(entry, "access_token", "")
            )
-        # For Nous, the pool entry's runtime_api_key is the agent_key — a
-        # short-lived inference credential (~30 min TTL).  The pool doesn't
+        # For Nous, the pool entry's runtime_api_key is the agent_key
+        # compatibility field: either an invoke JWT or legacy opaque key.
+        # The pool doesn't
        # refresh it during selection (that would trigger network calls in
        # non-runtime contexts like `hermes auth list`).  If the key is
        # expired, clear pool_api_key so we fall through to
-        # resolve_nous_runtime_credentials() which handles refresh + mint.
+        # resolve_nous_runtime_credentials() which handles refresh + fallback.
        if provider == "nous" and entry is not None and pool_api_key:
            min_ttl = max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800")))
            nous_state = {
                "agent_key": getattr(entry, "agent_key", None),
                "agent_key_expires_at": getattr(entry, "agent_key_expires_at", None),
+                "scope": getattr(entry, "scope", None),
            }
            if not _agent_key_is_usable(nous_state, min_ttl):
                logger.debug("Nous pool entry agent_key expired/missing, falling through to runtime resolution")
--- a/hermes_cli/session_recap.py
+++ b/hermes_cli/session_recap.py
@ -171,7 +171,7 @@ def _recent_window(
    cut = 0
    for i in range(len(messages) - 1, -1, -1):
        msg = messages[i]
-        if isinstance(msg, Mapping) and msg.get("role") in ("user", "assistant"):
+        if isinstance(msg, Mapping) and msg.get("role") in {"user", "assistant"}:
            count += 1
            if count >= window:
                cut = i
--- a/hermes_cli/status.py
+++ b/hermes_cli/status.py
@ -259,6 +259,27 @@ def show_status(args):
    if minimax_status.get("error") and not minimax_logged_in:
        print(f"    Error:      {minimax_status.get('error')}")

+    # xAI OAuth — separate try/except so an import failure here cannot
+    # disrupt the already-printed Nous/Codex/Qwen/MiniMax rows above.
+    try:
+        from hermes_cli.auth import get_xai_oauth_auth_status
+        xai_oauth_status = get_xai_oauth_auth_status() or {}
+    except Exception:
+        xai_oauth_status = {}
+
+    xai_oauth_logged_in = bool(xai_oauth_status.get("logged_in"))
+    print(
+        f"  {'xAI OAuth':<12}  {check_mark(xai_oauth_logged_in)} "
+        f"{'logged in' if xai_oauth_logged_in else 'not logged in (run: hermes auth add xai-oauth)'}"
+    )
+    xai_auth_file = xai_oauth_status.get("auth_store")
+    if xai_auth_file:
+        print(f"    Auth file:  {xai_auth_file}")
+    if xai_oauth_status.get("last_refresh"):
+        print(f"    Refreshed:  {_format_iso_timestamp(xai_oauth_status.get('last_refresh'))}")
+    if xai_oauth_status.get("error") and not xai_oauth_logged_in:
+        print(f"    Error:      {xai_oauth_status.get('error')}")
+
    # =========================================================================
    # Nous Subscription Features
    # =========================================================================
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@ -88,12 +88,40 @@ CONFIGURABLE_TOOLSETS = [
 # who want it opt in via `hermes tools` → Video Generation, which walks
 # them through provider + model selection.
 #
-# X search is off by default — gated on xAI credentials (SuperGrok OAuth
-# or XAI_API_KEY). Users opt in via `hermes tools` → X (Twitter) Search,
-# which walks them through credential setup. The tool's check_fn means
-# the schema won't appear to the model even if enabled without credentials.
+# X search is off by default for users without xAI credentials, but
+# auto-enables when SuperGrok OAuth tokens are stored OR XAI_API_KEY is
+# set — mirroring the HASS_TOKEN → homeassistant auto-enable below. The
+# `hermes tools` → X (Twitter) Search setup walks users through credential
+# setup. The tool's check_fn means the schema still won't appear to the
+# model if the credential later goes missing or expires.
 _DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "spotify", "discord", "discord_admin", "video", "video_gen", "x_search"}

+
+def _xai_credentials_present() -> bool:
+    """Report whether xAI credentials appear to be configured locally.
+
+    Drives the ``x_search`` auto-enable. Three sources are tried in order
+    and the first hit wins:
+
+    1. ``hermes_cli.auth._read_xai_oauth_tokens()`` completing without
+       raising (its return value is not inspected).
+    2. A non-blank ``XAI_API_KEY`` from ``tools.xai_http.get_env_value``.
+    3. A non-blank ``XAI_API_KEY`` in ``os.environ``.
+
+    Import errors and any other exception from the first two sources are
+    swallowed so the check falls through to the next source.
+    """
+    try:
+        from hermes_cli.auth import _read_xai_oauth_tokens
+
+        _read_xai_oauth_tokens()
+    except Exception:
+        pass
+    else:
+        return True
+
+    try:
+        from tools.xai_http import get_env_value as _xai_get_env_value
+
+        helper_value = str(_xai_get_env_value("XAI_API_KEY") or "").strip()
+    except Exception:
+        helper_value = ""
+    if helper_value:
+        return True
+
+    raw_env_value = os.environ.get("XAI_API_KEY") or ""
+    return bool(str(raw_env_value).strip())
+
 # Platform-scoped toolsets: only appear in the `hermes tools` checklist for
 # these platforms, and only resolve/save for these platforms.  A toolset
 # absent from this map is available on every platform (current behaviour).
@ -350,6 +378,17 @@ TOOL_CATEGORIES = {
    "browser": {
        "name": "Browser Automation",
        "icon": "🌐",
+        # Per-provider rows for Browserbase, Browser Use, and Firecrawl are
+        # injected at runtime from plugins.browser.<vendor>.provider via
+        # _plugin_browser_providers() in _visible_providers(). Only
+        # non-provider UX setup-flow rows remain here:
+        #   - "Nous Subscription (Browser Use cloud)" — managed Browser Use
+        #     billed via Nous subscription (requires_nous_auth +
+        #     override_env_vars). Uses the browser-use plugin as the
+        #     underlying backend but has a distinct setup UX.
+        #   - "Local Browser" — non-cloud option, no CloudBrowserProvider.
+        #   - "Camofox" — anti-detection local Firefox; short-circuits the
+        #     cloud-provider dispatch path via _is_camofox_mode().
        "providers": [
            {
                "name": "Nous Subscription (Browser Use cloud)",
@ -370,37 +409,6 @@ TOOL_CATEGORIES = {
                "browser_provider": "local",
                "post_setup": "agent_browser",
            },
-            {
-                "name": "Browserbase",
-                "badge": "paid",
-                "tag": "Cloud browser with stealth and proxies",
-                "env_vars": [
-                    {"key": "BROWSERBASE_API_KEY", "prompt": "Browserbase API key", "url": "https://browserbase.com"},
-                    {"key": "BROWSERBASE_PROJECT_ID", "prompt": "Browserbase project ID"},
-                ],
-                "browser_provider": "browserbase",
-                "post_setup": "agent_browser",
-            },
-            {
-                "name": "Browser Use",
-                "badge": "paid",
-                "tag": "Cloud browser with remote execution",
-                "env_vars": [
-                    {"key": "BROWSER_USE_API_KEY", "prompt": "Browser Use API key", "url": "https://browser-use.com"},
-                ],
-                "browser_provider": "browser-use",
-                "post_setup": "agent_browser",
-            },
-            {
-                "name": "Firecrawl",
-                "badge": "paid",
-                "tag": "Cloud browser with remote execution",
-                "env_vars": [
-                    {"key": "FIRECRAWL_API_KEY", "prompt": "Firecrawl API key", "url": "https://firecrawl.dev"},
-                ],
-                "browser_provider": "firecrawl",
-                "post_setup": "agent_browser",
-            },
            {
                "name": "Camofox",
                "badge": "free · local",
@ -1170,6 +1178,23 @@ def _get_platform_tools(
            if ts_tools and ts_tools.issubset(all_tool_names):
                enabled_toolsets.add(ts_key)

+        # Auto-enable ``x_search`` when xAI credentials are configured.
+        # Unlike ``homeassistant`` (whose ``ha_*`` tools live inside the
+        # platform composite and thus pass the subset check above),
+        # ``x_search`` is its own one-tool toolset that the composite does
+        # NOT include, so the subset loop never picks it up. Inject it
+        # directly here, mirroring the HASS_TOKEN → ``homeassistant`` rule
+        # below: once you have working creds, you don't have to also click
+        # through ``hermes tools`` to flip the toolset on. Only fires when
+        # the user has not yet saved an explicit toolset list — once they
+        # do, the saved list is authoritative.
+        x_search_auto_enabled = (
+            _toolset_allowed_for_platform("x_search", platform)
+            and _xai_credentials_present()
+        )
+        if x_search_auto_enabled:
+            enabled_toolsets.add("x_search")
+
        default_off = set(_DEFAULT_OFF_TOOLSETS)
        # Legacy safety: if the platform's own name matches a default-off
        # toolset (e.g. `homeassistant` platform + `homeassistant` toolset),
@ -1187,6 +1212,11 @@ def _get_platform_tools(
        # regressed after #14798 made cron honor per-platform tool config.
        if "homeassistant" in default_off and os.getenv("HASS_TOKEN"):
            default_off.remove("homeassistant")
+        # Symmetric carve-out for x_search auto-enable (see the inject
+        # block above). Without this, the default_off subtraction would
+        # strip the entry we just added.
+        if x_search_auto_enabled and "x_search" in default_off:
+            default_off.remove("x_search")
        enabled_toolsets -= default_off

    # Recover non-configurable platform toolsets (e.g. discord, feishu_doc,
@ -1653,6 +1683,61 @@ def _plugin_web_search_providers() -> list[dict]:
    return rows


+# Mirror of _plugin_web_search_providers for cloud browser backends. After
+# PR #25214, Browserbase / Browser Use / Firecrawl live as plugins under
+# plugins/browser/<vendor>/; this helper is the sole source of provider rows
+# for those three in the "Browser Automation" picker. The hardcoded
+# ``TOOL_CATEGORIES["browser"]`` entries that drove the category before
+# were deleted in the same PR; only non-provider UX setup-flow rows remain
+# ("Nous Subscription", "Local Browser", "Camofox") — see the comment block
+# in ``TOOL_CATEGORIES["browser"]`` for why each one stays hardcoded.
+def _plugin_browser_providers() -> list[dict]:
+    """Turn plugin-registered cloud browser providers into picker rows.
+
+    Every row follows the legacy ``TOOL_CATEGORIES["browser"]`` layout
+    (``name`` / ``badge`` / ``tag`` / ``env_vars`` / ``browser_provider`` /
+    ``post_setup``), so the picker treats hardcoded and plugin-registered
+    providers the same way.
+
+    ``browser_provider`` (the legacy config key written to
+    ``browser.cloud_provider``) and the ``browser_plugin_name`` marker are
+    both set to the provider's ``name`` so setup / write paths can route
+    through the registry when they want to.
+
+    Returns an empty list when the registry or plugin discovery cannot be
+    imported or raises. A provider is skipped when it has no ``name``,
+    when ``get_setup_schema()`` raises, or when the schema is not a dict.
+    """
+    try:
+        from agent.browser_registry import list_providers as _list_browser_providers
+        from hermes_cli.plugins import _ensure_plugins_discovered
+
+        _ensure_plugins_discovered()
+        registered = _list_browser_providers()
+    except Exception:
+        return []
+
+    picker_rows: list[dict] = []
+    for provider in registered:
+        provider_key = getattr(provider, "name", None)
+        if not provider_key:
+            continue
+        try:
+            setup = provider.get_setup_schema()
+        except Exception:
+            continue
+        if not isinstance(setup, dict):
+            continue
+        entry = {
+            "name": setup.get("name", provider.display_name),
+            "badge": setup.get("badge", ""),
+            "tag": setup.get("tag", ""),
+            "env_vars": setup.get("env_vars", []),
+            "browser_provider": provider_key,
+            "browser_plugin_name": provider_key,
+        }
+        # Optional field: only copied when the schema supplies a truthy value.
+        post_setup = setup.get("post_setup")
+        if post_setup:
+            entry["post_setup"] = post_setup
+        picker_rows.append(entry)
+    return picker_rows
+
+
 def _visible_providers(cat: dict, config: dict) -> list[dict]:
    """Return provider entries visible for the current auth/config state."""
    features = get_nous_subscription_features(config)
@ -1682,6 +1767,14 @@ def _visible_providers(cat: dict, config: dict) -> list[dict]:
    if cat.get("name") == "Web Search & Extract":
        visible.extend(_plugin_web_search_providers())

+    # Inject plugin-registered cloud browser backends. After PR #25214,
+    # Browserbase / Browser Use / Firecrawl are the plugin-supplied rows;
+    # the hardcoded "Nous Subscription" / "Local Browser" / "Camofox" rows
+    # stay because they're non-provider UX setup flows (subscription auth,
+    # local fallback, and the REST-API anti-detection backend respectively).
+    if cat.get("name") == "Browser Automation":
+        visible.extend(_plugin_browser_providers())
+
    return visible


@ -2590,6 +2683,9 @@ def _reconfigure_provider(provider: dict, config: dict):
        else:
            _print_info("    Kept current")

+    if provider.get("post_setup"):
+        _run_post_setup(provider["post_setup"])
+
    # Imagegen backends prompt for model selection on reconfig too.
    plugin_name = provider.get("image_gen_plugin_name")
    if plugin_name:
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@ -2609,7 +2609,11 @@ async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
    so the UI can render the verification page link + user code.
    """
    if provider_id == "nous":
-        from hermes_cli.auth import _request_device_code, PROVIDER_REGISTRY
+        from hermes_cli.auth import (
+            _nous_device_scope_with_env_override,
+            _request_nous_device_code_with_scope_fallback,
+            PROVIDER_REGISTRY,
+        )
        import httpx
        pconfig = PROVIDER_REGISTRY["nous"]
        portal_base_url = (
@ -2618,22 +2622,34 @@ async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
            or pconfig.portal_base_url
        ).rstrip("/")
        client_id = pconfig.client_id
-        scope = pconfig.scope
+        scope, explicit_scope = _nous_device_scope_with_env_override(
+            None,
+            default_scope=pconfig.scope,
+        )
+
        def _do_nous_device_request():
-            with httpx.Client(timeout=httpx.Timeout(15.0), headers={"Accept": "application/json"}) as client:
-                return _request_device_code(
+            with httpx.Client(
+                timeout=httpx.Timeout(15.0),
+                headers={"Accept": "application/json"},
+            ) as client:
+                return _request_nous_device_code_with_scope_fallback(
                    client=client,
                    portal_base_url=portal_base_url,
                    client_id=client_id,
                    scope=scope,
+                    allow_legacy_fallback=not explicit_scope,
                )
-        device_data = await asyncio.get_running_loop().run_in_executor(None, _do_nous_device_request)
+
+        device_data, effective_scope = await asyncio.get_running_loop().run_in_executor(
+            None, _do_nous_device_request
+        )
        sid, sess = _new_oauth_session("nous", "device_code")
        sess["device_code"] = str(device_data["device_code"])
        sess["interval"] = int(device_data["interval"])
        sess["expires_at"] = time.time() + int(device_data["expires_in"])
        sess["portal_base_url"] = portal_base_url
        sess["client_id"] = client_id
+        sess["scope"] = effective_scope
        threading.Thread(
            target=_nous_poller, args=(sid,), daemon=True, name=f"oauth-poll-{sid[:6]}"
        ).start()
@ -2762,7 +2778,11 @@ async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:

 def _nous_poller(session_id: str) -> None:
    """Background poller that drives a Nous device-code flow to completion."""
-    from hermes_cli.auth import _poll_for_token, refresh_nous_oauth_from_state
+    from hermes_cli.auth import (
+        NOUS_INFERENCE_AUTH_MODE_FRESH,
+        _poll_for_token,
+        refresh_nous_oauth_from_state,
+    )
    from datetime import datetime, timezone
    import httpx
    with _oauth_sessions_lock:
@ -2773,6 +2793,7 @@ def _nous_poller(session_id: str) -> None:
    client_id = sess["client_id"]
    device_code = sess["device_code"]
    interval = sess["interval"]
+    scope = sess.get("scope")
    expires_in = max(60, int(sess["expires_at"] - time.time()))
    try:
        with httpx.Client(timeout=httpx.Timeout(15.0), headers={"Accept": "application/json"}) as client:
@ -2791,7 +2812,7 @@ def _nous_poller(session_id: str) -> None:
            "portal_base_url": portal_base_url,
            "inference_base_url": token_data.get("inference_base_url"),
            "client_id": client_id,
-            "scope": token_data.get("scope"),
+            "scope": token_data.get("scope") or scope,
            "token_type": token_data.get("token_type", "Bearer"),
            "access_token": token_data["access_token"],
            "refresh_token": token_data.get("refresh_token"),
@ -2803,8 +2824,11 @@ def _nous_poller(session_id: str) -> None:
            "expires_in": token_ttl,
        }
        full_state = refresh_nous_oauth_from_state(
-            auth_state, min_key_ttl_seconds=300, timeout_seconds=15.0,
-            force_refresh=False, force_mint=True,
+            auth_state,
+            min_key_ttl_seconds=300,
+            timeout_seconds=15.0,
+            force_refresh=False,
+            inference_auth_mode=NOUS_INFERENCE_AUTH_MODE_FRESH,
        )
        from hermes_cli.auth import persist_nous_credentials
        persist_nous_credentials(full_state)
@ -5381,4 +5405,7 @@ def start_server(
        open_browser,
    )
    print(f"  Hermes Web UI → http://{host}:{port}")
-    uvicorn.run(app, host=host, port=port, log_level="warning")
+    # proxy_headers=False so _ws_client_is_allowed sees the real connection peer
+    # rather than X-Forwarded-For's rewritten value (which would defeat the
+    # loopback gate when behind a reverse proxy).
+    uvicorn.run(app, host=host, port=port, log_level="warning", proxy_headers=False)
--- a/optional-skills/creative/meme-generation/scripts/generate_meme.py
+++ b/optional-skills/creative/meme-generation/scripts/generate_meme.py
@ -358,7 +358,7 @@ def generate_meme(template_id: str, texts: list[str], output_path: str) -> str:
    img = _overlay_on_image(img, texts, fields)

    output = Path(output_path)
-    if output.suffix.lower() in (".jpg", ".jpeg"):
+    if output.suffix.lower() in {".jpg", ".jpeg"}:
        img = img.convert("RGB")
    img.save(str(output), quality=95)
    return str(output)
@ -378,7 +378,7 @@ def generate_from_image(
        result = _overlay_on_image(img, texts, fields)

    output = Path(output_path)
-    if output.suffix.lower() in (".jpg", ".jpeg"):
+    if output.suffix.lower() in {".jpg", ".jpeg"}:
        result = result.convert("RGB")
    result.save(str(output), quality=95)
    return str(output)
--- a/optional-skills/devops/watchers/scripts/watch_rss.py
+++ b/optional-skills/devops/watchers/scripts/watch_rss.py
@ -43,7 +43,7 @@ def _parse_feed(xml_bytes: bytes):
    entries = []
    for item in root.iter():
        tag = _strip_ns(item.tag)
-        if tag not in ("item", "entry"):
+        if tag not in {"item", "entry"}:
            continue
        # ElementTree Elements without children are *falsy* — use `is not None`.
        children = {_strip_ns(c.tag): c for c in item}
--- a/optional-skills/finance/stocks/scripts/stocks_client.py
+++ b/optional-skills/finance/stocks/scripts/stocks_client.py
@ -125,7 +125,7 @@ def fetch_url(url: str, headers: dict | None = None, retries: int = MAX_RETRIES)
                return json.loads(raw.decode("utf-8", errors="replace"))
        except urllib.error.HTTPError as e:
            last_err = e
-            if e.code in (404, 400):
+            if e.code in {404, 400}:
                break  # no point retrying
            wait = BACKOFF_BASE ** attempt
            time.sleep(wait)
--- a/optional-skills/health/fitness-nutrition/scripts/body_calc.py
+++ b/optional-skills/health/fitness-nutrition/scripts/body_calc.py
@ -95,11 +95,11 @@ def one_rep_max(weight, reps):

 def macros(tdee_kcal, goal):
    goal = goal.lower()
-    if goal in ("cut", "lose", "deficit"):
+    if goal in {"cut", "lose", "deficit"}:
        cals = tdee_kcal - 500
        p, f, c = 0.40, 0.30, 0.30
        label = "Fat Loss (-500 kcal)"
-    elif goal in ("bulk", "gain", "surplus"):
+    elif goal in {"bulk", "gain", "surplus"}:
        cals = tdee_kcal + 400
        p, f, c = 0.30, 0.25, 0.45
        label = "Lean Bulk (+400 kcal)"
@ -184,7 +184,7 @@ def main():
                int(sys.argv[4]), sys.argv[5], int(sys.argv[6]),
            )

-        elif cmd in ("1rm", "orm"):
+        elif cmd in {"1rm", "orm"}:
            one_rep_max(float(sys.argv[2]), int(sys.argv[3]))

        elif cmd == "macros":
--- a/optional-skills/migration/openclaw-migration/scripts/openclaw_to_hermes.py
+++ b/optional-skills/migration/openclaw-migration/scripts/openclaw_to_hermes.py
@ -610,7 +610,7 @@ def _is_secret_key(key: str) -> bool:
    normalized = _normalize_secret_key(key)
    if normalized == "token" or normalized.endswith("token"):
        return True
-    if normalized in ("auth", "authorization"):
+    if normalized in {"auth", "authorization"}:
        return True
    return any(marker in normalized for marker in _SECRET_KEY_MARKERS)

@ -831,7 +831,7 @@ class Migrator:
        # Flip the config-block flag when a conflict/error occurs on a
        # config.yaml write.  Later config-mutating options will skip rather
        # than attempting a partial write.
-        if status in (STATUS_CONFLICT, STATUS_ERROR) and destination is not None:
+        if status in {STATUS_CONFLICT, STATUS_ERROR} and destination is not None:
            dest_str = str(destination)
            if dest_str.endswith("config.yaml") or dest_str.endswith("config.yml"):
                self._config_apply_blocked = True
@ -1526,7 +1526,7 @@ class Migrator:
                api_key = resolve_secret_input(raw_key, openclaw_env)
                if not api_key:
                    # Warn if a SecretRef with file/exec source was silently unresolvable
-                    if isinstance(raw_key, dict) and raw_key.get("source") in ("file", "exec"):
+                    if isinstance(raw_key, dict) and raw_key.get("source") in {"file", "exec"}:
                        self.record(
                            "provider-keys",
                            self.source_root / "openclaw.json",
@ -1736,7 +1736,7 @@ class Migrator:
        tts_data: Dict[str, Any] = {}

        provider = tts.get("provider")
-        if isinstance(provider, str) and provider in ("elevenlabs", "openai", "edge", "microsoft"):
+        if isinstance(provider, str) and provider in {"elevenlabs", "openai", "edge", "microsoft"}:
            # OpenClaw renamed "edge" to "microsoft"; Hermes still uses "edge"
            tts_data["provider"] = "edge" if provider == "microsoft" else provider

@ -2304,11 +2304,11 @@ class Migrator:
        if defaults.get("thinkingDefault"):
            # Map OpenClaw thinking -> Hermes reasoning_effort
            thinking = defaults["thinkingDefault"]
-            if thinking in ("always", "high", "xhigh"):
+            if thinking in {"always", "high", "xhigh"}:
                agent_cfg["reasoning_effort"] = "high"
-            elif thinking in ("auto", "medium", "adaptive"):
+            elif thinking in {"auto", "medium", "adaptive"}:
                agent_cfg["reasoning_effort"] = "medium"
-            elif thinking in ("off", "low", "none", "minimal"):
+            elif thinking in {"off", "low", "none", "minimal"}:
                agent_cfg["reasoning_effort"] = "low"
            changes = True

@ -2626,8 +2626,8 @@ class Migrator:
            if not isinstance(ch_cfg, dict):
                continue
            complex_keys = {k: v for k, v in ch_cfg.items()
-                          if k not in ("botToken", "appToken", "allowFrom", "enabled")
-                          and v and k not in ("requireMention", "autoThread")}
+                          if k not in {"botToken", "appToken", "allowFrom", "enabled"}
+                          and v and k not in {"requireMention", "autoThread"}}
            if complex_keys:
                complex_archive[ch_name] = complex_keys

@ -2671,7 +2671,7 @@ class Migrator:

        # Archive remaining browser settings
        advanced = {k: v for k, v in browser.items()
-                   if k not in ("cdpUrl", "headless") and v}
+                   if k not in {"cdpUrl", "headless"} and v}
        if advanced and self.archive_dir:
            if self.execute:
                self.archive_dir.mkdir(parents=True, exist_ok=True)
--- a/optional-skills/productivity/telephony/scripts/telephony.py
+++ b/optional-skills/productivity/telephony/scripts/telephony.py
@ -109,7 +109,9 @ def _config_lookup(*paths: tuple[str, ...], default: str = "") -> str:
                 node = None
                 break
             node = node.get(key)
-        if node not in (None, "") and not isinstance(node, dict):
+        # Compare explicitly: a set literal would hash ``node`` and raise
+        # TypeError when the lookup lands on an unhashable value such as a dict.
+        if node is not None and node != "" and not isinstance(node, dict):
             return str(node)
     return default

--- a/optional-skills/research/darwinian-evolver/scripts/show_snapshot.py
+++ b/optional-skills/research/darwinian-evolver/scripts/show_snapshot.py
@ -51,7 +51,7 @@ def main() -> int:
        field = args.field
        if field is None:
            for k, v in vars(org).items():
-                if isinstance(v, str) and not k.startswith("_") and k not in ("id",):
+                if isinstance(v, str) and not k.startswith("_") and k not in {"id",}:
                    field = k
                    break
        val = getattr(org, field, None) if field else None
--- a/optional-skills/research/domain-intel/scripts/domain_intel.py
+++ b/optional-skills/research/domain-intel/scripts/domain_intel.py
@ -185,7 +185,7 @@ def whois_lookup(domain):
    for key, pat in patterns.items():
        matches = re.findall(pat, raw, re.IGNORECASE)
        if matches:
-            if key in ("name_servers", "status"):
+            if key in {"name_servers", "status"}:
                result[key] = list(dict.fromkeys(m.strip().lower() for m in matches))
            else:
                result[key] = matches[0].strip()
--- a/optional-skills/research/osint-investigation/scripts/_http.py
+++ b/optional-skills/research/osint-investigation/scripts/_http.py
@ -60,7 +60,7 @@ def get(
                    f"HTTP 429 rate-limited by {urllib.parse.urlsplit(url).netloc}. "
                    f"Slow down or supply a real API key. Body: {body[:300]}"
                ) from e
-            if e.code in (500, 502, 503, 504) and attempt < max_retries:
+            if e.code in {500, 502, 503, 504} and attempt < max_retries:
                retry_after = e.headers.get("Retry-After") if e.headers else None
                wait = float(retry_after) if (retry_after and retry_after.isdigit()) else backoff ** (attempt + 1)
                time.sleep(wait)
--- a/optional-skills/research/osint-investigation/scripts/fetch_icij_offshore.py
+++ b/optional-skills/research/osint-investigation/scripts/fetch_icij_offshore.py
@ -122,7 +122,7 @@ def fetch(

    with zipfile.ZipFile(zip_path) as zf:
        for node_type, csv_substring in targets:
-            relevant_needles = [n for (k, n) in needles if k in (node_type, "Entity", "Officer")] or []
+            relevant_needles = [n for (k, n) in needles if k in {node_type, "Entity", "Officer"}] or []
            # Only scan a CSV if we have a needle that could plausibly match it,
            # or if we have ONLY a jurisdiction filter.
            applicable_needles = [n for (k, n) in needles if k == node_type]
--- a/plugins/browser/browser_use/__init__.py
+++ b/plugins/browser/browser_use/__init__.py
@ -0,0 +1,14 @@
+"""Browser Use cloud browser plugin — bundled, auto-loaded.
+
+Mirrors the ``plugins/web/<vendor>/`` layout: ``provider.py`` holds the
+provider class; ``__init__.py::register`` instantiates and registers it.
+"""
+
+from __future__ import annotations
+
+from plugins.browser.browser_use.provider import BrowserUseBrowserProvider
+
+
+def register(ctx) -> None:
+    """Register the Browser Use provider with the plugin context."""
+    ctx.register_browser_provider(BrowserUseBrowserProvider())
--- a/plugins/browser/browser_use/plugin.yaml
+++ b/plugins/browser/browser_use/plugin.yaml
@ -0,0 +1,7 @@
+name: browser-browser-use
+version: 1.0.0
+description: "Browser Use (https://browser-use.com) cloud browser backend. Supports both direct BROWSER_USE_API_KEY and the managed Nous tool gateway. Also powers the 'Nous Subscription' UX flow that bills usage to a Nous subscription."
+author: NousResearch
+kind: backend
+provides_browser_providers:
+  - browser-use
--- a/plugins/browser/browser_use/provider.py
+++ b/plugins/browser/browser_use/provider.py
@ -1,4 +1,32 @@
-"""Browser Use cloud browser provider."""
+"""Browser Use cloud browser provider — plugin form.
+
+Subclasses :class:`agent.browser_provider.BrowserProvider` (the plugin-facing
+ABC introduced in PR #25214). The legacy in-tree module
+``tools.browser_providers.browser_use`` was removed in the same PR; this file
+is now the canonical implementation.
+
+Browser Use is the only browser backend with dual auth: a direct
+``BROWSER_USE_API_KEY`` for self-billed users, or the managed Nous tool
+gateway (which Hermes uses to bill Browser Use sessions to a Nous
+subscription). The dispatch order — direct API key first, managed gateway
+second — preserves the pre-migration behaviour in
+``tools.browser_providers.browser_use.BrowserUseProvider._get_config_or_none``.
+
+Config keys this provider responds to::
+
+    browser:
+      cloud_provider: "browser-use"   # explicit selection
+    tool_gateway:
+      browser: "gateway"              # optional: prefer managed gateway
+                                      #   even when BROWSER_USE_API_KEY is set
+
+Auth env vars (one of)::
+
+    BROWSER_USE_API_KEY=...           # https://browser-use.com
+    # OR a managed Nous gateway entry (configured via 'hermes setup')
+"""
+
+from __future__ import annotations

 import logging
 import os
@ -8,11 +36,14 @@ from typing import Any, Dict, Optional

 import requests

-from tools.browser_providers.base import CloudBrowserProvider
-from tools.managed_tool_gateway import resolve_managed_tool_gateway
-from tools.tool_backend_helpers import managed_nous_tools_enabled, prefers_gateway
+from agent.browser_provider import BrowserProvider

 logger = logging.getLogger(__name__)
+
+# Idempotency tracking for managed-mode session creation. The managed Nous
+# gateway returns 409 "already in progress" on retried POSTs; we forward the
+# original idempotency key so the gateway can deduplicate. Cleared on
+# success or terminal failure.
 _pending_create_keys: Dict[str, str] = {}
 _pending_create_keys_lock = threading.Lock()

@ -38,6 +69,16 @@ def _clear_pending_create_key(task_id: str) -> None:


 def _should_preserve_pending_create_key(response: requests.Response) -> bool:
+    """Decide whether to keep the idempotency key after a failed create.
+
+    Preserve the key when the failure looks retryable (5xx) OR when the
+    gateway reports the original request is still in flight (409 "already
+    in progress") — in either case, retrying with the same key lets the
+    gateway deduplicate.
+
+    Drop the key on any other 4xx (auth failure, bad request, etc.) — those
+    won't succeed by being retried.
+    """
    if response.status_code >= 500:
        return True

@ -60,13 +101,24 @@ def _should_preserve_pending_create_key(response: requests.Response) -> bool:
    return "already in progress" in message


-class BrowserUseProvider(CloudBrowserProvider):
-    """Browser Use (https://browser-use.com) cloud browser backend."""
+class BrowserUseBrowserProvider(BrowserProvider):
+    """Browser Use (https://browser-use.com) cloud browser backend.

-    def provider_name(self) -> str:
+    Dual auth: prefers a direct BROWSER_USE_API_KEY when set, falling back
+    to the managed Nous tool gateway when ``tool_gateway.browser`` config
+    routes through it. Setting ``tool_gateway.browser: gateway`` flips the
+    order so managed billing wins even when BROWSER_USE_API_KEY is present.
+    """
+
+    @property
+    def name(self) -> str:
+        return "browser-use"
+
+    @property
+    def display_name(self) -> str:
        return "Browser Use"

-    def is_configured(self) -> bool:
+    def is_available(self) -> bool:
        return self._get_config_or_none() is not None

    # ------------------------------------------------------------------
@ -74,6 +126,14 @@ class BrowserUseProvider(CloudBrowserProvider):
    # ------------------------------------------------------------------

    def _get_config_or_none(self) -> Optional[Dict[str, Any]]:
+        # Import here to avoid a hard dependency at module-import time —
+        # managed_tool_gateway pulls in the Nous auth stack which can be
+        # heavy and is not needed for direct-API-key users.
+        from tools.managed_tool_gateway import resolve_managed_tool_gateway
+        from tools.tool_backend_helpers import prefers_gateway
+
+        # Direct API key wins unless the user has explicitly opted into the
+        # managed Nous gateway via ``tool_gateway.browser: gateway``.
        api_key = os.environ.get("BROWSER_USE_API_KEY")
        if api_key and not prefers_gateway("browser"):
            return {
@ -93,6 +153,8 @@ class BrowserUseProvider(CloudBrowserProvider):
        }

    def _get_config(self) -> Dict[str, Any]:
+        from tools.tool_backend_helpers import managed_nous_tools_enabled
+
        config = self._get_config_or_none()
        if config is None:
            message = (
@ -111,11 +173,10 @@ class BrowserUseProvider(CloudBrowserProvider):
    # ------------------------------------------------------------------

    def _headers(self, config: Dict[str, Any]) -> Dict[str, str]:
-        headers = {
+        return {
            "Content-Type": "application/json",
            "X-Browser-Use-API-Key": config["api_key"],
        }
-        return headers

    def create_session(self, task_id: str) -> Dict[str, object]:
        config = self._get_config()
@ -166,7 +227,9 @@ class BrowserUseProvider(CloudBrowserProvider):
        if managed_mode:
            _clear_pending_create_key(task_id)
        session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}"
-        external_call_id = response.headers.get("x-external-call-id") if managed_mode else None
+        external_call_id = (
+            response.headers.get("x-external-call-id") if managed_mode else None
+        )

        logger.info("Created Browser Use session %s", session_name)

@ -184,7 +247,9 @@ class BrowserUseProvider(CloudBrowserProvider):
        try:
            config = self._get_config()
        except ValueError:
-            logger.warning("Cannot close Browser Use session %s — missing credentials", session_id)
+            logger.warning(
+                "Cannot close Browser Use session %s — missing credentials", session_id
+            )
            return False

        try:
@ -212,7 +277,10 @@ class BrowserUseProvider(CloudBrowserProvider):
    def emergency_cleanup(self, session_id: str) -> None:
        config = self._get_config_or_none()
        if config is None:
-            logger.warning("Cannot emergency-cleanup Browser Use session %s — missing credentials", session_id)
+            logger.warning(
+                "Cannot emergency-cleanup Browser Use session %s — missing credentials",
+                session_id,
+            )
            return
        try:
            requests.patch(
@ -222,4 +290,21 @@ class BrowserUseProvider(CloudBrowserProvider):
                timeout=5,
            )
        except Exception as e:
-            logger.debug("Emergency cleanup failed for Browser Use session %s: %s", session_id, e)
+            logger.debug(
+                "Emergency cleanup failed for Browser Use session %s: %s", session_id, e
+            )
+
+    def get_setup_schema(self) -> Dict[str, Any]:
+        return {
+            "name": "Browser Use",
+            "badge": "paid",
+            "tag": "Cloud browser with remote execution",
+            "env_vars": [
+                {
+                    "key": "BROWSER_USE_API_KEY",
+                    "prompt": "Browser Use API key",
+                    "url": "https://browser-use.com",
+                },
+            ],
+            "post_setup": "agent_browser",
+        }
--- a/plugins/browser/browserbase/__init__.py
+++ b/plugins/browser/browserbase/__init__.py
@ -0,0 +1,15 @@
+"""Browserbase cloud browser plugin — bundled, auto-loaded.
+
+Mirrors the ``plugins/web/<vendor>/`` and ``plugins/image_gen/openai/``
+layout: ``provider.py`` holds the provider class; ``__init__.py::register``
+instantiates and registers it via the plugin context.
+"""
+
+from __future__ import annotations
+
+from plugins.browser.browserbase.provider import BrowserbaseBrowserProvider
+
+
+def register(ctx) -> None:
+    """Register the Browserbase provider with the plugin context."""
+    ctx.register_browser_provider(BrowserbaseBrowserProvider())
--- a/plugins/browser/browserbase/plugin.yaml
+++ b/plugins/browser/browserbase/plugin.yaml
@ -0,0 +1,7 @@
+name: browser-browserbase
+version: 1.0.0
+description: "Browserbase (https://browserbase.com) cloud browser backend. Requires BROWSERBASE_API_KEY + BROWSERBASE_PROJECT_ID. Supports stealth, proxies, and keep-alive sessions; auto-falls-back when paid features are unavailable."
+author: NousResearch
+kind: backend
+provides_browser_providers:
+  - browserbase
--- a/plugins/browser/browserbase/provider.py
+++ b/plugins/browser/browserbase/provider.py
@ -1,4 +1,35 @@
-"""Browserbase cloud browser provider (direct credentials only)."""
+"""Browserbase cloud browser provider — plugin form.
+
+Subclasses :class:`agent.browser_provider.BrowserProvider` (the plugin-facing
+ABC introduced in PR #25214). The legacy in-tree module
+``tools.browser_providers.browserbase`` was removed in the same PR; this file
+is now the canonical implementation.
+
+Browserbase requires direct ``BROWSERBASE_API_KEY`` and ``BROWSERBASE_PROJECT_ID``
+credentials. Managed Nous gateway support has been removed — the Nous
+subscription now routes through Browser Use instead (see
+``plugins/browser/browser_use/``).
+
+Config keys this provider responds to::
+
+    browser:
+      cloud_provider: "browserbase"
+
+Auth env vars::
+
+    BROWSERBASE_API_KEY=...       # https://browserbase.com
+    BROWSERBASE_PROJECT_ID=...
+
+Optional feature knobs::
+
+    BROWSERBASE_BASE_URL=...      # default https://api.browserbase.com
+    BROWSERBASE_PROXIES=true      # default true
+    BROWSERBASE_ADVANCED_STEALTH=false
+    BROWSERBASE_KEEP_ALIVE=true   # default true
+    BROWSERBASE_SESSION_TIMEOUT=... (ms, integer)
+"""
+
+from __future__ import annotations

 import logging
 import os
@ -7,27 +38,31 @@ from typing import Any, Dict, Optional

 import requests

-from tools.browser_providers.base import CloudBrowserProvider
+from agent.browser_provider import BrowserProvider

 logger = logging.getLogger(__name__)


-class BrowserbaseProvider(CloudBrowserProvider):
+class BrowserbaseBrowserProvider(BrowserProvider):
    """Browserbase (https://browserbase.com) cloud browser backend.

-    This provider requires direct BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID
-    credentials.  Managed Nous gateway support has been removed — the Nous
-    subscription now routes through Browser Use instead.
+    Direct credentials only — managed-Nous-gateway support lives on the
+    Browser Use provider now.
    """

-    def provider_name(self) -> str:
+    @property
+    def name(self) -> str:
+        return "browserbase"
+
+    @property
+    def display_name(self) -> str:
        return "Browserbase"

-    def is_configured(self) -> bool:
+    def is_available(self) -> bool:
        return self._get_config_or_none() is not None

    # ------------------------------------------------------------------
-    # Session lifecycle
+    # Config resolution
    # ------------------------------------------------------------------

    def _get_config_or_none(self) -> Optional[Dict[str, Any]]:
@ -37,7 +72,9 @@ class BrowserbaseProvider(CloudBrowserProvider):
            return {
                "api_key": api_key,
                "project_id": project_id,
-                "base_url": os.environ.get("BROWSERBASE_BASE_URL", "https://api.browserbase.com").rstrip("/"),
+                "base_url": os.environ.get(
+                    "BROWSERBASE_BASE_URL", "https://api.browserbase.com"
+                ).rstrip("/"),
            }
        return None

@ -50,13 +87,21 @@ class BrowserbaseProvider(CloudBrowserProvider):
            )
        return config

+    # ------------------------------------------------------------------
+    # Session lifecycle
+    # ------------------------------------------------------------------
+
    def create_session(self, task_id: str) -> Dict[str, object]:
        config = self._get_config()

        # Optional env-var knobs
        enable_proxies = os.environ.get("BROWSERBASE_PROXIES", "true").lower() != "false"
-        enable_advanced_stealth = os.environ.get("BROWSERBASE_ADVANCED_STEALTH", "false").lower() == "true"
-        enable_keep_alive = os.environ.get("BROWSERBASE_KEEP_ALIVE", "true").lower() != "false"
+        enable_advanced_stealth = (
+            os.environ.get("BROWSERBASE_ADVANCED_STEALTH", "false").lower() == "true"
+        )
+        enable_keep_alive = (
+            os.environ.get("BROWSERBASE_KEEP_ALIVE", "true").lower() != "false"
+        )
        custom_timeout_ms = os.environ.get("BROWSERBASE_SESSION_TIMEOUT")

        features_enabled = {
@ -78,7 +123,9 @@ class BrowserbaseProvider(CloudBrowserProvider):
                if timeout_val > 0:
                    session_config["timeout"] = timeout_val
            except ValueError:
-                logger.warning("Invalid BROWSERBASE_SESSION_TIMEOUT value: %s", custom_timeout_ms)
+                logger.warning(
+                    "Invalid BROWSERBASE_SESSION_TIMEOUT value: %s", custom_timeout_ms
+                )

        if enable_proxies:
            session_config["proxies"] = True
@ -156,7 +203,9 @@ class BrowserbaseProvider(CloudBrowserProvider):
            features_enabled["custom_timeout"] = True

        feature_str = ", ".join(k for k, v in features_enabled.items() if v)
-        logger.info("Created Browserbase session %s with features: %s", session_name, feature_str)
+        logger.info(
+            "Created Browserbase session %s with features: %s", session_name, feature_str
+        )

        return {
            "session_name": session_name,
@ -169,7 +218,9 @@ class BrowserbaseProvider(CloudBrowserProvider):
        try:
            config = self._get_config()
        except ValueError:
-            logger.warning("Cannot close Browserbase session %s — missing credentials", session_id)
+            logger.warning(
+                "Cannot close Browserbase session %s — missing credentials", session_id
+            )
            return False

        try:
@ -203,7 +254,10 @@ class BrowserbaseProvider(CloudBrowserProvider):
    def emergency_cleanup(self, session_id: str) -> None:
        config = self._get_config_or_none()
        if config is None:
-            logger.warning("Cannot emergency-cleanup Browserbase session %s — missing credentials", session_id)
+            logger.warning(
+                "Cannot emergency-cleanup Browserbase session %s — missing credentials",
+                session_id,
+            )
            return
        try:
            requests.post(
@ -219,4 +273,25 @@ class BrowserbaseProvider(CloudBrowserProvider):
                timeout=5,
            )
        except Exception as e:
-            logger.debug("Emergency cleanup failed for Browserbase session %s: %s", session_id, e)
+            logger.debug(
+                "Emergency cleanup failed for Browserbase session %s: %s", session_id, e
+            )
+
+    def get_setup_schema(self) -> Dict[str, Any]:
+        return {
+            "name": "Browserbase",
+            "badge": "paid",
+            "tag": "Cloud browser with stealth and proxies",
+            "env_vars": [
+                {
+                    "key": "BROWSERBASE_API_KEY",
+                    "prompt": "Browserbase API key",
+                    "url": "https://browserbase.com",
+                },
+                {
+                    "key": "BROWSERBASE_PROJECT_ID",
+                    "prompt": "Browserbase project ID",
+                },
+            ],
+            "post_setup": "agent_browser",
+        }
--- a/plugins/browser/firecrawl/__init__.py
+++ b/plugins/browser/firecrawl/__init__.py
@ -0,0 +1,16 @@
+"""Firecrawl cloud browser plugin — bundled, auto-loaded.
+
+Distinct from ``plugins/web/firecrawl/`` (the web search/extract/crawl
+plugin); both share the FIRECRAWL_API_KEY but speak to different endpoints
+(``/v2/browser`` here vs ``/v2/search`` / ``/v2/scrape`` / ``/v2/crawl``
+over there).
+"""
+
+from __future__ import annotations
+
+from plugins.browser.firecrawl.provider import FirecrawlBrowserProvider
+
+
+def register(ctx) -> None:
+    """Hand a Firecrawl cloud-browser provider to the plugin context.
+
+    Builds a fresh :class:`FirecrawlBrowserProvider` and passes it to
+    ``ctx.register_browser_provider``.
+    """
+    provider = FirecrawlBrowserProvider()
+    ctx.register_browser_provider(provider)
--- a/plugins/browser/firecrawl/plugin.yaml
+++ b/plugins/browser/firecrawl/plugin.yaml
@ -0,0 +1,7 @@
+name: browser-firecrawl
+version: 1.0.0
+description: "Firecrawl (https://firecrawl.dev) cloud browser backend. Requires FIRECRAWL_API_KEY. Distinct from the firecrawl WEB search/extract plugin — the two share an API key but operate on different endpoints."
+author: NousResearch
+kind: backend
+provides_browser_providers:
+  - firecrawl
--- a/plugins/browser/firecrawl/provider.py
+++ b/plugins/browser/firecrawl/provider.py
@ -1,26 +1,61 @@
-"""Firecrawl cloud browser provider."""
+"""Firecrawl cloud browser provider — plugin form.
+
+Subclasses :class:`agent.browser_provider.BrowserProvider` (the plugin-facing
+ABC introduced in PR #25214). The legacy in-tree module
+``tools.browser_providers.firecrawl`` was removed in the same PR; this file
+is now the canonical implementation.
+
+This is the cloud-browser path — distinct from the firecrawl WEB plugin at
+``plugins/web/firecrawl/`` which handles search/extract/crawl on
+``/v2/search`` / ``/v2/scrape`` / ``/v2/crawl``. The two plugins share the
+``FIRECRAWL_API_KEY`` env var but talk to different endpoints (this one
+hits ``/v2/browser``).
+
+Config keys this provider responds to::
+
+    browser:
+      cloud_provider: "firecrawl"   # explicit selection only — not in the
+                                    # legacy auto-detect walk
+
+Auth env vars::
+
+    FIRECRAWL_API_KEY=...           # https://firecrawl.dev
+    FIRECRAWL_API_URL=...           # optional override (default https://api.firecrawl.dev)
+    FIRECRAWL_BROWSER_TTL=...       # optional, default 300 seconds
+"""
+
+from __future__ import annotations

 import logging
 import os
 import uuid
-from typing import Dict
+from typing import Any, Dict

 import requests

-from tools.browser_providers.base import CloudBrowserProvider
+from agent.browser_provider import BrowserProvider

 logger = logging.getLogger(__name__)

 _BASE_URL = "https://api.firecrawl.dev"


-class FirecrawlProvider(CloudBrowserProvider):
-    """Firecrawl (https://firecrawl.dev) cloud browser backend."""
+class FirecrawlBrowserProvider(BrowserProvider):
+    """Firecrawl (https://firecrawl.dev) cloud browser backend.

-    def provider_name(self) -> str:
+    Cloud-browser path only — search/extract/crawl live in the separate
+    ``plugins/web/firecrawl/`` plugin.
+    """
+
+    @property
+    def name(self) -> str:
+        return "firecrawl"
+
+    @property
+    def display_name(self) -> str:
        return "Firecrawl"

-    def is_configured(self) -> bool:
+    def is_available(self) -> bool:
        return bool(os.environ.get("FIRECRAWL_API_KEY"))

    # ------------------------------------------------------------------
@ -100,13 +135,34 @@ class FirecrawlProvider(CloudBrowserProvider):
            return False

    def emergency_cleanup(self, session_id: str) -> None:
+        if not self.is_available():
+            logger.warning(
+                "Cannot emergency-cleanup Firecrawl session %s — missing credentials",
+                session_id,
+            )
+            return
        try:
            requests.delete(
                f"{self._api_url()}/v2/browser/{session_id}",
                headers=self._headers(),
                timeout=5,
            )
-        except ValueError:
-            logger.warning("Cannot emergency-cleanup Firecrawl session %s — missing credentials", session_id)
        except Exception as e:
-            logger.debug("Emergency cleanup failed for Firecrawl session %s: %s", session_id, e)
+            logger.debug(
+                "Emergency cleanup failed for Firecrawl session %s: %s", session_id, e
+            )
+
+    def get_setup_schema(self) -> Dict[str, Any]:
+        """Return the static setup metadata for the Firecrawl provider.
+
+        The mapping holds the display name, a ``"paid"`` badge, a one-line
+        tag, the single ``FIRECRAWL_API_KEY`` env var entry, and the
+        ``post_setup`` value ``"agent_browser"``.
+        """
+        api_key_var = {
+            "key": "FIRECRAWL_API_KEY",
+            "prompt": "Firecrawl API key",
+            "url": "https://firecrawl.dev",
+        }
+        schema: Dict[str, Any] = {
+            "name": "Firecrawl",
+            "badge": "paid",
+            "tag": "Cloud browser with remote execution",
+            "env_vars": [api_key_var],
+            "post_setup": "agent_browser",
+        }
+        return schema
--- a/plugins/disk-cleanup/init.py
+++ b/plugins/disk-cleanup/init.py
@ -222,7 +222,7 @@ def _fmt_summary(summary: Dict[str, Any]) -> str:

 def _handle_slash(raw_args: str) -> Optional[str]:
    argv = raw_args.strip().split()
-    if not argv or argv[0] in ("help", "-h", "--help"):
+    if not argv or argv[0] in {"help", "-h", "--help"}:
        return _HELP_TEXT

    sub = argv[0]
--- a/plugins/google_meet/init.py
+++ b/plugins/google_meet/init.py
@ -72,7 +72,7 @@ def register(ctx) -> None:
    # tested path there and guest-join Chromium is flakier. Refuse to register
    # rather than half-working.
    system = platform.system().lower()
-    if system not in ("linux", "darwin"):
+    if system not in {"linux", "darwin"}:
        logger.info(
            "google_meet plugin: platform=%s not supported (linux/macos only)",
            system,
--- a/plugins/google_meet/cli.py
+++ b/plugins/google_meet/cli.py
@ -159,7 +159,7 @@ def _cmd_setup() -> int:
    print("---------------------")

    system = _p.system()
-    system_ok = system in ("Linux", "Darwin")
+    system_ok = system in {"Linux", "Darwin"}
    print(f"  platform       : {system}  [{'ok' if system_ok else 'unsupported'}]")

    try:
@ -231,7 +231,7 @@ def _cmd_install(*, realtime: bool, assume_yes: bool) -> int:
    import subprocess as _sp

    system = _p.system()
-    if system not in ("Linux", "Darwin"):
+    if system not in {"Linux", "Darwin"}:
        print(f"google_meet install: {system} is not supported (linux/macos only)")
        return 1

@ -242,7 +242,7 @@ def _cmd_install(*, realtime: bool, assume_yes: bool) -> int:
            ans = input(f"{prompt} [y/N] ").strip().lower()
        except EOFError:
            return False
-        return ans in ("y", "yes")
+        return ans in {"y", "yes"}

    print("google_meet install")
    print("-------------------")
--- a/plugins/google_meet/meet_bot.py
+++ b/plugins/google_meet/meet_bot.py
@ -447,7 +447,7 @@ def _mac_audio_device_index(device_name: str) -> str:
 def run_bot() -> int:  # noqa: C901 — orchestration, explicit branches
    url = os.environ.get("HERMES_MEET_URL", "").strip()
    out_dir_env = os.environ.get("HERMES_MEET_OUT_DIR", "").strip()
-    headed = os.environ.get("HERMES_MEET_HEADED", "").lower() in ("1", "true", "yes")
+    headed = os.environ.get("HERMES_MEET_HEADED", "").lower() in {"1", "true", "yes"}
    auth_state = os.environ.get("HERMES_MEET_AUTH_STATE", "").strip()
    guest_name = os.environ.get("HERMES_MEET_GUEST_NAME", "Hermes Agent")
    duration_s = _parse_duration(os.environ.get("HERMES_MEET_DURATION", ""))
@ -808,7 +808,7 @@ def _looks_like_human_speaker(speaker: str, bot_guest_name: str) -> bool:
    if not speaker or not speaker.strip():
        return False
    spk = speaker.strip().lower()
-    if spk in ("unknown", "you", bot_guest_name.strip().lower()):
+    if spk in {"unknown", "you", bot_guest_name.strip().lower()}:
        return False
    return True

--- a/plugins/google_meet/node/cli.py
+++ b/plugins/google_meet/node/cli.py
@ -103,7 +103,7 @@ def node_command(args: argparse.Namespace) -> int:
        print(f"removed {args.name!r}" if ok else f"no such node: {args.name!r}")
        return 0 if ok else 1

-    if cmd in ("status", "ping"):
+    if cmd in {"status", "ping"}:
        entry = reg.get(args.name)
        if entry is None:
            print(f"no such node: {args.name!r}", file=sys.stderr)
--- a/plugins/google_meet/realtime/openai_client.py
+++ b/plugins/google_meet/realtime/openai_client.py
@ -183,7 +183,7 @@ class RealtimeSession:
                    rid = (frame.get("response") or {}).get("id")
                    if rid:
                        self._last_response_id = rid
-                elif ftype in ("response.done", "response.completed", "response.cancelled"):
+                elif ftype in {"response.done", "response.completed", "response.cancelled"}:
                    break
                elif ftype == "error":
                    err = frame.get("error") or frame
--- a/plugins/google_meet/tools.py
+++ b/plugins/google_meet/tools.py
@ -36,7 +36,7 @@ def check_meet_requirements() -> bool:
    handlers relax the requirement when a node is addressed.
    """
    import platform as _p
-    if _p.system().lower() not in ("linux", "darwin"):
+    if _p.system().lower() not in {"linux", "darwin"}:
        return False
    try:
        import playwright  # noqa: F401
@ -238,7 +238,7 @@ def handle_meet_join(args: Dict[str, Any], **_kw) -> str:
    if not url:
        return _err("url is required")
    mode = (args.get("mode") or "transcribe").strip().lower()
-    if mode not in ("transcribe", "realtime"):
+    if mode not in {"transcribe", "realtime"}:
        return _err(f"mode must be 'transcribe' or 'realtime' (got {mode!r})")

    node = args.get("node")
--- a/plugins/kanban/dashboard/dist/index.js
+++ b/plugins/kanban/dashboard/dist/index.js
@ -908,6 +908,7 @@
            return createNewBoard(payload).then(function () { setShowNewBoard(false); });
          },
        }) : null,
+        h(OrchestrationPanel, null),
        h(AttentionStrip, {
          boardData,
          onOpen: setSelectedTaskId,
@ -1386,6 +1387,288 @@
    }, "?");
  }

+  // ---------------------------------------------------------------------
+  // OrchestrationPanel — collapsible settings panel for the kanban
+  // orchestrator (orchestrator profile picker, default assignee picker,
+  // auto-decompose toggle, plus per-profile description editing with
+  // auto-generate). Backed by /orchestration + /profiles endpoints.
+  // ---------------------------------------------------------------------
+
+  function OrchestrationPanel() {
+    // Collapsible settings panel for the kanban orchestrator. Collapsed, it
+    // renders the mode pill plus an expand link; expanded, it renders the
+    // orchestrator/assignee pickers, the auto-decompose checkbox and one
+    // editable description row per profile.
+    const [expanded, setExpanded] = useState(false);
+    const [settings, setSettings] = useState(null);  // null until the first load lands
+    const [profiles, setProfiles] = useState([]);
+    const [busy, setBusy] = useState({});            // profile name -> "save" | "auto"
+    const [msg, setMsg] = useState(null);            // { ok, text } status line, or null
+
+    // Fetch the settings and the profile roster together. On success this
+    // deliberately leaves `msg` alone: the description handlers below call
+    // loadAll() to refresh and then post their own success message, and
+    // clearing the message here would wipe it as soon as the refresh
+    // resolved. Callers that want a clean status line clear it themselves.
+    const loadAll = useCallback(function () {
+      Promise.all([
+        SDK.fetchJSON(`${API}/orchestration`),
+        SDK.fetchJSON(`${API}/profiles`),
+      ]).then(function (results) {
+        setSettings(results[0] || null);
+        setProfiles((results[1] && results[1].profiles) || []);
+      }).catch(function (err) {
+        setMsg({ ok: false, text: "Failed to load: " + (err.message || String(err)) });
+      });
+    }, []);
+
+    useEffect(function () {
+      // Load on mount so the collapsed pill shows the real mode without
+      // requiring the user to expand the panel first.
+      if (settings === null) loadAll();
+    }, [settings, loadAll]);
+
+    // PUT a partial settings patch; the response replaces local settings.
+    // Resolves to the response on success and to undefined on failure (the
+    // error is reported through `msg`).
+    const saveSettings = function (patch) {
+      setMsg(null);
+      return SDK.fetchJSON(`${API}/orchestration`, {
+        method: "PUT",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify(patch),
+      }).then(function (res) {
+        setSettings(res);
+        setMsg({ ok: true, text: "Settings saved." });
+        return res;
+      }).catch(function (err) {
+        setMsg({ ok: false, text: "Save failed: " + (err.message || String(err)) });
+      });
+    };
+
+    // PATCH a profile's description, then refresh the roster. The trailing
+    // .then() after .catch() acts as a "finally" that drops the busy flag.
+    const saveProfileDescription = function (name, description) {
+      setBusy(function (b) { return Object.assign({}, b, { [name]: "save" }); });
+      return SDK.fetchJSON(`${API}/profiles/${encodeURIComponent(name)}`, {
+        method: "PATCH",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({ description: description }),
+      }).then(function () {
+        loadAll();
+        setMsg({ ok: true, text: `Description saved for ${name}.` });
+      }).catch(function (err) {
+        setMsg({ ok: false, text: "Save failed: " + (err.message || String(err)) });
+      }).then(function () {
+        setBusy(function (b) {
+          const next = Object.assign({}, b); delete next[name]; return next;
+        });
+      });
+    };
+
+    // POST describe-auto for a profile. A response without `ok` is reported
+    // as a failure using its `reason`; on success the roster is refreshed.
+    const autoGenerateDescription = function (name, overwrite) {
+      setBusy(function (b) { return Object.assign({}, b, { [name]: "auto" }); });
+      return SDK.fetchJSON(`${API}/profiles/${encodeURIComponent(name)}/describe-auto`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({ overwrite: !!overwrite }),
+      }).then(function (res) {
+        if (res && res.ok) {
+          loadAll();
+          setMsg({ ok: true, text: `Auto-generated description for ${name}.` });
+        } else {
+          setMsg({
+            ok: false,
+            text: "Auto-generate failed: " + ((res && res.reason) || "unknown error"),
+          });
+        }
+      }).catch(function (err) {
+        setMsg({ ok: false, text: "Auto-generate failed: " + (err.message || String(err)) });
+      }).then(function () {
+        setBusy(function (b) {
+          const next = Object.assign({}, b); delete next[name]; return next;
+        });
+      });
+    };
+
+    const headerLabel = expanded
+      ? "▾ Orchestration settings"
+      : "▸ Orchestration settings";
+
+    // Mode pill — always visible (collapsed or expanded). One click flips
+    // between Auto and Manual. Auto = dispatcher decomposes new triage tasks
+    // every tick. Manual = pre-PR behavior, the user clicks ⚗ Decompose on
+    // each triage card (or runs `hermes kanban decompose <id>`) and tasks
+    // stay in triage until then.
+    const autoOn = !!(settings && settings.auto_decompose);
+    const modePillTitle = settings === null
+      ? "Loading mode…"
+      : (autoOn
+          ? "Orchestration: Auto — the dispatcher decomposes new triage tasks automatically every tick. Click to switch to Manual (pre-PR behavior)."
+          : "Orchestration: Manual — triage tasks stay in triage until you click ⚗ Decompose on each card. Click to switch to Auto.");
+    const modePill = h("button", {
+      type: "button",
+      onClick: function () {
+        if (settings === null) return;  // not loaded yet
+        saveSettings({ auto_decompose: !autoOn });
+      },
+      disabled: settings === null,
+      title: modePillTitle,
+      className: "inline-flex items-center gap-1 rounded-full border px-2 py-0.5 "
+                 + "text-xs font-medium "
+                 + (autoOn
+                    ? "border-emerald-500/40 bg-emerald-500/10 text-emerald-700 dark:text-emerald-300"
+                    : "border-muted-foreground/30 bg-muted/30 text-muted-foreground"),
+    },
+      "Orchestration: ",
+      h("span", { className: "ml-1 font-semibold" },
+        settings === null ? "…" : (autoOn ? "Auto" : "Manual"))
+    );
+
+    if (!expanded) {
+      return h("div", { className: "flex items-center gap-3 text-xs" },
+        modePill,
+        h("button", {
+          type: "button",
+          onClick: function () { setExpanded(true); },
+          className: "underline text-muted-foreground hover:text-foreground",
+          title: "Configure the kanban orchestrator (profile picker, default assignee, auto-decompose, profile descriptions)",
+        }, headerLabel),
+      );
+    }
+
+    const profileOptions = profiles.map(function (p) {
+      const tag = p.is_default ? " (default)" : "";
+      return h(SelectOption, { key: p.name, value: p.name }, p.name + tag);
+    });
+
+    return h(Card, { className: "p-3" },
+      h(CardContent, { className: "p-2 flex flex-col gap-3" },
+        h("div", { className: "flex items-center justify-between" },
+          h("button", {
+            type: "button",
+            onClick: function () { setExpanded(false); },
+            className: "text-sm font-medium underline-offset-2 hover:underline",
+          }, headerLabel),
+          modePill,
+          h(Button, {
+            // An explicit reload starts from a clean status line; loadAll
+            // itself does not clear it.
+            onClick: function () { setMsg(null); loadAll(); },
+            size: "sm",
+          }, "Reload"),
+        ),
+        msg ? h("div", {
+          className: msg.ok ? "hermes-kanban-msg-ok" : "hermes-kanban-msg-err",
+        }, msg.text) : null,
+
+        settings ? h("div", { className: "grid gap-3 sm:grid-cols-3" },
+          h("div", { className: "flex flex-col gap-1" },
+            h(Label, { className: "text-xs text-muted-foreground" },
+              "Orchestrator profile"),
+            h(Select, {
+              value: settings.orchestrator_profile || "",
+              className: "h-8",
+              onChange: function (e) {
+                // Accept either a DOM-style event or a bare value.
+                const v = (e && e.target ? e.target.value : e) || "";
+                saveSettings({ orchestrator_profile: v });
+              },
+            },
+              h(SelectOption, { value: "" },
+                "(default: " + (settings.active_profile || "default") + ")"),
+              profileOptions,
+            ),
+            h("div", { className: "text-[10px] text-muted-foreground" },
+              "Resolved: " + (settings.resolved_orchestrator_profile || "default")),
+          ),
+          h("div", { className: "flex flex-col gap-1" },
+            h(Label, { className: "text-xs text-muted-foreground" },
+              "Default assignee"),
+            h(Select, {
+              value: settings.default_assignee || "",
+              className: "h-8",
+              onChange: function (e) {
+                // Accept either a DOM-style event or a bare value.
+                const v = (e && e.target ? e.target.value : e) || "";
+                saveSettings({ default_assignee: v });
+              },
+            },
+              h(SelectOption, { value: "" },
+                "(default: " + (settings.active_profile || "default") + ")"),
+              profileOptions,
+            ),
+            h("div", { className: "text-[10px] text-muted-foreground" },
+              "Resolved: " + (settings.resolved_default_assignee || "default")),
+          ),
+          h("div", { className: "flex flex-col gap-1" },
+            h(Label, { className: "text-xs text-muted-foreground" },
+              "Orchestration mode"),
+            h("label", { className: "flex items-center gap-2 text-xs h-8" },
+              h("input", {
+                type: "checkbox",
+                checked: !!settings.auto_decompose,
+                onChange: function (e) {
+                  saveSettings({ auto_decompose: !!e.target.checked });
+                },
+              }),
+              settings.auto_decompose ? "Auto (default)" : "Manual",
+            ),
+            h("div", { className: "text-[10px] text-muted-foreground" },
+              "When on, the dispatcher decomposes new triage tasks automatically."),
+          ),
+        ) : h("div", { className: "text-xs text-muted-foreground" },
+          "Loading…"),
+
+        h("div", { className: "border-t pt-3" },
+          h(Label, { className: "text-xs text-muted-foreground" },
+            "Profile descriptions"),
+          h("div", { className: "text-[10px] text-muted-foreground pb-2" },
+            "Descriptions guide the orchestrator's routing. Click ⚗ to auto-generate, or edit and save."),
+          profiles.length === 0
+            ? h("div", { className: "text-xs text-muted-foreground" }, "No profiles installed.")
+            : h("div", { className: "flex flex-col gap-2" },
+                profiles.map(function (p) {
+                  return h(ProfileDescriptionRow, {
+                    key: p.name,
+                    profile: p,
+                    busy: busy[p.name] || null,
+                    onSave: saveProfileDescription,
+                    onAuto: autoGenerateDescription,
+                  });
+                }),
+              ),
+        ),
+      ),
+    );
+  }
+
+  function ProfileDescriptionRow(props) {
+    // One editable row of the "Profile descriptions" list.
+    // Props:
+    //   profile — roster entry; reads name, description, description_auto,
+    //             is_default.
+    //   busy    — "save" | "auto" while a request for this profile is in
+    //             flight, otherwise falsy; disables both buttons.
+    //   onSave(name, draft)    — called by the Save button.
+    //   onAuto(name, overwrite) — called by the ⚗ Auto button, always with
+    //             overwrite = true.
+    const p = props.profile;
+    const [draft, setDraft] = useState(p.description || "");
+    const busy = props.busy;
+    // Re-sync the local draft if the server-side description changes (e.g.
+    // after auto-generate). Cheap because re-runs only happen on prop change.
+    useEffect(function () {
+      setDraft(p.description || "");
+    }, [p.description]);
+
+    return h("div", { className: "flex flex-col gap-1 border-l-2 pl-2",
+      style: { borderColor: p.description ? "#888" : "#cc6" } },
+      h("div", { className: "flex items-center gap-2 text-xs" },
+        h("span", { className: "font-medium" }, p.name),
+        p.is_default ? h("span", { className: "text-[10px] text-muted-foreground" }, "(default)") : null,
+        p.description_auto && p.description
+          ? h("span", { className: "text-[10px] text-yellow-600" }, "auto — review")
+          : null,
+        !p.description
+          ? h("span", { className: "text-[10px] text-yellow-600" }, "⚠ no description")
+          : null,
+      ),
+      h("div", { className: "flex items-center gap-2" },
+        h(Input, {
+          value: draft,
+          onChange: function (e) { setDraft(e.target.value); },
+          placeholder: "What is this profile good at?",
+          className: "h-7 text-xs flex-1",
+        }),
+        h(Button, {
+          onClick: function () { props.onSave(p.name, draft); },
+          size: "sm",
+          // Nothing to save while the draft still equals the stored text.
+          disabled: !!busy || draft === (p.description || ""),
+          title: "Save the description above as user-authored",
+        }, busy === "save" ? "Saving…" : "Save"),
+        h(Button, {
+          onClick: function () { props.onAuto(p.name, true); },
+          size: "sm",
+          disabled: !!busy,
+          title: "Auto-generate a description from this profile's skills and model",
+        }, busy === "auto" ? "Generating…" : "⚗ Auto"),
+      ),
+    );
+  }
+
  function BoardSwitcher(props) {
    const { t } = useI18n();
    const list = props.boardList || [];
@ -2395,6 +2678,25 @@
      });
    };

+    // Decompose: POST /tasks/:id/decompose fans a triage task out into a
+    // graph of child tasks routed to specialist profiles by description.
+    // Once the call resolves, reload the drawer (the root flips to todo)
+    // and refresh the board (the new children show up in the columns),
+    // then pass the response through to the caller.
+    const doDecompose = function () {
+      const endpoint = `${API}/tasks/${encodeURIComponent(props.taskId)}/decompose`;
+      const request = {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({}),
+      };
+      return SDK.fetchJSON(withBoard(endpoint, boardSlug), request)
+        .then(function (result) {
+          load();
+          props.onRefresh();
+          return result;
+        });
+    };
+
    const addLink = function (parentId) {
      return SDK.fetchJSON(withBoard(`${API}/links`, boardSlug), {
        method: "POST",
@ -2486,6 +2788,7 @@
          boardSlug: boardSlug,
          onPatch: doPatch,
          onSpecify: doSpecify,
+          onDecompose: doDecompose,
          onAddParent: addLink,
          onRemoveParent: removeLink,
          onAddChild: addChild,
@ -2559,6 +2862,7 @@
        task: t,
        onPatch: props.onPatch,
        onSpecify: props.onSpecify,
+        onDecompose: props.onDecompose,
      }),
      h(DiagnosticsSection, {
        task: t,
@ -3023,6 +3327,8 @@
    const task = props.task;
    const [specifyBusy, setSpecifyBusy] = useState(false);
    const [specifyMsg, setSpecifyMsg] = useState(null);
+    const [decomposeBusy, setDecomposeBusy] = useState(false);
+    const [decomposeMsg, setDecomposeMsg] = useState(null);
    const b = function (label, patch, enabled, confirmMsg) {
      return h(Button, {
        onClick: function () { if (enabled !== false) props.onPatch(patch, { confirm: confirmMsg }); },
@ -3067,9 +3373,57 @@
        }, specifyBusy ? "Specifying…" : "✨ Specify")
      : null;

+    // "Decompose" is the orchestrator-driven fan-out. Like Specify, it is
+    // only offered on triage-column tasks — elsewhere the backend short-
+    // circuits with ok:false. A fanout:false response is reported as a
+    // single task (with the new title, if any); a fan-out reports the
+    // child count and ids for quick at-a-glance verification.
+    const describeDecomposeResult = function (res) {
+      if (!(res && res.ok)) {
+        return {
+          ok: false,
+          text: "Decompose failed: " + ((res && res.reason) || "unknown error"),
+        };
+      }
+      if (res.fanout && res.child_ids && res.child_ids.length) {
+        return {
+          ok: true,
+          text: `Decomposed into ${res.child_ids.length} children: ${res.child_ids.join(", ")}`,
+        };
+      }
+      const suffix = res.new_title
+        ? ` — retitled: ${res.new_title}`
+        : "";
+      return {
+        ok: true,
+        text: `Single task (no fanout)${suffix}`,
+      };
+    };
+    const runDecomposeClick = function () {
+      if (decomposeBusy) return;
+      setDecomposeBusy(true);
+      setDecomposeMsg(null);
+      props.onDecompose()
+        .then(function (res) {
+          setDecomposeMsg(describeDecomposeResult(res));
+        })
+        .catch(function (err) {
+          setDecomposeMsg({
+            ok: false,
+            text: "Decompose failed: " + (err.message || String(err)),
+          });
+        })
+        // Runs after success or failure alike: release the busy flag.
+        .then(function () {
+          setDecomposeBusy(false);
+        });
+    };
+    const decomposeButton = (task.status === "triage" && props.onDecompose)
+      ? h(Button, {
+          onClick: function () { runDecomposeClick(); },
+          disabled: decomposeBusy,
+          size: "sm",
+        }, decomposeBusy ? "Decomposing…" : "⚗ Decompose")
+      : null;
+
    return h("div", null,
      h("div", { className: "hermes-kanban-actions" },
        specifyButton,
+        decomposeButton,
        b("→ triage",  { status: "triage" },   task.status !== "triage"),
        b("→ ready",   { status: "ready" },    task.status !== "ready"),
        // No direct → running button: /tasks/:id PATCH rejects status=running
@ -3091,6 +3445,11 @@
          ? "hermes-kanban-msg-ok"
          : "hermes-kanban-msg-err",
      }, specifyMsg.text) : null,
+      decomposeMsg ? h("div", {
+        className: decomposeMsg.ok
+          ? "hermes-kanban-msg-ok"
+          : "hermes-kanban-msg-err",
+      }, decomposeMsg.text) : null,
    );
  }

--- a/plugins/kanban/dashboard/plugin_api.py
+++ b/plugins/kanban/dashboard/plugin_api.py
@ -628,7 +628,7 @@ def update_task(task_id: str, payload: UpdateTaskBody, board: Optional[str] = Qu
                    status_code=400,
                    detail="Cannot set status to 'running' directly; use the dispatcher/claim path",
                )
-            elif s in ("todo", "triage"):
+            elif s in {"todo", "triage"}:
                ok = _set_status_direct(conn, task_id, s)
            else:
                raise HTTPException(status_code=400, detail=f"unknown status: {s}")
@ -742,7 +742,7 @@ def _set_status_direct(
            (task_id, run_id, json.dumps({"status": new_status}), int(time.time())),
        )
    # If we re-opened something, children may have gone stale.
-    if new_status in ("done", "ready"):
+    if new_status in {"done", "ready"}:
        kanban_db.recompute_ready(conn)
    return True

@ -868,7 +868,7 @@ def bulk_update(payload: BulkTaskBody, board: Optional[str] = Query(None)):
                            ok = kanban_db.unblock_task(conn, tid)
                        else:
                            ok = _set_status_direct(conn, tid, "ready")
-                    elif s in ("todo", "running", "triage"):
+                    elif s in {"todo", "running", "triage"}:
                        ok = _set_status_direct(conn, tid, s)
                    else:
                        entry.update(ok=False, error=f"unknown status {s!r}")
@ -1535,6 +1535,279 @@ def switch_board(slug: str):
 _EVENT_POLL_SECONDS = 0.3


+# ---------------------------------------------------------------------------
+# Profile metadata & description editing (consumed by the kanban orchestrator)
+# ---------------------------------------------------------------------------
+
+class DescribeBody(BaseModel):
+    """Request body for ``PATCH /profiles/{profile_name}``."""
+
+    # The handler strips this value; ``None`` or whitespace-only text ends
+    # up stored as an empty description.
+    description: Optional[str] = None  # explicit user-authored text
+
+
+class DescribeAutoBody(BaseModel):
+    """Request body for ``POST /profiles/{profile_name}/describe-auto``."""
+
+    # Forwarded as ``overwrite=`` to ``profile_describer.describe_profile``.
+    overwrite: bool = False
+
+
+@router.get("/profiles")
+def list_profile_roster():
+    """List all installed profiles along with their descriptions.
+
+    Feeds the dashboard settings panel (orchestrator picker) and the
+    profile-description editor. A profile with no description is still
+    listed — it stays routable by name, only with less precision.
+
+    Raises:
+        HTTPException: 500 when the profiles module cannot be imported
+            or the listing call fails.
+    """
+    try:
+        from hermes_cli import profiles as profiles_mod
+        installed = profiles_mod.list_profiles()
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=f"failed to list profiles: {exc}")
+
+    def _as_row(profile):
+        # Coerce missing/None attributes to JSON-friendly defaults.
+        return {
+            "name": profile.name,
+            "is_default": bool(profile.is_default),
+            "model": profile.model or "",
+            "provider": profile.provider or "",
+            "description": profile.description or "",
+            "description_auto": bool(profile.description_auto),
+            "skill_count": int(profile.skill_count or 0),
+        }
+
+    return {"profiles": [_as_row(profile) for profile in installed]}
+
+
+@router.patch("/profiles/{profile_name}")
+def update_profile_description(profile_name: str, payload: DescribeBody):
+    """Store or clear a profile's user-authored description.
+
+    The submitted text is stripped; an empty result clears the
+    description. The value is always saved with
+    ``description_auto: false`` so an auto-describer sweep will not
+    replace it unless run with ``--overwrite``.
+
+    Raises:
+        HTTPException: 404 when the profile directory does not exist,
+            500 for any other failure while resolving or writing.
+    """
+    try:
+        from hermes_cli import profiles as profiles_mod
+
+        canonical = profiles_mod.normalize_profile_name(profile_name)
+        if canonical != "default":
+            target_dir = profiles_mod.get_profile_dir(canonical)
+        else:
+            # The default profile is addressed via the Hermes home directory.
+            from hermes_constants import get_hermes_home  # type: ignore
+            from pathlib import Path as _Path
+            target_dir = _Path(get_hermes_home())
+        if not target_dir.is_dir():
+            raise HTTPException(status_code=404, detail=f"profile '{profile_name}' not found")
+        cleaned = (payload.description or "").strip()
+        profiles_mod.write_profile_meta(
+            target_dir, description=cleaned, description_auto=False,
+        )
+    except HTTPException:
+        # Let the 404 raised above through untouched.
+        raise
+    except Exception as failure:
+        raise HTTPException(status_code=500, detail=f"failed to update profile: {failure}")
+    return {"ok": True, "profile": canonical, "description": cleaned}
+
+
+@router.post("/profiles/{profile_name}/describe-auto")
+def auto_describe_profile(profile_name: str, payload: DescribeAutoBody):
+    """Ask the auxiliary LLM (``auxiliary.profile_describer``) to write a
+    description for the named profile. The result is persisted with
+    ``description_auto: true`` so the dashboard can show a "review"
+    badge.
+
+    Equivalent to ``hermes profile describe <name> --auto``. A non-OK
+    outcome is returned in the body rather than as an HTTP error — the
+    UI shows the reason inline (e.g. "no auxiliary client configured")
+    so the operator can fix config and retry without a page reload.
+
+    Raises:
+        HTTPException: 500 only when the describer import or call
+            itself raises.
+    """
+    force = bool(payload.overwrite)
+    try:
+        from hermes_cli import profile_describer  # noqa: WPS433 (intentional)
+        result = profile_describer.describe_profile(profile_name, overwrite=force)
+    except Exception as failure:
+        raise HTTPException(status_code=500, detail=f"describer crashed: {failure}")
+
+    response = {
+        "ok": bool(result.ok),
+        "profile": result.profile_name,
+        "reason": result.reason,
+        "description": result.description,
+    }
+    return response
+
+
+# ---------------------------------------------------------------------------
+# Decompose endpoint (orchestrator-driven fan-out)
+# ---------------------------------------------------------------------------
+
+class DecomposeBody(BaseModel):
+    """Request body for ``POST /tasks/{task_id}/decompose``."""
+
+    # Forwarded as ``author=`` to ``kanban_decompose.decompose_task``; an
+    # empty string is passed through as ``None``.
+    author: Optional[str] = None
+
+
+@router.post("/tasks/{task_id}/decompose")
+def decompose_task_endpoint(
+    task_id: str,
+    payload: DecomposeBody,
+    board: Optional[str] = Query(None),
+):
+    """Fan a triage-column task out into a graph of child tasks via the
+    auxiliary LLM, routed to specialist profiles by description. Maps
+    1:1 to ``hermes kanban decompose <task_id>``.
+
+    Returns the outcome shape used by the CLI: ``{ok, task_id, reason,
+    fanout, child_ids, new_title}``. A non-OK outcome is NOT an HTTP
+    error — the UI renders the reason inline.
+
+    Runs in FastAPI's threadpool (sync ``def``) because the LLM call
+    can take minutes on reasoning models.
+
+    There is no ``except`` clause here: an exception raised by the
+    decomposer propagates to the framework after the environment
+    variable has been restored.
+    """
+    board = _resolve_board(board)
+    # Remember the current HERMES_KANBAN_BOARD so the ``finally`` block can
+    # put it back (or remove the variable if it was unset). The variable is
+    # presumably how kanban_decompose learns which board to use — verify
+    # against that module.
+    # NOTE(review): os.environ is process-wide and this handler runs in a
+    # threadpool, so two overlapping decompose requests for different boards
+    # could observe each other's value — confirm whether calls are serialized.
+    prev_env = os.environ.get("HERMES_KANBAN_BOARD")
+    try:
+        # Fall back to the default board when none was resolved.
+        os.environ["HERMES_KANBAN_BOARD"] = board or kanban_db.DEFAULT_BOARD
+        from hermes_cli import kanban_decompose  # noqa: WPS433 (intentional)
+        outcome = kanban_decompose.decompose_task(
+            task_id,
+            author=(payload.author or None),
+        )
+    finally:
+        # Restore the environment exactly as it was before the call.
+        if prev_env is None:
+            os.environ.pop("HERMES_KANBAN_BOARD", None)
+        else:
+            os.environ["HERMES_KANBAN_BOARD"] = prev_env
+
+    # Flatten the outcome object into plain JSON-serialisable values.
+    return {
+        "ok": bool(outcome.ok),
+        "task_id": outcome.task_id,
+        "reason": outcome.reason,
+        "fanout": bool(outcome.fanout),
+        "child_ids": outcome.child_ids or [],
+        "new_title": outcome.new_title,
+    }
+
+
+# ---------------------------------------------------------------------------
+# Orchestration settings (kanban.orchestrator_profile / default_assignee /
+# auto_decompose) — surfaced to the dashboard's settings panel
+# ---------------------------------------------------------------------------
+
+class OrchestrationSettingsBody(BaseModel):
+    """Request body for ``PUT /orchestration``.
+
+    A field left as ``None`` is not written; the handler only touches
+    the config keys whose value here is not ``None``.
+    """
+
+    orchestrator_profile: Optional[str] = None
+    default_assignee: Optional[str] = None
+    auto_decompose: Optional[bool] = None
+
+
+@router.get("/orchestration")
+def get_orchestration_settings():
+    """Return the current kanban orchestration knobs from config.yaml
+    plus the resolved effective values (filling in fallbacks).
+
+    The response carries both the explicitly configured values
+    (``orchestrator_profile``, ``default_assignee``, ``auto_decompose``)
+    and the ``resolved_*`` values, where an unset or unknown profile
+    name is replaced by the active profile. This endpoint never raises
+    for a missing or malformed config: a config that fails to load, a
+    ``kanban`` section that is not a mapping, or a profile value that
+    is not a string are all treated as "not set".
+    """
+    try:
+        from hermes_cli.config import load_config
+        cfg = load_config() or {}
+    except Exception:
+        cfg = {}
+    # Tolerate a malformed config the same way the PUT handler does: a
+    # non-mapping root or ``kanban`` section counts as empty.
+    kanban_cfg = cfg.get("kanban") if isinstance(cfg, dict) else None
+    if not isinstance(kanban_cfg, dict):
+        kanban_cfg = {}
+
+    def _configured_name(key):
+        # Only string values are meaningful profile names; anything else
+        # (None, numbers, lists) is reported as unset instead of crashing
+        # on ``.strip()``.
+        value = kanban_cfg.get(key)
+        return value.strip() if isinstance(value, str) else ""
+
+    explicit_orch = _configured_name("orchestrator_profile")
+    explicit_default = _configured_name("default_assignee")
+    auto_decompose = bool(kanban_cfg.get("auto_decompose", True))
+
+    # Resolve fallbacks the same way the decomposer does.
+    resolved_orch = explicit_orch
+    resolved_default = explicit_default
+    try:
+        from hermes_cli import profiles as profiles_mod
+        active_default = profiles_mod.get_active_profile_name() or "default"
+        if not resolved_orch or not profiles_mod.profile_exists(resolved_orch):
+            resolved_orch = active_default
+        if not resolved_default or not profiles_mod.profile_exists(resolved_default):
+            resolved_default = active_default
+    except Exception:
+        # Profile lookup unavailable: keep explicit names as-is and only
+        # fill in the blanks with the literal "default" profile.
+        active_default = "default"
+        if not resolved_orch:
+            resolved_orch = active_default
+        if not resolved_default:
+            resolved_default = active_default
+
+    return {
+        "orchestrator_profile": explicit_orch,
+        "default_assignee": explicit_default,
+        "auto_decompose": auto_decompose,
+        "resolved_orchestrator_profile": resolved_orch,
+        "resolved_default_assignee": resolved_default,
+        "active_profile": active_default,
+    }
+
+
+def _checked_orchestration_profile(raw, profiles_mod):
+    """Strip *raw* and verify that the named profile exists.
+
+    Returns the stripped name. An empty name (meaning "clear the
+    override") is returned without any lookup, as is any name when
+    *profiles_mod* is ``None``. A failure of the lookup itself is
+    ignored (fail open).
+
+    Raises:
+        HTTPException: 400 when the lookup succeeds and reports that
+            the profile does not exist.
+    """
+    name = (raw or "").strip()
+    if not name or profiles_mod is None:
+        return name
+    try:
+        exists = profiles_mod.profile_exists(name)
+    except Exception:
+        return name  # fail open if the lookup itself errors
+    if not exists:
+        raise HTTPException(
+            status_code=400,
+            detail=f"profile '{name}' does not exist",
+        )
+    return name
+
+
+@router.put("/orchestration")
+def set_orchestration_settings(payload: OrchestrationSettingsBody):
+    """Update the kanban orchestration knobs in ~/.hermes/config.yaml.
+
+    Each field is optional — only fields explicitly passed are
+    written. ``orchestrator_profile`` / ``default_assignee`` accept
+    empty strings to clear the override and fall back to the default
+    profile.
+
+    Nothing is saved if validation of any field fails, because the
+    config is written only after every field has been processed.
+
+    Raises:
+        HTTPException: 400 for a non-empty profile name that does not
+            exist; 500 when the config cannot be loaded or saved.
+    """
+    try:
+        from hermes_cli.config import load_config, save_config
+        cfg = load_config() or {}
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=f"failed to load config: {exc}")
+
+    # Replace a missing or malformed ``kanban`` section with a fresh dict.
+    kanban_section = cfg.setdefault("kanban", {})
+    if not isinstance(kanban_section, dict):
+        kanban_section = {}
+        cfg["kanban"] = kanban_section
+
+    # Validate any non-empty profile names exist before saving. If the
+    # profiles module cannot be imported, validation is skipped entirely.
+    try:
+        from hermes_cli import profiles as profiles_mod
+    except Exception:
+        profiles_mod = None  # type: ignore
+
+    if payload.orchestrator_profile is not None:
+        kanban_section["orchestrator_profile"] = _checked_orchestration_profile(
+            payload.orchestrator_profile, profiles_mod,
+        )
+
+    if payload.default_assignee is not None:
+        kanban_section["default_assignee"] = _checked_orchestration_profile(
+            payload.default_assignee, profiles_mod,
+        )
+
+    if payload.auto_decompose is not None:
+        kanban_section["auto_decompose"] = bool(payload.auto_decompose)
+
+    try:
+        save_config(cfg)
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=f"failed to save config: {exc}")
+
+    # Echo back the resolved state (callers usually re-render from it).
+    return get_orchestration_settings()
+
+
@router.websocket("/events")
 async def stream_events(ws: WebSocket):
    # Enforce the dashboard session token as a query param — browsers can't
--- a/plugins/memory/byterover/init.py
+++ b/plugins/memory/byterover/init.py
@ -263,7 +263,7 @@ class ByteRoverMemoryProvider(MemoryProvider):

    def on_memory_write(self, action: str, target: str, content: str) -> None:
        """Mirror built-in memory writes to ByteRover."""
-        if action not in ("add", "replace") or not content:
+        if action not in {"add", "replace"} or not content:
            return

        def _write():
@ -289,7 +289,7 @@ class ByteRoverMemoryProvider(MemoryProvider):
        for msg in messages[-10:]:  # last 10 messages
            role = msg.get("role", "")
            content = msg.get("content", "")
-            if isinstance(content, str) and content.strip() and role in ("user", "assistant"):
+            if isinstance(content, str) and content.strip() and role in {"user", "assistant"}:
                parts.append(f"{role}: {content[:500]}")

        if not parts:
--- a/plugins/memory/hindsight/init.py
+++ b/plugins/memory/hindsight/init.py
@ -416,7 +416,7 @@ def _build_embedded_profile_env(config: dict[str, Any], *, llm_api_key: str | No
    current_base_url = config.get("llm_base_url") or os.environ.get("HINDSIGHT_API_LLM_BASE_URL", "")

    # The embedded daemon expects OpenAI wire format for these providers.
-    daemon_provider = "openai" if current_provider in ("openai_compatible", "openrouter") else current_provider
+    daemon_provider = "openai" if current_provider in {"openai_compatible", "openrouter"} else current_provider

    env_values = {
        "HINDSIGHT_API_LLM_PROVIDER": str(daemon_provider),
@ -596,7 +596,7 @@ class HindsightMemoryProvider(MemoryProvider):
        try:
            cfg = _load_config()
            mode = cfg.get("mode", "cloud")
-            if mode in ("local", "local_embedded"):
+            if mode in {"local", "local_embedded"}:
                available, _ = _check_local_runtime()
                return available
            if mode == "local_external":
@ -888,7 +888,7 @@ class HindsightMemoryProvider(MemoryProvider):
                from hindsight import HindsightEmbedded
                HindsightEmbedded.__del__ = lambda self: None
                llm_provider = self._config.get("llm_provider", "")
-                if llm_provider in ("openai_compatible", "openrouter"):
+                if llm_provider in {"openai_compatible", "openrouter"}:
                    llm_provider = "openai"
                logger.debug("Creating HindsightEmbedded client (profile=%s, provider=%s)",
                             self._config.get("profile", "hermes"), llm_provider)
@ -1132,7 +1132,7 @@ class HindsightMemoryProvider(MemoryProvider):
                self._mode = "disabled"
                return
        self._api_key = self._config.get("apiKey") or self._config.get("api_key") or os.environ.get("HINDSIGHT_API_KEY", "")
-        default_url = _DEFAULT_LOCAL_URL if self._mode in ("local_embedded", "local_external") else _DEFAULT_API_URL
+        default_url = _DEFAULT_LOCAL_URL if self._mode in {"local_embedded", "local_external"} else _DEFAULT_API_URL
        self._api_url = self._config.get("api_url") or os.environ.get("HINDSIGHT_API_URL", default_url)
        self._llm_base_url = self._config.get("llm_base_url", "")

@ -1152,10 +1152,10 @@ class HindsightMemoryProvider(MemoryProvider):
        self._budget = budget if budget in _VALID_BUDGETS else "mid"

        memory_mode = self._config.get("memory_mode", "hybrid")
-        self._memory_mode = memory_mode if memory_mode in ("context", "tools", "hybrid") else "hybrid"
+        self._memory_mode = memory_mode if memory_mode in {"context", "tools", "hybrid"} else "hybrid"

        prefetch_method = self._config.get("recall_prefetch_method") or self._config.get("prefetch_method", "recall")
-        self._prefetch_method = prefetch_method if prefetch_method in ("recall", "reflect") else "recall"
+        self._prefetch_method = prefetch_method if prefetch_method in {"recall", "reflect"} else "recall"

        # Bank options
        self._bank_mission = self._config.get("bank_mission", "")
--- a/Show more
+++ b/Show more