Merge branch 'main' into docker_s6

2026-07-15 14:22:43 +00:00 · 2026-05-25 09:39:27 +10:00 · 2026-05-25 09:39:27 +10:00 · 59da190512
commit 59da190512
parent 0988ab83b7 9c08070703
417 changed files with 26434 additions and 3321 deletions
--- a/.github/workflows/supply-chain-audit.yml
+++ b/.github/workflows/supply-chain-audit.yml
@ -47,14 +47,17 @@ jobs:
          HEAD="${{ github.event.pull_request.head.sha }}"

          # Added lines only, excluding lockfiles.
-          DIFF=$(git diff "$BASE".."$HEAD" -- . ':!uv.lock' ':!*.lock' ':!package-lock.json' ':!yarn.lock' || true)
+          # Three-dot diff (base...head) diffs from the merge base to HEAD,
+          # so only changes introduced by this PR are included — not changes
+          # that landed on main after the PR branched off.
+          DIFF=$(git diff "$BASE"..."$HEAD" -- . ':!uv.lock' ':!*.lock' ':!package-lock.json' ':!yarn.lock' || true)

          FINDINGS=""

          # --- .pth files (auto-execute on Python startup) ---
          # The exact mechanism used in the litellm supply chain attack:
          # https://github.com/BerriAI/litellm/issues/24512
-          PTH_FILES=$(git diff --name-only "$BASE".."$HEAD" | grep '\.pth$' || true)
+          PTH_FILES=$(git diff --name-only "$BASE"..."$HEAD" | grep '\.pth$' || true)
          if [ -n "$PTH_FILES" ]; then
            FINDINGS="${FINDINGS}
          ### 🚨 CRITICAL: .pth file added or modified
@ -97,7 +100,7 @@ jobs:

          # --- Install-hook files (setup.py/sitecustomize/usercustomize/__init__.pth) ---
          # These execute during pip install or interpreter startup.
-          SETUP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(^|/)(setup\.py|setup\.cfg|sitecustomize\.py|usercustomize\.py|__init__\.pth)$' || true)
+          SETUP_HITS=$(git diff --name-only "$BASE"..."$HEAD" | grep -E '(^|/)(setup\.py|setup\.cfg|sitecustomize\.py|usercustomize\.py|__init__\.pth)$' || true)
          if [ -n "$SETUP_HITS" ]; then
            FINDINGS="${FINDINGS}
          ### 🚨 CRITICAL: Install-hook file added or modified
@ -158,7 +161,7 @@ jobs:
          HEAD="${{ github.event.pull_request.head.sha }}"

          # Only check added lines in pyproject.toml
-          ADDED=$(git diff "$BASE".."$HEAD" -- pyproject.toml | grep '^+' | grep -v '^+++' || true)
+          ADDED=$(git diff "$BASE"..."$HEAD" -- pyproject.toml | grep '^+' | grep -v '^+++' || true)

          if [ -z "$ADDED" ]; then
            echo "found=false" >> "$GITHUB_OUTPUT"
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@ -23,11 +23,22 @@ concurrency:
 jobs:
  test:
    runs-on: ubuntu-latest
-    timeout-minutes: 60
+    timeout-minutes: 30
+    strategy:
+      fail-fast: false
+      matrix:
+        slice: [1, 2, 3, 4, 5, 6]
    steps:
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

+      - name: Restore duration cache
+        uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae  # v5.0.5
+        with:
+          path: test_durations.json
+          # Single stable key. main always overwrites, PRs always find it.
+          key: test-durations
+
      - name: Install ripgrep (prebuilt binary)
        run: |
          set -euo pipefail
@ -54,7 +65,7 @@ jobs:
          source .venv/bin/activate
          uv pip install -e ".[all,dev]"

-      - name: Run tests
+      - name: Run tests (slice ${{ matrix.slice }}/6)
        # Per-file isolation via scripts/run_tests_parallel.py: discovers
        # every test_*.py file under tests/ (excluding integration/ + e2e/),
        # then runs `python -m pytest <file>` in a freshly-spawned subprocess
@ -72,15 +83,61 @@ jobs:
        # state across files, which is exactly the leakage we wanted to
        # fix. ThreadPoolExecutor + subprocess.run is ~60 lines and does
        # the job with cleaner semantics.
+        #
+        # Matrix slicing (--slice I/N): files are distributed across 6
+        # jobs by cached duration (LPT algorithm) so each job gets
+        # roughly equal wall time. Without a cache, files default to 2s
+        # estimate and get split roughly evenly by count — still correct,
+        # just not perfectly balanced.
        run: |
          source .venv/bin/activate
-          python scripts/run_tests_parallel.py
+          python scripts/run_tests_parallel.py --slice ${{ matrix.slice }}/6
        env:
          # Ensure tests don't accidentally call real APIs
          OPENROUTER_API_KEY: ""
          OPENAI_API_KEY: ""
          NOUS_API_KEY: ""

+      - name: Upload per-slice durations
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
+        with:
+          name: test-durations-slice-${{ matrix.slice }}
+          path: test_durations.json
+          retention-days: 1
+
+  # Merge per-slice duration data into a single cache, so future runs
+  # (including PRs) get balanced slicing.
+  save-durations:
+    needs: test
+    if: always() && github.ref == 'refs/heads/main'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Download all slice durations
+        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c  # v8.0.1
+        with:
+          pattern: test-durations-slice-*
+          path: durations
+          merge-multiple: true
+
+      - name: Merge into single durations file
+        run: |
+          python3 -c "
+          import json, glob, os
+          merged = {}
+          for f in glob.glob('durations/*test_durations.json'):
+            with open(f) as fh:
+              merged.update(json.load(fh))
+          with open('test_durations.json', 'w') as fh:
+            json.dump(merged, fh, indent=2, sort_keys=True)
+          print(f'Merged {len(merged)} file durations')
+          "
+
+      - name: Save merged duration cache
+        uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae  # v5.0.5
+        with:
+          path: test_durations.json
+          key: test-durations
+
  e2e:
    runs-on: ubuntu-latest
    timeout-minutes: 15
@ -121,4 +178,4 @@ jobs:
        env:
          OPENROUTER_API_KEY: ""
          OPENAI_API_KEY: ""
-          NOUS_API_KEY: ""
+          NOUS_API_KEY: ""
--- a/.gitignore
+++ b/.gitignore
@ -18,6 +18,7 @@ __pycache__/web_tools.cpython-310.pyc
 logs/
 data/
 .pytest_cache/
+test_durations.json
 .pytest-cache/
 tmp/
 temp_vision_images/
--- a/README.md
+++ b/README.md
@ -79,6 +79,27 @@ hermes doctor       # Diagnose any issues

 📖 **[Full documentation →](https://hermes-agent.nousresearch.com/docs/)**

+---
+
+## Skip the API-key collection — Nous Portal
+
+Hermes works with whatever provider you want — that's not changing. But if you'd rather not collect five separate API keys for the model, web search, image generation, TTS, and a cloud browser, **[Nous Portal](https://portal.nousresearch.com)** covers all of them under one subscription:
+
+- **300+ models** — pick any of them with `/model <name>`
+- **Tool Gateway** — web search (Firecrawl), image generation (FAL), text-to-speech (OpenAI), cloud browser (Browser Use), all routed through your sub. No extra accounts.
+
+One command from a fresh install:
+
+```bash
+hermes setup --portal
+```
+
+That logs you in via OAuth, sets Nous as your provider, and turns on the Tool Gateway. Check what's wired up any time with `hermes portal status`. Full details on the [Tool Gateway docs page](https://hermes-agent.nousresearch.com/docs/user-guide/features/tool-gateway).
+
+You can still bring your own keys per-tool whenever you want — the gateway is per-backend, not all-or-nothing.
+
+---
+
 ## CLI vs Messaging Quick Reference

 Hermes has two entry points: start the terminal UI with `hermes`, or run the gateway and talk to it from Telegram, Discord, Slack, WhatsApp, Signal, or Email. Once you're in a conversation, many slash commands are shared across both interfaces.
--- a/README.zh-CN.md
+++ b/README.zh-CN.md
@ -65,6 +65,27 @@ hermes doctor       # 诊断问题

 📖 **[完整文档 →](https://hermes-agent.nousresearch.com/docs/)**

+---
+
+## 省去到处收集 API Key — Nous Portal
+
+Hermes 始终允许你使用任意服务商，这点不会改变。但如果你不想为模型、网页搜索、图像生成、TTS、云浏览器分别去申请五个不同的 API Key，**[Nous Portal](https://portal.nousresearch.com)** 用一个订阅就能覆盖全部：
+
+- **300+ 模型** — 用 `/model <name>` 随时切换
+- **Tool Gateway** — 网页搜索（Firecrawl）、图像生成（FAL）、文本转语音（OpenAI）、云浏览器（Browser Use），全部通过订阅托管。无需额外注册任何账户。
+
+全新安装时一条命令即可：
+
+```bash
+hermes setup --portal
+```
+
+它会通过 OAuth 登录、把 Nous 设为推理服务商，并启用 Tool Gateway。随时用 `hermes portal status` 查看路由状态。完整说明见 [Tool Gateway 文档](https://hermes-agent.nousresearch.com/docs/user-guide/features/tool-gateway)。
+
+你随时可以按工具单独切回自己的 API Key — Gateway 是按工具粒度生效的，不是一刀切。
+
+---
+
 ## CLI 与消息平台 快速对照

 Hermes 有两种入口：用 `hermes` 启动终端 UI，或运行网关从 Telegram、Discord、Slack、WhatsApp、Signal 或 Email 与之对话。进入对话后，许多斜杠命令在两种界面中通用。
--- a/acp_adapter/server.py
+++ b/acp_adapter/server.py
@ -1534,7 +1534,11 @@ class HermesACPAgent(acp.Agent):
                )
            except Exception:
                logger.debug("Failed to auto-title ACP session %s", session_id, exc_info=True)
-        if final_response and conn and not streamed_message:
+        if final_response and conn and (not streamed_message or result.get("response_transformed")):
+            # Deliver the final response when streaming did not already send it,
+            # or when a plugin hook transformed the response after streaming
+            # finished (e.g. transform_llm_output) — otherwise the appended /
+            # rewritten text never reaches the client.
            update = acp.update_agent_message_text(final_response)
            await conn.session_update(session_id, update)

--- a/agent/agent_init.py
+++ b/agent/agent_init.py
@ -607,6 +607,31 @@ def init_agent(
            # Falling back would send Anthropic credentials to third-party endpoints (Fixes #1739, #minimax-401).
            _is_native_anthropic = agent.provider == "anthropic"
            effective_key = (api_key or resolve_anthropic_token() or "") if _is_native_anthropic else (api_key or "")
+
+            # MiniMax OAuth issues short-lived (~15-min) access tokens. The
+            # Anthropic SDK caches ``api_key`` as a static string at client
+            # construction time, so a session that resolves the bearer once
+            # at startup will keep sending the same token until MiniMax
+            # returns 401 mid-session. Swap the static string for a callable
+            # token provider — ``build_anthropic_client`` recognizes the
+            # callable and installs an httpx event hook that mints a fresh
+            # bearer per outbound request (re-reading auth.json so a refresh
+            # persisted by another process is visible immediately).
+            # The cached refresh path is a no-op when the token still has
+            # ``MINIMAX_OAUTH_REFRESH_SKEW_SECONDS`` of life left, so steady-
+            # state cost is one file read + one timestamp compare per request.
+            if agent.provider == "minimax-oauth" and isinstance(effective_key, str) and effective_key:
+                try:
+                    from hermes_cli.auth import build_minimax_oauth_token_provider
+                    effective_key = build_minimax_oauth_token_provider()
+                except Exception as _mm_exc:  # noqa: BLE001 — never block startup on this
+                    import logging as _logging
+                    _logging.getLogger(__name__).warning(
+                        "MiniMax OAuth: failed to install per-request token provider "
+                        "(%s); falling back to static bearer that will expire ~15min in.",
+                        _mm_exc,
+                    )
+
            agent.api_key = effective_key
            agent._anthropic_api_key = effective_key
            agent._anthropic_base_url = base_url
@ -618,7 +643,7 @@ def init_agent(
            # that cause 401/403 on their endpoints.  Guards #1739 and
            # the third-party identity-injection bug.
            from agent.anthropic_adapter import _is_oauth_token as _is_oat
-            agent._is_anthropic_oauth = _is_oat(effective_key) if _is_native_anthropic else False
+            agent._is_anthropic_oauth = _is_oat(effective_key) if (_is_native_anthropic and isinstance(effective_key, str)) else False
            agent._anthropic_client = build_anthropic_client(effective_key, base_url, timeout=_provider_timeout)
            # No OpenAI client needed for Anthropic mode
            agent.client = None
@ -951,16 +976,14 @@ def init_agent(

    # Expose session ID to tools (terminal, execute_code) so agents can
    # reference their own session for --resume commands, cross-session
-    # coordination, and logging.  Uses the ContextVar system from
-    # session_context.py for concurrency safety (gateway runs multiple
-    # sessions in one process).  Also writes os.environ as fallback for
-    # CLI mode where ContextVars aren't used.
-    os.environ["HERMES_SESSION_ID"] = agent.session_id
+    # coordination, and logging. Keep the ContextVar and os.environ
+    # fallback synchronized because different tool paths still read both.
    try:
-        from gateway.session_context import _SESSION_ID
-        _SESSION_ID.set(agent.session_id)
+        from gateway.session_context import set_current_session_id
+
+        set_current_session_id(agent.session_id)
    except Exception:
-        pass  # CLI/test mode — ContextVar not needed
+        os.environ["HERMES_SESSION_ID"] = agent.session_id

    # Session logs go into ~/.hermes/sessions/ alongside gateway sessions
    hermes_home = get_hermes_home()
@ -1125,7 +1148,18 @@ def init_agent(
    # through _ra().get_tool_definitions()).  Duplicate function names cause
    # 400 errors on providers that enforce unique names (e.g. Xiaomi
    # MiMo via Nous Portal).
-    if agent._memory_manager and agent.tools is not None:
+    #
+    # Respect the platform's enabled_toolsets configuration (#5544):
+    #   enabled_toolsets is None        → no filter, inject (backward compat)
+    #   "memory" in enabled_toolsets    → user opted in, inject
+    #   otherwise (incl. [])            → user excluded memory, skip injection
+    #
+    # Without this gate, `platform_toolsets: telegram: []` still leaks memory
+    # provider tools (fact_store, etc.) into the tool surface — a 10x latency
+    # penalty on local models and a frequent trigger of tool-call loops.
+    if agent._memory_manager and agent.tools is not None and (
+        agent.enabled_toolsets is None or "memory" in agent.enabled_toolsets
+    ):
        _existing_tool_names = {
            t.get("function", {}).get("name")
            for t in agent.tools
@ -1393,6 +1427,7 @@ def init_agent(
            base_url=agent.base_url,
            api_key=getattr(agent, "api_key", ""),
            provider=agent.provider,
+            api_mode=agent.api_mode,
        )
        if not agent.quiet_mode:
            _ra().logger.info("Using context engine: %s", _selected_engine.name)
@ -1435,8 +1470,22 @@ def init_agent(
    # errors. Even with the cache fix, dedup is the right defense
    # against plugin paths that may register the same schemas via
    # ctx.register_tool(). Mirrors the memory tools dedup above.
+    #
+    # Respect the platform's enabled_toolsets configuration (#5544):
+    # context engine tools follow the same gating pattern as memory
+    # provider tools — without the gate, `platform_toolsets: telegram: []`
+    # would still leak lcm_* tools into the tool surface and incur the
+    # same local-model latency penalty.
    agent._context_engine_tool_names: set = set()
-    if hasattr(agent, "context_compressor") and agent.context_compressor and agent.tools is not None:
+    if (
+        hasattr(agent, "context_compressor")
+        and agent.context_compressor
+        and agent.tools is not None
+        and (
+            agent.enabled_toolsets is None
+            or "context_engine" in agent.enabled_toolsets
+        )
+    ):
        _existing_tool_names = {
            t.get("function", {}).get("name")
            for t in agent.tools
--- a/agent/agent_runtime_helpers.py
+++ b/agent/agent_runtime_helpers.py
@ -132,7 +132,7 @@ def convert_to_trajectory_format(agent, messages: List[Dict[str, Any]], user_que
                    except json.JSONDecodeError:
                        # This shouldn't happen since we validate and retry during conversation,
                        # but if it does, log warning and use empty dict
-                        logging.warning(f"Unexpected invalid JSON in trajectory conversion: {tool_call['function']['arguments'][:100]}")
+                        logger.warning(f"Unexpected invalid JSON in trajectory conversion: {tool_call['function']['arguments'][:100]}")
                        arguments = {}
                    
                    tool_call_json = {
@ -617,9 +617,28 @@ def recover_with_credential_pool(
        # existing entitlement keyword set in ``_is_entitlement_failure``.
        # Any 403 against ``xai-oauth`` is treated as entitlement here so
        # the refresh loop can't spin in those cases either.
+        #
+        # Exception (#29344): xAI's ``[WKE=unauthenticated:...]`` suffix and
+        # the ``OAuth2 access token could not be validated`` phrasing are
+        # xAI's authoritative "this is a stale token, not entitlement"
+        # signal.  When either fires we must NOT apply the catch-all
+        # override — refresh is the recoverable path for these bodies, and
+        # blanket-classifying them as entitlement was the bug that left
+        # long-running TUI sessions stuck on stale tokens until the user
+        # exited and reopened.
        is_entitlement = agent._is_entitlement_failure(error_context, status_code)
        if not is_entitlement and status_code == 403 and (agent.provider or "") == "xai-oauth":
-            is_entitlement = True
+            _disambiguator_haystack = " ".join(
+                str(error_context.get(k) or "").lower()
+                for k in ("message", "reason", "code", "error")
+                if isinstance(error_context, dict)
+            )
+            _is_xai_auth_failure = (
+                "[wke=unauthenticated:" in _disambiguator_haystack
+                or "oauth2 access token could not be validated" in _disambiguator_haystack
+            )
+            if not _is_xai_auth_failure:
+                is_entitlement = True
        if is_entitlement:
            _ra().logger.info(
                "Credential %s — entitlement-shaped 403 from %s; "
@ -728,7 +747,7 @@ def try_recover_primary_transport(
        time.sleep(wait_time)
        return True
    except Exception as e:
-        logging.warning("Primary transport recovery failed: %s", e)
+        logger.warning("Primary transport recovery failed: %s", e)
        return False

 # ── End provider fallback ──────────────────────────────────────────────
@ -891,19 +910,20 @@ def restore_primary_runtime(agent) -> bool:
            base_url=rt["compressor_base_url"],
            api_key=rt["compressor_api_key"],
            provider=rt["compressor_provider"],
+            api_mode=rt.get("compressor_api_mode", ""),
        )

        # ── Reset fallback chain for the new turn ──
        agent._fallback_activated = False
        agent._fallback_index = 0

-        logging.info(
+        logger.info(
            "Primary runtime restored for new turn: %s (%s)",
            agent.model, agent.provider,
        )
        return True
    except Exception as e:
-        logging.warning("Failed to restore primary runtime: %s", e)
+        logger.warning("Failed to restore primary runtime: %s", e)
        return False

 # Which error types indicate a transient transport failure worth
@ -1064,10 +1084,7 @@ def dump_api_request_debug(

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        dump_file = agent.logs_dir / f"request_dump_{agent.session_id}_{timestamp}.json"
-        dump_file.write_text(
-            json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str),
-            encoding="utf-8",
-        )
+        atomic_json_write(dump_file, dump_payload, default=str)

        agent._vprint(f"{agent.log_prefix}🧾 Request debug dump written to: {dump_file}")

@ -1077,7 +1094,7 @@ def dump_api_request_debug(
        return dump_file
    except Exception as dump_error:
        if agent.verbose_logging:
-            logging.warning(f"Failed to dump API request debug payload: {dump_error}")
+            logger.warning(f"Failed to dump API request debug payload: {dump_error}")
        return None


@ -1352,6 +1369,22 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
        # API key — falling back would send Anthropic credentials to third-party endpoints.
        _is_native_anthropic = new_provider == "anthropic"
        effective_key = (api_key or agent.api_key or resolve_anthropic_token() or "") if _is_native_anthropic else (api_key or agent.api_key or "")
+
+        # MiniMax OAuth: swap static string for a per-request callable token
+        # provider so the rebuilt client survives 15-min token expiry. See
+        # the matching block in agent_init.py for the full rationale.
+        if new_provider == "minimax-oauth" and isinstance(effective_key, str) and effective_key:
+            try:
+                from hermes_cli.auth import build_minimax_oauth_token_provider
+                effective_key = build_minimax_oauth_token_provider()
+            except Exception as _mm_exc:  # noqa: BLE001
+                import logging as _logging
+                _logging.getLogger(__name__).warning(
+                    "MiniMax OAuth: failed to install per-request token provider "
+                    "on switch (%s); using static bearer.",
+                    _mm_exc,
+                )
+
        agent.api_key = effective_key
        agent._anthropic_api_key = effective_key
        agent._anthropic_base_url = base_url or getattr(agent, "_anthropic_base_url", None)
@ -1359,7 +1392,7 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
            effective_key, agent._anthropic_base_url,
            timeout=get_provider_request_timeout(agent.provider, agent.model),
        )
-        agent._is_anthropic_oauth = _is_oauth_token(effective_key) if _is_native_anthropic else False
+        agent._is_anthropic_oauth = _is_oauth_token(effective_key) if (_is_native_anthropic and isinstance(effective_key, str)) else False
        agent.client = None
        agent._client_kwargs = {}
    else:
@ -1446,6 +1479,7 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
        "compressor_api_key": getattr(_cc, "api_key", "") if _cc else "",
        "compressor_provider": getattr(_cc, "provider", agent.provider) if _cc else agent.provider,
        "compressor_context_length": _cc.context_length if _cc else 0,
+        "compressor_api_mode": getattr(_cc, "api_mode", agent.api_mode) if _cc else agent.api_mode,
        "compressor_threshold_tokens": _cc.threshold_tokens if _cc else 0,
    }
    if api_mode == "anthropic_messages":
@ -1477,7 +1511,7 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
    agent._fallback_chain = fallback_chain
    agent._fallback_model = fallback_chain[0] if fallback_chain else None

-    logging.info(
+    logger.info(
        "Model switched in-place: %s (%s) -> %s (%s)",
        old_model, old_provider, new_model, new_provider,
    )
@ -2116,33 +2150,56 @@ def apply_pending_steer_to_tool_results(agent, messages: list, num_tool_msgs: in


 def force_close_tcp_sockets(client: Any) -> int:
-    """Force-close underlying TCP sockets to prevent CLOSE-WAIT accumulation.
+    """Abort in-flight TCP I/O by shutting down sockets WITHOUT closing FDs.

-    When a provider drops a connection mid-stream, httpx's ``client.close()``
-    performs a graceful shutdown which leaves sockets in CLOSE-WAIT until the
-    OS times them out (often minutes).  This method walks the httpx transport
-    pool and issues ``socket.shutdown(SHUT_RDWR)`` + ``socket.close()`` to
-    force an immediate TCP RST, freeing the file descriptors.
+    When a provider drops a connection mid-stream — or the user issues an
+    interrupt — we want to unblock httpx's reader/writer immediately rather
+    than waiting for the kernel's per-connection timeout. ``shutdown(SHUT_RDWR)``
+    achieves that: it sends FIN, breaks any pending ``recv``/``send`` with EOF
+    or ``EPIPE``, but does NOT release the file descriptor.

-    Returns the number of sockets force-closed.
+    Historically this helper also called ``socket.close()`` so the FD got
+    released immediately, but that's unsafe when (as is the case for both the
+    interrupt-abort path and stale-call kill path) the helper runs on a
+    different thread than the one driving the request:
+
+      * The Python ``socket.socket`` we close here is the SAME object held by
+        httpx's pool, so closing it via Python sets its ``_fd`` to -1 and
+        future operations on that Python object fail safely.
+      * BUT the SSL wrapper (``ssl.SSLSocket``'s underlying OpenSSL ``BIO``)
+        caches the raw integer FD. Once ``os.close(fd)`` runs, the kernel may
+        immediately recycle that integer to the next ``open()`` call — e.g.
+        the kanban dispatcher opening ``kanban.db``.
+      * The owning worker thread then unwinds httpx, the SSL layer flushes a
+        pending TLS record, and the encrypted bytes get written into the
+        wrong file (issue #29507: 24-byte TLS application-data record
+        clobbering SQLite header bytes 5..28).
+
+    The fix is to let the owning thread own the close. ``shutdown()`` from any
+    thread is FD-safe; ``close()`` is not. The httpx connection's own close
+    path — which runs from the worker thread when it unwinds — will release
+    the FD via the same ``socket.socket`` object, and because Python's socket
+    close atomically swaps ``_fd`` to -1 *before* issuing ``os.close``, there
+    is no FD-aliasing window when only one thread closes.
+
+    Returns the number of sockets shut down. (Field kept as
+    ``tcp_force_closed=N`` in the log line for backwards-compatible parsing.)
    """
    import socket as _socket

-    closed = 0
+    shutdown_count = 0
    try:
        for sock in _iter_pool_sockets(client):
            try:
                sock.shutdown(_socket.SHUT_RDWR)
            except OSError:
+                # Already shut down / not connected / FD invalid — all benign.
                pass
-            try:
-                sock.close()
-            except OSError:
-                pass
-            closed += 1
+            # IMPORTANT (#29507): do NOT call sock.close() here. See docstring.
+            shutdown_count += 1
    except Exception as exc:
        _ra().logger.debug("Force-close TCP sockets sweep error: %s", exc)
-    return closed
+    return shutdown_count



--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@ -1606,182 +1606,155 @@ def _content_parts_to_anthropic_blocks(parts: Any) -> List[Dict[str, Any]]:
    return out


-def convert_messages_to_anthropic(
-    messages: List[Dict],
-    base_url: str | None = None,
-    model: str | None = None,
-) -> Tuple[Optional[Any], List[Dict]]:
-    """Convert OpenAI-format messages to Anthropic format.
+def _convert_assistant_message(m: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert an assistant message to Anthropic content blocks.

-    Returns (system_prompt, anthropic_messages).
-    System messages are extracted since Anthropic takes them as a separate param.
-    system_prompt is a string or list of content blocks (when cache_control present).
-
-    When *base_url* is provided and points to a third-party Anthropic-compatible
-    endpoint, all thinking block signatures are stripped.  Signatures are
-    Anthropic-proprietary — third-party endpoints cannot validate them and will
-    reject them with HTTP 400 "Invalid signature in thinking block".
-
-    When *model* is provided and matches the Kimi / Moonshot family (or
-    *base_url* is a Kimi / Moonshot host), unsigned thinking blocks
-    synthesised from ``reasoning_content`` are preserved on replayed
-    assistant tool-call messages — Kimi requires the field to exist, even
-    if empty.
+    Handles thinking blocks, regular content, tool calls, and
+    reasoning_content injection for Kimi/DeepSeek endpoints.
    """
-    system = None
-    result = []
-
-    for m in messages:
-        role = m.get("role", "user")
-        content = m.get("content", "")
-
-        if role == "system":
-            if isinstance(content, list):
-                # Preserve cache_control markers on content blocks
-                has_cache = any(
-                    p.get("cache_control") for p in content if isinstance(p, dict)
-                )
-                if has_cache:
-                    system = [p for p in content if isinstance(p, dict)]
-                else:
-                    system = "\n".join(
-                        p["text"] for p in content if p.get("type") == "text"
-                    )
-            else:
-                system = content
-            continue
-
-        if role == "assistant":
-            blocks = _extract_preserved_thinking_blocks(m)
-            if content:
-                if isinstance(content, list):
-                    converted_content = _convert_content_to_anthropic(content)
-                    if isinstance(converted_content, list):
-                        blocks.extend(converted_content)
-                else:
-                    blocks.append({"type": "text", "text": str(content)})
-            for tc in m.get("tool_calls", []):
-                if not tc or not isinstance(tc, dict):
-                    continue
-                fn = tc.get("function", {})
-                args = fn.get("arguments", "{}")
-                try:
-                    parsed_args = json.loads(args) if isinstance(args, str) else args
-                except (json.JSONDecodeError, ValueError):
-                    parsed_args = {}
-                blocks.append({
-                    "type": "tool_use",
-                    "id": _sanitize_tool_id(tc.get("id", "")),
-                    "name": fn.get("name", ""),
-                    "input": parsed_args,
-                })
-            # Kimi's /coding endpoint (Anthropic protocol) requires assistant
-            # tool-call messages to carry reasoning_content when thinking is
-            # enabled server-side.  Preserve it as a thinking block so Kimi
-            # can validate the message history.  See hermes-agent#13848.
-            #
-            # Accept empty string "" — _copy_reasoning_content_for_api()
-            # injects "" as a tier-3 fallback for Kimi tool-call messages
-            # that had no reasoning.  Kimi requires the field to exist, even
-            # if empty.
-            #
-            # Prepend (not append): Anthropic protocol requires thinking
-            # blocks before text and tool_use blocks.
-            #
-            # Guard: only add when reasoning_details didn't already contribute
-            # thinking blocks.  On native Anthropic, reasoning_details produces
-            # signed thinking blocks — adding another unsigned one from
-            # reasoning_content would create a duplicate (same text) that gets
-            # downgraded to a spurious text block on the last assistant message.
-            reasoning_content = m.get("reasoning_content")
-            _already_has_thinking = any(
-                isinstance(b, dict) and b.get("type") in {"thinking", "redacted_thinking"}
-                for b in blocks
-            )
-            if isinstance(reasoning_content, str) and not _already_has_thinking:
-                blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
-            # Anthropic rejects empty assistant content
-            effective = blocks or content
-            if not effective or effective == "":
-                effective = [{"type": "text", "text": "(empty)"}]
-            result.append({"role": "assistant", "content": effective})
-            continue
-
-        if role == "tool":
-            # Sanitize tool_use_id and ensure non-empty content.
-            # Computer-use (and other multimodal) tool results arrive as
-            # either a list of OpenAI-style content parts, or a dict
-            # marked `_multimodal` with an embedded `content` list. Convert
-            # both into Anthropic `tool_result` inner blocks (text + image).
-            multimodal_blocks: Optional[List[Dict[str, Any]]] = None
-            if isinstance(content, dict) and content.get("_multimodal"):
-                multimodal_blocks = _content_parts_to_anthropic_blocks(
-                    content.get("content") or []
-                )
-                # Fallback text if the conversion produced nothing usable.
-                if not multimodal_blocks and content.get("text_summary"):
-                    multimodal_blocks = [
-                        {"type": "text", "text": str(content["text_summary"])}
-                    ]
-            elif isinstance(content, list):
-                converted = _content_parts_to_anthropic_blocks(content)
-                if any(b.get("type") == "image" for b in converted):
-                    multimodal_blocks = converted
-            # Back-compat: some callers stash blocks under a private key.
-            if multimodal_blocks is None:
-                stashed = m.get("_anthropic_content_blocks")
-                if isinstance(stashed, list) and stashed:
-                    text_content = content if isinstance(content, str) and content.strip() else None
-                    multimodal_blocks = (
-                        [{"type": "text", "text": text_content}] + stashed
-                        if text_content else list(stashed)
-                    )
-
-            if multimodal_blocks:
-                result_content: Any = multimodal_blocks
-            elif isinstance(content, str):
-                result_content = content
-            else:
-                result_content = json.dumps(content) if content else "(no output)"
-            if not result_content:
-                result_content = "(no output)"
-            tool_result = {
-                "type": "tool_result",
-                "tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
-                "content": result_content,
-            }
-            if isinstance(m.get("cache_control"), dict):
-                tool_result["cache_control"] = dict(m["cache_control"])
-            # Merge consecutive tool results into one user message
-            if (
-                result
-                and result[-1]["role"] == "user"
-                and isinstance(result[-1]["content"], list)
-                and result[-1]["content"]
-                and result[-1]["content"][0].get("type") == "tool_result"
-            ):
-                result[-1]["content"].append(tool_result)
-            else:
-                result.append({"role": "user", "content": [tool_result]})
-            continue
-
-        # Regular user message — validate non-empty content (Anthropic rejects empty)
+    content = m.get("content", "")
+    blocks = _extract_preserved_thinking_blocks(m)
+    if content:
        if isinstance(content, list):
-            converted_blocks = _convert_content_to_anthropic(content)
-            # Check if all text blocks are empty
-            if not converted_blocks or all(
-                b.get("text", "").strip() == ""
-                for b in converted_blocks
-                if isinstance(b, dict) and b.get("type") == "text"
-            ):
-                converted_blocks = [{"type": "text", "text": "(empty message)"}]
-            result.append({"role": "user", "content": converted_blocks})
+            converted_content = _convert_content_to_anthropic(content)
+            if isinstance(converted_content, list):
+                blocks.extend(converted_content)
        else:
-            # Validate string content is non-empty
-            if not content or (isinstance(content, str) and not content.strip()):
-                content = "(empty message)"
-            result.append({"role": "user", "content": content})
+            blocks.append({"type": "text", "text": str(content)})
+    for tc in m.get("tool_calls", []):
+        if not tc or not isinstance(tc, dict):
+            continue
+        fn = tc.get("function", {})
+        args = fn.get("arguments", "{}")
+        try:
+            parsed_args = json.loads(args) if isinstance(args, str) else args
+        except (json.JSONDecodeError, ValueError):
+            parsed_args = {}
+        blocks.append({
+            "type": "tool_use",
+            "id": _sanitize_tool_id(tc.get("id", "")),
+            "name": fn.get("name", ""),
+            "input": parsed_args,
+        })
+    # Kimi's /coding endpoint (Anthropic protocol) requires assistant
+    # tool-call messages to carry reasoning_content when thinking is
+    # enabled server-side.  Preserve it as a thinking block so Kimi
+    # can validate the message history.  See hermes-agent#13848.
+    #
+    # Accept empty string "" — _copy_reasoning_content_for_api()
+    # injects "" as a tier-3 fallback for Kimi tool-call messages
+    # that had no reasoning.  Kimi requires the field to exist, even
+    # if empty.
+    #
+    # Prepend (not append): Anthropic protocol requires thinking
+    # blocks before text and tool_use blocks.
+    #
+    # Guard: only add when reasoning_details didn't already contribute
+    # thinking blocks.  On native Anthropic, reasoning_details produces
+    # signed thinking blocks — adding another unsigned one from
+    # reasoning_content would create a duplicate (same text) that gets
+    # downgraded to a spurious text block on the last assistant message.
+    reasoning_content = m.get("reasoning_content")
+    _already_has_thinking = any(
+        isinstance(b, dict) and b.get("type") in {"thinking", "redacted_thinking"}
+        for b in blocks
+    )
+    if isinstance(reasoning_content, str) and not _already_has_thinking:
+        blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
+    # Anthropic rejects empty assistant content
+    effective = blocks or content
+    if not effective or effective == "":
+        effective = [{"type": "text", "text": "(empty)"}]
+    return {"role": "assistant", "content": effective}

+
+def _convert_tool_message_to_result(
+    result: List[Dict[str, Any]], m: Dict[str, Any]
+) -> None:
+    """Convert a tool message to an Anthropic tool_result, merging consecutive
+    results into one user message.
+
+    Mutates ``result`` in place — either appends a new user message or extends
+    the trailing user message's tool_result list.
+    """
+    content = m.get("content", "")
+    multimodal_blocks: Optional[List[Dict[str, Any]]] = None
+    if isinstance(content, dict) and content.get("_multimodal"):
+        multimodal_blocks = _content_parts_to_anthropic_blocks(
+            content.get("content") or []
+        )
+        # Fallback text if the conversion produced nothing usable.
+        if not multimodal_blocks and content.get("text_summary"):
+            multimodal_blocks = [
+                {"type": "text", "text": str(content["text_summary"])}
+            ]
+    elif isinstance(content, list):
+        converted = _content_parts_to_anthropic_blocks(content)
+        if any(b.get("type") == "image" for b in converted):
+            multimodal_blocks = converted
+    # Back-compat: some callers stash blocks under a private key.
+    if multimodal_blocks is None:
+        stashed = m.get("_anthropic_content_blocks")
+        if isinstance(stashed, list) and stashed:
+            text_content = content if isinstance(content, str) and content.strip() else None
+            multimodal_blocks = (
+                [{"type": "text", "text": text_content}] + stashed
+                if text_content else list(stashed)
+            )
+
+    if multimodal_blocks:
+        result_content: Any = multimodal_blocks
+    elif isinstance(content, str):
+        result_content = content
+    else:
+        result_content = json.dumps(content) if content else "(no output)"
+    if not result_content:
+        result_content = "(no output)"
+    tool_result = {
+        "type": "tool_result",
+        "tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
+        "content": result_content,
+    }
+    if isinstance(m.get("cache_control"), dict):
+        tool_result["cache_control"] = dict(m["cache_control"])
+    # Merge consecutive tool results into one user message
+    if (
+        result
+        and result[-1]["role"] == "user"
+        and isinstance(result[-1]["content"], list)
+        and result[-1]["content"]
+        and result[-1]["content"][0].get("type") == "tool_result"
+    ):
+        result[-1]["content"].append(tool_result)
+    else:
+        result.append({"role": "user", "content": [tool_result]})
+
+
+def _convert_user_message(content: Any) -> Dict[str, Any]:
+    """Validate and convert a user message to anthropic format."""
+    if isinstance(content, list):
+        converted_blocks = _convert_content_to_anthropic(content)
+        if not converted_blocks or all(
+            b.get("text", "").strip() == ""
+            for b in converted_blocks
+            if isinstance(b, dict) and b.get("type") == "text"
+        ):
+            converted_blocks = [{"type": "text", "text": "(empty message)"}]
+        return {"role": "user", "content": converted_blocks}
+    else:
+        if not content or (isinstance(content, str) and not content.strip()):
+            content = "(empty message)"
+        return {"role": "user", "content": content}
+
+
+def _strip_orphaned_tool_blocks(result: List[Dict[str, Any]]) -> None:
+    """Strip tool_use blocks with no matching tool_result, and vice versa.
+
+    Context compression or session truncation can remove either side of a
+    tool-call pair.  Anthropic rejects both orphans with HTTP 400.
+
+    Mutates ``result`` in place.
+    """
    # Strip orphaned tool_use blocks (no matching tool_result follows)
    tool_result_ids = set()
    for m in result:
@ -1799,10 +1772,7 @@ def convert_messages_to_anthropic(
            if not m["content"]:
                m["content"] = [{"type": "text", "text": "(tool call removed)"}]

-    # Strip orphaned tool_result blocks (no matching tool_use precedes them).
-    # This is the mirror of the above: context compression or session truncation
-    # can remove an assistant message containing a tool_use while leaving the
-    # subsequent tool_result intact.  Anthropic rejects these with a 400.
+    # Strip orphaned tool_result blocks (no matching tool_use precedes them)
    tool_use_ids = set()
    for m in result:
        if m["role"] == "assistant" and isinstance(m["content"], list):
@ -1819,12 +1789,16 @@ def convert_messages_to_anthropic(
            if not m["content"]:
                m["content"] = [{"type": "text", "text": "(tool result removed)"}]

-    # Enforce strict role alternation (Anthropic rejects consecutive same-role messages)
+
+def _merge_consecutive_roles(result: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Merge consecutive same-role messages to enforce Anthropic alternation.
+
+    Returns a new list (caller must rebind ``result``).
+    """
    fixed = []
    for m in result:
        if fixed and fixed[-1]["role"] == m["role"]:
            if m["role"] == "user":
-                # Merge consecutive user messages
                prev_content = fixed[-1]["content"]
                curr_content = m["content"]
                if isinstance(prev_content, str) and isinstance(curr_content, str):
@ -1832,7 +1806,6 @@ def convert_messages_to_anthropic(
                elif isinstance(prev_content, list) and isinstance(curr_content, list):
                    fixed[-1]["content"] = prev_content + curr_content
                else:
-                    # Mixed types — wrap string in list
                    if isinstance(prev_content, str):
                        prev_content = [{"type": "text", "text": prev_content}]
                    if isinstance(curr_content, str):
@ -1855,7 +1828,6 @@ def convert_messages_to_anthropic(
                elif isinstance(prev_blocks, str) and isinstance(curr_blocks, str):
                    fixed[-1]["content"] = prev_blocks + "\n" + curr_blocks
                else:
-                    # Mixed types — normalize both to list and merge
                    if isinstance(prev_blocks, str):
                        prev_blocks = [{"type": "text", "text": prev_blocks}]
                    if isinstance(curr_blocks, str):
@ -1863,37 +1835,34 @@ def convert_messages_to_anthropic(
                    fixed[-1]["content"] = prev_blocks + curr_blocks
        else:
            fixed.append(m)
-    result = fixed
+    return fixed

-    # ── Thinking block signature management ──────────────────────────
-    # Anthropic signs thinking blocks against the full turn content.
-    # Any upstream mutation (context compression, session truncation,
-    # orphan stripping, message merging) invalidates the signature,
-    # causing HTTP 400 "Invalid signature in thinking block".
-    #
-    # Signatures are Anthropic-proprietary.  Third-party endpoints
-    # (MiniMax, Microsoft Foundry, self-hosted proxies) cannot validate
-    # them and will reject them outright.  When targeting a third-party
-    # endpoint, strip ALL thinking/redacted_thinking blocks from every
-    # assistant message — the third-party will generate its own
-    # thinking blocks if it supports extended thinking.
-    #
-    # For direct Anthropic (strategy following clawdbot/OpenClaw):
-    # 1. Strip thinking/redacted_thinking from all assistant messages
-    #    EXCEPT the last one — preserves reasoning continuity on the
-    #    current tool-use chain while avoiding stale signature errors.
-    # 2. Downgrade unsigned thinking blocks (no signature) to text —
-    #    Anthropic can't validate them and will reject them.
-    # 3. Strip cache_control from thinking/redacted_thinking blocks —
-    #    cache markers can interfere with signature validation.
+
+def _manage_thinking_signatures(
+    result: List[Dict[str, Any]], base_url: str | None, model: str | None
+) -> None:
+    """Strip or preserve thinking blocks based on endpoint type.
+
+    Anthropic signs thinking blocks against the full turn content.
+    Any upstream mutation (context compression, session truncation, orphan
+    stripping, message merging) invalidates the signature, causing HTTP 400
+    "Invalid signature in thinking block".
+
+    Signatures are Anthropic-proprietary.  Third-party endpoints (MiniMax,
+    Azure AI Foundry, AWS Bedrock, self-hosted proxies) cannot validate them
+    and will reject them outright.  Kimi's /coding and DeepSeek's /anthropic
+    endpoints speak the Anthropic protocol upstream but require unsigned
+    thinking blocks (synthesised from ``reasoning_content``) to round-trip on
+    replayed assistant tool-call messages.  See hermes-agent#13848 (Kimi) and
+    hermes-agent#16748 (DeepSeek).
+
+    Mutates ``result`` in place.
+    """
    _THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
    _is_third_party = _is_third_party_anthropic_endpoint(base_url)
-    # Kimi /coding and DeepSeek /anthropic share a contract: both speak the
-    # Anthropic Messages protocol upstream but require that thinking blocks
-    # synthesised from reasoning_content round-trip on subsequent turns when
-    # thinking is enabled.  Signed Anthropic blocks still have to be stripped
-    # (neither endpoint can validate Anthropic's signatures); unsigned blocks
-    # are preserved.  See hermes-agent#13848 (Kimi) and #16748 (DeepSeek).
+    # Kimi / DeepSeek share a contract: strip signed Anthropic blocks
+    # (neither upstream can validate Anthropic signatures), preserve unsigned
+    # ones synthesised from reasoning_content.  See #13848, #16748.
    _preserve_unsigned_thinking = (
        _is_kimi_family_endpoint(base_url, model)
        or _is_deepseek_anthropic_endpoint(base_url)
@ -1910,26 +1879,19 @@ def convert_messages_to_anthropic(
            continue

        if _preserve_unsigned_thinking:
-            # Kimi's /coding and DeepSeek's /anthropic endpoints both enable
-            # thinking server-side and require unsigned thinking blocks on
-            # replayed assistant tool-call messages.  Strip signed Anthropic
-            # blocks (neither upstream can validate Anthropic signatures) but
-            # preserve the unsigned ones we synthesised from reasoning_content.
+            # Kimi / DeepSeek: strip signed, preserve unsigned.
            new_content = []
            for b in m["content"]:
                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
                    new_content.append(b)
                    continue
                if b.get("signature") or b.get("data"):
-                    # Anthropic-signed block — upstream can't validate, strip
+                    # Signed (or redacted-with-data) — upstream can't validate, strip.
                    continue
-                # Unsigned thinking (synthesised from reasoning_content) —
-                # keep it: the upstream needs it for message-history validation.
                new_content.append(b)
            m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
        elif _is_third_party or idx != last_assistant_idx:
-            # Third-party endpoint: strip ALL thinking blocks from every
-            # assistant message — signatures are Anthropic-proprietary.
+            # Third-party: strip ALL thinking blocks (signatures are proprietary).
            # Direct Anthropic: strip from non-latest assistant messages only.
            stripped = [
                b for b in m["content"]
@ -1937,24 +1899,21 @@ def convert_messages_to_anthropic(
            ]
            m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}]
        else:
-            # Latest assistant on direct Anthropic: keep signed thinking
-            # blocks for reasoning continuity; downgrade unsigned ones to
-            # plain text.
+            # Latest assistant on direct Anthropic: keep signed, downgrade unsigned
+            # to text so the reasoning isn't lost.
            new_content = []
            for b in m["content"]:
                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
                    new_content.append(b)
                    continue
                if b.get("type") == "redacted_thinking":
-                    # Redacted blocks use 'data' for the signature payload
+                    # Redacted blocks use 'data' for the signature payload —
+                    # drop the block when 'data' is missing (can't be validated).
                    if b.get("data"):
                        new_content.append(b)
-                    # else: drop — no data means it can't be validated
                elif b.get("signature"):
-                    # Signed thinking block — keep it
                    new_content.append(b)
                else:
-                    # Unsigned thinking — downgrade to text so it's not lost
                    thinking_text = b.get("thinking", "")
                    if thinking_text:
                        new_content.append({"type": "text", "text": thinking_text})
@ -1966,12 +1925,15 @@ def convert_messages_to_anthropic(
            if isinstance(b, dict) and b.get("type") in _THINKING_TYPES:
                b.pop("cache_control", None)

-    # ── Image eviction: keep only the most recent N screenshots ─────
-    # computer_use screenshots (base64 images) sit inside tool_result
-    # blocks: they accumulate and are sent with every API call. Each
-    # costs ~1,465 tokens; after 10+ the conversation becomes slow
-    # even for simple text queries. Walk backward, keep the most recent
-    # _MAX_KEEP_IMAGES, replace older ones with a text placeholder.
+
+def _evict_old_screenshots(result: List[Dict[str, Any]]) -> None:
+    """Keep only the most recent ``_MAX_KEEP_IMAGES`` computer-use screenshots.
+
+    Base64 images cost ~1,465 tokens each and accumulate across tool calls.
+    Walk backward, keep the most recent N, replace older ones with a placeholder.
+
+    Mutates ``result`` in place.
+    """
    _MAX_KEEP_IMAGES = 3
    _image_count = 0
    for msg in reversed(result):
@ -1998,6 +1960,68 @@ def convert_messages_to_anthropic(
                    for b in inner
                ]

+
+def convert_messages_to_anthropic(
+    messages: List[Dict],
+    base_url: str | None = None,
+    model: str | None = None,
+) -> Tuple[Optional[Any], List[Dict]]:
+    """Convert OpenAI-format messages to Anthropic format.
+
+    Returns (system_prompt, anthropic_messages).
+    System messages are extracted since Anthropic takes them as a separate param.
+    system_prompt is a string or list of content blocks (when cache_control present).
+
+    When *base_url* is provided and points to a third-party Anthropic-compatible
+    endpoint, all thinking block signatures are stripped.  Signatures are
+    Anthropic-proprietary — third-party endpoints cannot validate them and will
+    reject them with HTTP 400 "Invalid signature in thinking block".
+
+    When *model* is provided and matches the Kimi / Moonshot family (or
+    *base_url* is a Kimi / Moonshot host), unsigned thinking blocks
+    synthesised from ``reasoning_content`` are preserved on replayed
+    assistant tool-call messages — Kimi requires the field to exist, even
+    if empty.
+    """
+    system = None
+    result: List[Dict[str, Any]] = []
+
+    for m in messages:
+        role = m.get("role", "user")
+        content = m.get("content", "")
+
+        if role == "system":
+            if isinstance(content, list):
+                # Preserve cache_control markers on content blocks
+                has_cache = any(
+                    p.get("cache_control") for p in content if isinstance(p, dict)
+                )
+                if has_cache:
+                    system = [p for p in content if isinstance(p, dict)]
+                else:
+                    system = "\n".join(
+                        p["text"] for p in content if p.get("type") == "text"
+                    )
+            else:
+                system = content
+            continue
+
+        if role == "assistant":
+            result.append(_convert_assistant_message(m))
+            continue
+
+        if role == "tool":
+            _convert_tool_message_to_result(result, m)
+            continue
+
+        # Regular user message
+        result.append(_convert_user_message(content))
+
+    _strip_orphaned_tool_blocks(result)
+    result = _merge_consecutive_roles(result)
+    _manage_thinking_signatures(result, base_url, model)
+    _evict_old_screenshots(result)
+
    return system, result


@ -2098,9 +2122,13 @@ def build_anthropic_kwargs(
                block["text"] = text

        # 3. Prefix tool names with mcp_ (Claude Code convention)
+        #    Skip names that already begin with the marker — native MCP server
+        #    tools (from mcp_servers: in config.yaml) are registered under their
+        #    full mcp_<server>_<tool> name and would double-prefix otherwise,
+        #    breaking round-trip registry lookup in normalize_response. GH-25255.
        if anthropic_tools:
            for tool in anthropic_tools:
-                if "name" in tool:
+                if "name" in tool and not tool["name"].startswith(_MCP_TOOL_PREFIX):
                    tool["name"] = _MCP_TOOL_PREFIX + tool["name"]

        # 4. Prefix tool names in message history (tool_use and tool_result blocks)
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@ -3730,6 +3730,37 @@ _VISION_AUTO_PROVIDER_ORDER = (
 )


+def _main_model_supports_vision(provider: str, model: Optional[str]) -> bool:
+    """Return True when ``provider``/``model`` is known to accept image input.
+
+    Used by the vision auto-detect chain to skip the user's main provider
+    when it's known to be text-only (e.g. DeepSeek, gpt-oss without vision).
+    Without this guard, ``resolve_vision_provider_client(provider="auto")``
+    would happily return the main-provider client and any subsequent image
+    payload would surface as a cryptic provider-side error
+    (``unknown variant `image_url`, expected `text```, #31179).
+
+    Returns True when capability lookup is unknown — preserves the historical
+    behaviour of attempting the call, so providers we haven't catalogued yet
+    don't silently regress to text-only.
+    """
+    try:
+        from agent.image_routing import _lookup_supports_vision
+        from hermes_cli.config import load_config
+    except ImportError:
+        return True
+    try:
+        supports = _lookup_supports_vision(provider, model, load_config())
+    except Exception:  # pragma: no cover - defensive
+        return True
+    if supports is None:
+        # No capability data — keep current behaviour and let the call attempt
+        # happen rather than silently skipping. This avoids false-positive
+        # skips for new/custom providers.
+        return True
+    return bool(supports)
+
+
 def _normalize_vision_provider(provider: Optional[str]) -> str:
    return _normalize_aux_provider(provider)

@ -3870,6 +3901,23 @@ def resolve_vision_provider_client(
                    "vision support) — falling through to aggregator chain",
                    main_provider,
                )
+            elif not _main_model_supports_vision(main_provider, vision_model):
+                # The main model is known to be text-only (e.g. DeepSeek V4,
+                # gpt-oss-120b without vision). Building a client and sending
+                # an image would produce a cryptic provider-side error like
+                # ``unknown variant `image_url`, expected `text``` (#31179).
+                # Fall through to the aggregator chain instead.
+                #
+                # Only log the provider name (not the model) — mirrors the
+                # sibling _PROVIDERS_WITHOUT_VISION branch above, and avoids
+                # CodeQL py/clear-text-logging-sensitive-data heuristic false
+                # positives on multi-value interpolations.
+                logger.debug(
+                    "Vision auto-detect: skipping main provider %s "
+                    "(reports no vision capability) — falling through to "
+                    "aggregator chain",
+                    main_provider,
+                )
            else:
                rpc_client, rpc_model = resolve_provider_client(
                    main_provider, vision_model,
@ -4281,6 +4329,23 @@ def _get_cached_client(
    return client, model or default_model


+# Aliases that target direct REST APIs not modeled as first-class providers
+# in PROVIDER_REGISTRY. Used for ``auxiliary.<task>.provider`` so users can
+# write the obvious name and have it resolve to a working ``custom`` endpoint
+# without needing to know our internal provider IDs.
+#
+# Why these specifically: PROVIDER_REGISTRY has ``openai-codex`` (OAuth) and
+# ``custom`` (manual base_url + OPENAI_API_KEY) but no plain ``openai`` for
+# direct API-key access. Users predictably type ``provider: openai`` and
+# expect it to use OPENAI_API_KEY against api.openai.com. Previously this
+# silently fell back to the user's main provider, sending OpenAI model names
+# to e.g. DeepSeek and producing cryptic ``unknown variant 'image_url'``
+# errors (issue #31179).
+_AUX_DIRECT_API_BASE_URLS: Dict[str, str] = {
+    "openai": "https://api.openai.com/v1",
+}
+
+
 def _resolve_task_provider_model(
    task: str = None,
    provider: str = None,
@ -4317,6 +4382,25 @@ def _resolve_task_provider_model(
    resolved_model = model or cfg_model
    resolved_api_mode = cfg_api_mode

+    # Convenience aliases for direct API-key endpoints that aren't first-class
+    # providers (e.g. ``provider: openai`` → custom + api.openai.com/v1).
+    # Applied to both explicit args and config-derived values. When the user
+    # has already supplied a base_url we keep their endpoint but still rewrite
+    # the provider to ``custom`` so resolution doesn't hit the
+    # PROVIDER_REGISTRY-only path (which has no ``openai`` entry).
+    def _expand_direct_api_alias(prov: Optional[str], existing_base: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
+        if not prov:
+            return prov, existing_base
+        target_base = _AUX_DIRECT_API_BASE_URLS.get(prov.strip().lower())
+        if target_base is None:
+            return prov, existing_base
+        return "custom", existing_base or target_base
+
+    if provider:
+        provider, base_url = _expand_direct_api_alias(provider, base_url)
+    if cfg_provider:
+        cfg_provider, cfg_base_url = _expand_direct_api_alias(cfg_provider, cfg_base_url)
+
    if base_url:
        return "custom", resolved_model, base_url, api_key, resolved_api_mode
    if provider:
@ -4344,7 +4428,17 @@ _DEFAULT_AUX_TIMEOUT = 30.0


 def _get_auxiliary_task_config(task: str) -> Dict[str, Any]:
-    """Return the config dict for auxiliary.<task>, or {} when unavailable."""
+    """Return the config dict for auxiliary.<task>, or {} when unavailable.
+
+    For plugin-registered auxiliary tasks (see
+    :meth:`hermes_cli.plugins.PluginContext.register_auxiliary_task`) the
+    plugin's declared *defaults* are layered underneath the user's config
+    so an unconfigured plugin task still works:
+
+        plugin defaults  ←  config.yaml auxiliary.<task>  (user wins)
+
+    Built-in tasks ignore this path (their defaults live in DEFAULT_CONFIG).
+    """
    if not task:
        return {}
    try:
@ -4354,7 +4448,27 @@ def _get_auxiliary_task_config(task: str) -> Dict[str, Any]:
        return {}
    aux = config.get("auxiliary", {}) if isinstance(config, dict) else {}
    task_config = aux.get(task, {}) if isinstance(aux, dict) else {}
-    return task_config if isinstance(task_config, dict) else {}
+    if not isinstance(task_config, dict):
+        task_config = {}
+
+    # Layer plugin-declared defaults underneath user config so
+    # ctx.register_auxiliary_task(defaults={...}) takes effect without
+    # forcing the user to write config.yaml entries.
+    try:
+        from hermes_cli.plugins import get_plugin_auxiliary_tasks
+        for _entry in get_plugin_auxiliary_tasks():
+            if _entry.get("key") == task:
+                _defaults = _entry.get("defaults") or {}
+                if isinstance(_defaults, dict):
+                    merged = dict(_defaults)
+                    merged.update(task_config)
+                    return merged
+                break
+    except Exception:
+        # Plugin discovery failure must not break aux task config reads.
+        pass
+
+    return task_config


 def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float:
--- a/agent/background_review.py
+++ b/agent/background_review.py
@ -115,7 +115,10 @@ _SKILL_REVIEW_PROMPT = (
    "Protected skills (DO NOT edit these):\n"
    "  • Bundled skills (shipped with Hermes, e.g. 'hermes-agent').\n"
    "  • Hub-installed skills (installed via 'hermes skills install').\n"
-    "  • Pinned skills (marked via 'hermes curator pin').\n"
+    "Pinned skills (marked via 'hermes curator pin') CAN be improved — "
+    "pin only blocks deletion/archive/consolidation by the curator, not "
+    "content updates. Patch them when a pitfall or missing step turns up, "
+    "same as any other agent-created skill.\n"
    "If the only skills that need updating are protected, say\n"
    "'Nothing to save.' and stop.\n\n"
    "Do NOT capture (these become persistent self-imposed constraints "
@ -198,7 +201,10 @@ _COMBINED_REVIEW_PROMPT = (
    "Protected skills (DO NOT edit these):\n"
    "  • Bundled skills (shipped with Hermes, e.g. 'hermes-agent').\n"
    "  • Hub-installed skills (installed via 'hermes skills install').\n"
-    "  • Pinned skills (marked via 'hermes curator pin').\n"
+    "Pinned skills (marked via 'hermes curator pin') CAN be improved — "
+    "pin only blocks deletion/archive/consolidation by the curator, not "
+    "content updates. Patch them when a pitfall or missing step turns up, "
+    "same as any other agent-created skill.\n"
    "If the only skills that need updating are protected, say\n"
    "'Nothing to save.' and stop.\n\n"
    "Do NOT capture as skills (these become persistent self-imposed "
--- a/agent/chat_completion_helpers.py
+++ b/agent/chat_completion_helpers.py
@ -91,23 +91,55 @@ def interruptible_api_call(agent, api_kwargs: dict):
    provider fallback.
    """
    result = {"response": None, "error": None}
-    request_client_holder = {"client": None}
+    request_client_holder = {"client": None, "owner_tid": None}
    request_client_lock = threading.Lock()

    def _set_request_client(client):
        with request_client_lock:
            request_client_holder["client"] = client
+            # #29507: stamp the owning thread so a stranger-thread interrupt
+            # only shuts the connection down rather than racing the worker
+            # for FD ownership during ``client.close()``.
+            request_client_holder["owner_tid"] = threading.get_ident()
        return client

    def _take_request_client():
        with request_client_lock:
            client = request_client_holder.get("client")
            request_client_holder["client"] = None
+            request_client_holder["owner_tid"] = None
            return client

    def _close_request_client_once(reason: str) -> None:
-        request_client = _take_request_client()
-        if request_client is not None:
+        # #29507: dispatch on the calling thread.
+        #
+        # When ``_call`` (the worker) reaches its ``finally`` it owns the
+        # close and we pop + fully close as before. When a *stranger* thread
+        # (the interrupt-check loop, the stale-call detector) drives the
+        # close, only shut the sockets down so the worker's blocked
+        # ``recv``/``send`` unwinds with an ``EPIPE`` / EOF — and let the
+        # worker close ``client`` from its own thread on its way out. That
+        # avoids the FD-recycling race where the kernel reassigned a
+        # just-closed TLS socket FD to ``kanban.db``, and the still-live SSL
+        # BIO on the worker thread then wrote a 24-byte TLS application-data
+        # record into the SQLite header (#29507).
+        with request_client_lock:
+            request_client = request_client_holder.get("client")
+            owner_tid = request_client_holder.get("owner_tid")
+            stranger_thread = (
+                request_client is not None
+                and owner_tid is not None
+                and owner_tid != threading.get_ident()
+            )
+            if not stranger_thread:
+                # Owning thread (or no recorded owner) → pop and fully close.
+                request_client_holder["client"] = None
+                request_client_holder["owner_tid"] = None
+        if request_client is None:
+            return
+        if stranger_thread:
+            agent._abort_request_openai_client(request_client, reason=reason)
+        else:
            agent._close_request_openai_client(request_client, reason=reason)

    def _call():
@ -725,7 +757,7 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
    current_base_url = str(getattr(agent, "base_url", "") or "").rstrip("/").lower()
    fb_base_url_for_dedup = (fb.get("base_url") or "").strip().rstrip("/").lower()
    if fb_provider == current_provider and fb_model == current_model:
-        logging.warning(
+        logger.warning(
            "Fallback skip: chain entry %s/%s matches current provider/model",
            fb_provider, fb_model,
        )
@ -736,7 +768,7 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
        and fb_base_url_for_dedup == current_base_url
        and fb_model == current_model
    ):
-        logging.warning(
+        logger.warning(
            "Fallback skip: chain entry base_url %s matches current backend",
            fb_base_url_for_dedup,
        )
@ -768,7 +800,7 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
            explicit_base_url=fb_base_url_hint,
            explicit_api_key=fb_api_key_hint)
        if fb_client is None:
-            logging.warning(
+            logger.warning(
                "Fallback to %s failed: provider not configured",
                fb_provider)
            return agent._try_activate_fallback()  # try next in chain
@ -776,8 +808,11 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
            from hermes_cli.model_normalize import normalize_model_for_provider

            fb_model = normalize_model_for_provider(fb_model, fb_provider)
-        except Exception:
-            pass
+        except Exception as _norm_err:
+            logger.warning(
+                "Could not normalize fallback model %r for provider %r: %s",
+                fb_model, fb_provider, _norm_err,
+            )

        # Determine api_mode from provider / base URL / model
        fb_api_mode = "chat_completions"
@ -905,19 +940,20 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
                base_url=agent.base_url,
                api_key=getattr(agent, "api_key", ""),  # callable preserved → call_llm
                provider=agent.provider,
+                api_mode=agent.api_mode,
            )

        agent._emit_status(
            f"🔄 Primary model failed — switching to fallback: "
            f"{fb_model} via {fb_provider}"
        )
-        logging.info(
+        logger.info(
            "Fallback activated: %s → %s (%s)",
            old_model, fb_model, fb_provider,
        )
        return True
    except Exception as e:
-        logging.error("Failed to activate fallback %s: %s", fb_model, e)
+        logger.error("Failed to activate fallback %s: %s", fb_model, e)
        return agent._try_activate_fallback()  # try next in chain


@ -1133,7 +1169,7 @@ def handle_max_iterations(agent, messages: list, api_call_count: int) -> str:
                final_response = "I reached the iteration limit and couldn't generate a summary."

    except Exception as e:
-        logging.warning(f"Failed to get summary response: {e}")
+        logger.warning(f"Failed to get summary response: {e}")
        final_response = f"I reached the maximum iterations ({agent.max_iterations}) but couldn't summarize. Error: {str(e)}"

    return final_response
@ -1162,12 +1198,12 @@ def cleanup_task_resources(agent, task_id: str) -> None:
            _ra().cleanup_vm(task_id)
    except Exception as e:
        if agent.verbose_logging:
-            logging.warning(f"Failed to cleanup VM for task {task_id}: {e}")
+            logger.warning(f"Failed to cleanup VM for task {task_id}: {e}")
    try:
        _ra().cleanup_browser(task_id)
    except Exception as e:
        if agent.verbose_logging:
-            logging.warning(f"Failed to cleanup browser for task {task_id}: {e}")
+            logger.warning(f"Failed to cleanup browser for task {task_id}: {e}")



@ -1271,23 +1307,44 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
        return result["response"]

    result = {"response": None, "error": None, "partial_tool_names": []}
-    request_client_holder = {"client": None, "diag": None}
+    request_client_holder = {"client": None, "diag": None, "owner_tid": None}
    request_client_lock = threading.Lock()

    def _set_request_client(client):
        with request_client_lock:
            request_client_holder["client"] = client
+            # See #29507 explanation in the non-streaming variant above.
+            request_client_holder["owner_tid"] = threading.get_ident()
        return client

    def _take_request_client():
        with request_client_lock:
            client = request_client_holder.get("client")
            request_client_holder["client"] = None
+            request_client_holder["owner_tid"] = None
            return client

    def _close_request_client_once(reason: str) -> None:
-        request_client = _take_request_client()
-        if request_client is not None:
+        # See #29507 explanation in the non-streaming variant above. A
+        # stranger thread (the interrupt-check / stale-stream detector loop)
+        # only aborts sockets — never pops, never calls ``client.close()`` —
+        # so the worker thread retains ownership of the FD release.
+        with request_client_lock:
+            request_client = request_client_holder.get("client")
+            owner_tid = request_client_holder.get("owner_tid")
+            stranger_thread = (
+                request_client is not None
+                and owner_tid is not None
+                and owner_tid != threading.get_ident()
+            )
+            if not stranger_thread:
+                request_client_holder["client"] = None
+                request_client_holder["owner_tid"] = None
+        if request_client is None:
+            return
+        if stranger_thread:
+            agent._abort_request_openai_client(request_client, reason=reason)
+        else:
            agent._close_request_openai_client(request_client, reason=reason)

    first_delta_fired = {"done": False}
@ -2020,8 +2077,21 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
            # Streaming failed AFTER some tokens were already delivered to
            # the platform.  Re-raising would let the outer retry loop make
            # a new API call, creating a duplicate message.  Return a
-            # partial "stop" response instead so the outer loop treats this
-            # turn as complete (no retry, no fallback).
+            # partial response stub instead and let the outer loop decide:
+            #
+            #   - text-only partials → finish_reason="length" so the
+            #     conversation loop persists the partial assistant content
+            #     and asks the model to continue from where the stream
+            #     died (issue #30963: partial stop misclassified as a
+            #     clean completion was exiting the loop with budget
+            #     remaining and an unfinished goal).
+            #
+            #   - partial mid-tool-call → finish_reason="stop" stays.
+            #     The user-visible warning we append says "Ask me to
+            #     retry if you want to continue", so the agent should
+            #     hand control back rather than auto-retry a tool call
+            #     that may have side-effects.
+            #
            # Recover whatever content was already streamed to the user.
            # _current_streamed_assistant_text accumulates text fired
            # through _fire_stream_delta, so it has exactly what the
@ -2059,14 +2129,17 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                    "of text; surfaced warning to user: %s",
                    _partial_names, len(_partial_text or ""), result["error"],
                )
+                _stub_finish_reason = "stop"
            else:
                logger.warning(
-                    "Partial stream delivered before error; returning stub "
-                    "response with %s chars of recovered content to prevent "
-                    "duplicate messages: %s",
+                    "Partial stream delivered before error; returning "
+                    "length-truncated stub with %s chars of recovered "
+                    "content so the loop can continue from where the "
+                    "stream died: %s",
                    len(_partial_text or ""),
                    result["error"],
                )
+                _stub_finish_reason = "length"
            _stub_msg = SimpleNamespace(
                role="assistant", content=_partial_text, tool_calls=None,
                reasoning_content=None,
@ -2075,7 +2148,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                id="partial-stream-stub",
                model=getattr(agent, "model", "unknown"),
                choices=[SimpleNamespace(
-                    index=0, message=_stub_msg, finish_reason="stop",
+                    index=0, message=_stub_msg, finish_reason=_stub_finish_reason,
                )],
                usage=None,
            )
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@ -609,6 +609,7 @@ class ContextCompressor(ContextEngine):
        """Update tracked token usage from API response."""
        self.last_prompt_tokens = usage.get("prompt_tokens", 0)
        self.last_completion_tokens = usage.get("completion_tokens", 0)
+        self.last_total_tokens = usage.get("total_tokens", self.last_prompt_tokens + self.last_completion_tokens)

    def should_compress(self, prompt_tokens: int = None) -> bool:
        """Check if context exceeds the compression threshold.
@ -897,7 +898,7 @@ class ContextCompressor(ContextEngine):
        into the warning log.
        """
        self._summary_model_fallen_back = True
-        logging.warning(
+        logger.warning(
            "Summary model '%s' %s (%s). "
            "Falling back to main model '%s' for compression.",
            self.summary_model, reason, e, self.model,
@ -1086,7 +1087,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            # No provider configured — long cooldown, unlikely to self-resolve
            self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
            self._last_summary_error = "no auxiliary LLM provider configured"
-            logging.warning("Context compression: no provider available for "
+            logger.warning("Context compression: no provider available for "
                            "summary. Middle turns will be dropped without summary "
                            "for %d seconds.",
                            _SUMMARY_FAILURE_COOLDOWN_SECONDS)
@ -1182,7 +1183,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            if len(err_text) > 220:
                err_text = err_text[:217].rstrip() + "..."
            self._last_summary_error = err_text
-            logging.warning(
+            logger.warning(
                "Failed to generate context summary: %s. "
                "Further summary attempts paused for %d seconds.",
                e,
--- a/agent/context_engine.py
+++ b/agent/context_engine.py
@ -200,6 +200,7 @@ class ContextEngine(ABC):
        base_url: str = "",
        api_key: str = "",
        provider: str = "",
+        api_mode: str = "",
    ) -> None:
        """Called when the user switches models or on fallback activation.

--- a/agent/conversation_compression.py
+++ b/agent/conversation_compression.py
@ -381,12 +381,12 @@ def compress_context(
            agent._session_db.end_session(agent.session_id, "compression")
            old_session_id = agent.session_id
            agent.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
-            os.environ["HERMES_SESSION_ID"] = agent.session_id
            try:
-                from gateway.session_context import _SESSION_ID
-                _SESSION_ID.set(agent.session_id)
+                from gateway.session_context import set_current_session_id
+
+                set_current_session_id(agent.session_id)
            except Exception:
-                pass
+                os.environ["HERMES_SESSION_ID"] = agent.session_id
            agent._session_db_created = False
            agent._session_db.create_session(
                session_id=agent.session_id,
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@ -46,6 +46,7 @@ from agent.message_sanitization import (
    _strip_non_ascii,
 )
 from agent.model_metadata import (
+    MINIMUM_CONTEXT_LENGTH,
    estimate_messages_tokens_rough,
    estimate_request_tokens_rough,
    get_next_probe_tier,
@ -73,6 +74,50 @@ from utils import base_url_host_matches, env_var_enabled
 logger = logging.getLogger(__name__)


+def _ollama_context_limit_error(agent: Any, request_tokens: int) -> Optional[str]:
+    """Return a user-facing error when Ollama is loaded with too little context."""
+    if not getattr(agent, "tools", None):
+        return None
+
+    runtime_ctx = getattr(agent, "_ollama_num_ctx", None)
+    if not isinstance(runtime_ctx, int) or runtime_ctx <= 0:
+        return None
+    if runtime_ctx >= MINIMUM_CONTEXT_LENGTH:
+        return None
+
+    model = getattr(agent, "model", "") or "the selected model"
+    base_url = getattr(agent, "base_url", "") or "unknown base URL"
+    provider = getattr(agent, "provider", "") or "unknown"
+    tool_count = len(getattr(agent, "tools", None) or [])
+
+    logger.warning(
+        "Ollama runtime context too small for Hermes tool use: "
+        "model=%s provider=%s base_url=%s runtime_context=%d "
+        "minimum_context=%d estimated_request_tokens=%d tool_count=%d "
+        "session=%s",
+        model,
+        provider,
+        base_url,
+        runtime_ctx,
+        MINIMUM_CONTEXT_LENGTH,
+        request_tokens,
+        tool_count,
+        getattr(agent, "session_id", None) or "none",
+    )
+
+    return (
+        f"Ollama loaded `{model}` with only {runtime_ctx:,} tokens of runtime "
+        f"context, but Hermes needs at least {MINIMUM_CONTEXT_LENGTH:,} tokens "
+        "for reliable tool use.\n\n"
+        "Increase the Ollama context for this model and restart/reload the "
+        "model before trying again. A known-good starting point is 65,536 "
+        "tokens. In Hermes config, set `model.ollama_num_ctx: 65536` "
+        "(and `model.context_length: 65536` if you also override the displayed "
+        "model context). If you manage the model through an Ollama Modelfile, "
+        "set `PARAMETER num_ctx 65536` there instead."
+    )
+
+
 def _ra():
    """Lazy reference to ``run_agent`` so callers can patch
    ``run_agent.handle_function_call`` / ``run_agent._set_interrupt`` /
@ -527,6 +572,7 @@ def run_conversation(
    api_call_count = 0
    final_response = None
    interrupted = False
+    failed = False
    codex_ack_continuations = 0
    length_continue_retries = 0
    truncated_tool_call_retries = 0
@ -883,6 +929,26 @@ def run_conversation(
        # Calculate approximate request size for logging
        total_chars = sum(len(str(msg)) for msg in api_messages)
        approx_tokens = estimate_messages_tokens_rough(api_messages)
+        approx_request_tokens = estimate_request_tokens_rough(
+            api_messages, tools=agent.tools or None
+        )
+
+        _runtime_context_error = _ollama_context_limit_error(
+            agent, approx_request_tokens
+        )
+        if _runtime_context_error:
+            final_response = _runtime_context_error
+            failed = True
+            _turn_exit_reason = "ollama_runtime_context_too_small"
+            messages.append({"role": "assistant", "content": final_response})
+            agent._emit_status("❌ Ollama runtime context is too small for Hermes tool use")
+            api_call_count -= 1
+            agent._api_call_count = api_call_count
+            try:
+                agent.iteration_budget.refund()
+            except Exception:
+                pass
+            break
        
        # Thinking spinner for quiet mode (animated during API call)
        thinking_spinner = None
@ -923,6 +989,7 @@ def run_conversation(
        copilot_auth_retry_attempted=False
        thinking_sig_retry_attempted = False
        image_shrink_retry_attempted = False
+        multimodal_tool_content_retry_attempted = False
        oauth_1m_beta_retry_attempted = False
        llama_cpp_grammar_retry_attempted = False
        has_retried_429 = False
@ -1116,7 +1183,7 @@ def run_conversation(
                                    else str(_codex_error_obj) if _codex_error_obj
                                    else f"Responses API returned status '{_codex_resp_status}'"
                                )
-                                logging.warning(
+                                logger.warning(
                                    "Codex response status='%s' (error=%s). Routing to fallback. %s",
                                    _codex_resp_status, _codex_error_msg,
                                    agent._client_log_context(),
@ -1268,7 +1335,7 @@ def run_conversation(
                            primary_recovery_attempted = False
                            continue
                        agent._emit_status(f"❌ Max retries ({max_retries}) exceeded for invalid responses. Giving up.")
-                        logging.error(f"{agent.log_prefix}Invalid API response after {max_retries} retries.")
+                        logger.error(f"{agent.log_prefix}Invalid API response after {max_retries} retries.")
                        agent._persist_session(messages, conversation_history)
                        return {
                            "messages": messages,
@ -1281,7 +1348,7 @@ def run_conversation(
                    # Backoff before retry — jittered exponential: 5s base, 120s cap
                    wait_time = jittered_backoff(retry_count, base_delay=5.0, max_delay=120.0)
                    agent._vprint(f"{agent.log_prefix}⏳ Retrying in {wait_time:.1f}s ({_failure_hint})...", force=True)
-                    logging.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}")
+                    logger.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}")
                    
                    # Sleep in small increments to stay responsive to interrupts
                    sleep_end = time.time() + wait_time
@ -1347,7 +1414,18 @@ def run_conversation(
                        finish_reason = "length"

                if finish_reason == "length":
-                    agent._vprint(f"{agent.log_prefix}⚠️  Response truncated (finish_reason='length') - model hit max output tokens", force=True)
+                    if getattr(response, "id", "") == "partial-stream-stub":
+                        agent._vprint(
+                            f"{agent.log_prefix}⚠️  Stream interrupted by network error "
+                            f"(finish_reason='length' on partial-stream-stub)",
+                            force=True,
+                        )
+                    else:
+                        agent._vprint(
+                            f"{agent.log_prefix}⚠️  Response truncated "
+                            f"(finish_reason='length') - model hit max output tokens",
+                            force=True,
+                        )

                    # Normalize the truncated response to a single OpenAI-style
                    # message shape so text-continuation and tool-call retry
@ -1440,17 +1518,40 @@ def run_conversation(
                                truncated_response_parts.append(assistant_message.content)

                            if length_continue_retries < 3:
-                                agent._vprint(
-                                    f"{agent.log_prefix}↻ Requesting continuation "
-                                    f"({length_continue_retries}/3)..."
+                                # Distinguish a real output-token truncation
+                                # from a partial-stream-stub network error
+                                # (#30963).  Same continuation machinery,
+                                # but the prompt has to tell the truth or
+                                # the model goes off rails ("I wasn't
+                                # truncated, I'm done").
+                                _is_partial_stream_stub = (
+                                    getattr(response, "id", "") == "partial-stream-stub"
                                )
-                                continue_msg = {
-                                    "role": "user",
-                                    "content": (
+                                if _is_partial_stream_stub:
+                                    agent._vprint(
+                                        f"{agent.log_prefix}↻ Stream interrupted — "
+                                        f"requesting continuation "
+                                        f"({length_continue_retries}/3)..."
+                                    )
+                                    _continue_content = (
+                                        "[System: The previous response was cut off by a "
+                                        "network error mid-stream. Continue exactly where "
+                                        "you left off. Do not restart or repeat prior text. "
+                                        "Finish the answer directly.]"
+                                    )
+                                else:
+                                    agent._vprint(
+                                        f"{agent.log_prefix}↻ Requesting continuation "
+                                        f"({length_continue_retries}/3)..."
+                                    )
+                                    _continue_content = (
                                        "[System: Your previous response was truncated by the output "
                                        "length limit. Continue exactly where you left off. Do not "
                                        "restart or repeat prior text. Finish the answer directly.]"
-                                    ),
+                                    )
+                                continue_msg = {
+                                    "role": "user",
+                                    "content": _continue_content,
                                }
                                messages.append(continue_msg)
                                agent._session_messages = messages
@ -1994,6 +2095,31 @@ def run_conversation(
                            "or shrink didn't reduce size; surfacing original error."
                        )

+                # Multimodal-tool-content recovery: providers that follow
+                # the OpenAI spec strictly (tool message content must be a
+                # string) reject our list-type content with a 400.  Strip
+                # image parts from any list-type tool messages, mark the
+                # (provider, model) as no-list-tool-content for the rest
+                # of this session so future tool results preemptively
+                # downgrade, and retry once.  See issue #27344.
+                if (
+                    classified.reason == FailoverReason.multimodal_tool_content_unsupported
+                    and not multimodal_tool_content_retry_attempted
+                ):
+                    multimodal_tool_content_retry_attempted = True
+                    if agent._try_strip_image_parts_from_tool_messages(api_messages):
+                        agent._vprint(
+                            f"{agent.log_prefix}📐 Provider rejected list-type tool content — "
+                            f"downgraded screenshots to text and retrying...",
+                            force=True,
+                        )
+                        continue
+                    else:
+                        logger.info(
+                            "multimodal-tool-content recovery: no list-type tool "
+                            "messages with image parts found; surfacing original error."
+                        )
+
                # Anthropic OAuth subscription rejected the 1M-context beta
                # header ("long context beta is not yet available for this
                # subscription"). Disable the beta for the rest of this
@ -2133,7 +2259,7 @@ def run_conversation(
                        f"stripped all thinking blocks, retrying...",
                        force=True,
                    )
-                    logging.warning(
+                    logger.warning(
                        "%sThinking block signature recovery: stripped "
                        "reasoning_details from %d messages",
                        agent.log_prefix, len(messages),
@ -2158,7 +2284,7 @@ def run_conversation(
                        from tools.schema_sanitizer import strip_pattern_and_format
                        _, _stripped = strip_pattern_and_format(agent.tools)
                    except Exception as _strip_exc:  # pragma: no cover — defensive
-                        logging.warning(
+                        logger.warning(
                            "%sllama.cpp grammar recovery: strip helper failed: %s",
                            agent.log_prefix, _strip_exc,
                        )
@ -2169,7 +2295,7 @@ def run_conversation(
                            f"stripped {_stripped} pattern/format keyword(s), retrying...",
                            force=True,
                        )
-                        logging.warning(
+                        logger.warning(
                            "%sllama.cpp grammar recovery: stripped %d "
                            "pattern/format keyword(s) from tool schemas",
                            agent.log_prefix, _stripped,
@ -2177,7 +2303,7 @@ def run_conversation(
                        continue
                    # No keywords found to strip — fall through to normal
                    # retry path rather than loop forever on the same error.
-                    logging.warning(
+                    logger.warning(
                        "%sllama.cpp grammar error but no pattern/format "
                        "keywords to strip — falling through to normal retry",
                        agent.log_prefix,
@ -2278,6 +2404,7 @@ def run_conversation(
                            base_url=agent.base_url,
                            api_key=getattr(agent, "api_key", ""),
                            provider=agent.provider,
+                            api_mode=agent.api_mode,
                        )
                        # Context probing flags — only set on built-in
                        # compressor (plugin engines manage their own).
@ -2391,7 +2518,7 @@ def run_conversation(
                                error_context=error_context,
                            )
                        else:
-                            logging.info(
+                            logger.info(
                                "Nous 429 looks like upstream capacity "
                                "(no exhausted bucket in headers or "
                                "last-known state) -- not tripping "
@ -2451,7 +2578,7 @@ def run_conversation(
                    if compression_attempts > max_compression_attempts:
                        agent._vprint(f"{agent.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.", force=True)
                        agent._vprint(f"{agent.log_prefix}   💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
-                        logging.error(f"{agent.log_prefix}413 compression failed after {max_compression_attempts} attempts.")
+                        logger.error(f"{agent.log_prefix}413 compression failed after {max_compression_attempts} attempts.")
                        agent._persist_session(messages, conversation_history)
                        return {
                            "messages": messages,
@ -2482,7 +2609,7 @@ def run_conversation(
                    else:
                        agent._vprint(f"{agent.log_prefix}❌ Payload too large and cannot compress further.", force=True)
                        agent._vprint(f"{agent.log_prefix}   💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
-                        logging.error(f"{agent.log_prefix}413 payload too large. Cannot compress further.")
+                        logger.error(f"{agent.log_prefix}413 payload too large. Cannot compress further.")
                        agent._persist_session(messages, conversation_history)
                        return {
                            "messages": messages,
@ -2535,7 +2662,7 @@ def run_conversation(
                        if compression_attempts > max_compression_attempts:
                            agent._vprint(f"{agent.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", force=True)
                            agent._vprint(f"{agent.log_prefix}   💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
-                            logging.error(f"{agent.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
+                            logger.error(f"{agent.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
                            agent._persist_session(messages, conversation_history)
                            return {
                                "messages": messages,
@ -2587,6 +2714,7 @@ def run_conversation(
                            base_url=agent.base_url,
                            api_key=getattr(agent, "api_key", ""),
                            provider=agent.provider,
+                            api_mode=agent.api_mode,
                        )
                        # Context probing flags — only set on built-in
                        # compressor (plugin engines manage their own).
@ -2608,7 +2736,7 @@ def run_conversation(
                    if compression_attempts > max_compression_attempts:
                        agent._vprint(f"{agent.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", force=True)
                        agent._vprint(f"{agent.log_prefix}   💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
-                        logging.error(f"{agent.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
+                        logger.error(f"{agent.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
                        agent._persist_session(messages, conversation_history)
                        return {
                            "messages": messages,
@ -2641,7 +2769,7 @@ def run_conversation(
                        # Can't compress further and already at minimum tier
                        agent._vprint(f"{agent.log_prefix}❌ Context length exceeded and cannot compress further.", force=True)
                        agent._vprint(f"{agent.log_prefix}   💡 The conversation has accumulated too much content. Try /new to start fresh, or /compress to manually trigger compression.", force=True)
-                        logging.error(f"{agent.log_prefix}Context length exceeded: {approx_tokens:,} tokens. Cannot compress further.")
+                        logger.error(f"{agent.log_prefix}Context length exceeded: {approx_tokens:,} tokens. Cannot compress further.")
                        agent._persist_session(messages, conversation_history)
                        return {
                            "messages": messages,
@ -2678,6 +2806,21 @@ def run_conversation(
                    # retryable=True mapping takes effect instead.
                    and not isinstance(api_error, ssl.SSLError)
                )
+                # ``FailoverReason.billing`` (HTTP 402) is NOT in this
+                # exclusion set.  By the time we reach this block:
+                #   • credential-pool rotation (line ~2031) has already
+                #     fired for billing and either ``continue``d or
+                #     returned (False, ...) — pool is exhausted or absent.
+                #   • the eager-fallback branch above (line ~2422) also
+                #     fires on billing and ``continue``s if a fallback
+                #     provider is configured.
+                # Falling through to here means BOTH recovery paths
+                # gave up.  Treating 402 as retryable from this point
+                # just burns more paid requests against a depleted
+                # balance with no recovery mechanism left — see #31273
+                # (real-world: ~$40 in 48h on a 24/7 gateway).  Aborting
+                # mirrors how 401/403 (also ``should_fallback=True``)
+                # already behave once their recovery paths have failed.
                is_client_error = (
                    is_local_validation_error
                    or (
@ -2685,7 +2828,6 @@ def run_conversation(
                        and not classified.should_compress
                        and classified.reason not in {
                            FailoverReason.rate_limit,
-                            FailoverReason.billing,
                            FailoverReason.overloaded,
                            FailoverReason.context_overflow,
                            FailoverReason.payload_too_large,
@ -2734,7 +2876,7 @@ def run_conversation(
                                agent._vprint(f"{agent.log_prefix}      • Check credits: https://openrouter.ai/settings/credits", force=True)
                    else:
                        agent._vprint(f"{agent.log_prefix}   💡 This type of error won't be fixed by retrying.", force=True)
-                    logging.error(f"{agent.log_prefix}Non-retryable client error: {api_error}")
+                    logger.error(f"{agent.log_prefix}Non-retryable client error: {api_error}")
                    # Skip session persistence when the error is likely
                    # context-overflow related (status 400 + large session).
                    # Persisting the failed user message would make the
@ -2811,7 +2953,7 @@ def run_conversation(
                            force=True,
                        )

-                    logging.error(
+                    logger.error(
                        "%sAPI call failed after %s retries. %s | provider=%s model=%s msgs=%s tokens=~%s",
                        agent.log_prefix, max_retries, _final_summary,
                        _provider, _model, len(api_messages), f"{approx_tokens:,}",
@ -3342,6 +3484,19 @@ def run_conversation(
                        f"⚠️ Tool guardrail halted {decision.tool_name}: {decision.code}"
                    )
                    messages.append({"role": "assistant", "content": final_response})
+                    # Emit the halt message to the client so it's not
+                    # indistinguishable from a crash.  The stream display
+                    # was flushed (callback(None)) before tool execution,
+                    # but the callback is still alive — fire the text
+                    # through it so SSE/TUI clients see the explanation.
+                    if final_response:
+                        agent._safe_print(f"\n{final_response}\n")
+                        if agent.stream_delta_callback:
+                            try:
+                                agent.stream_delta_callback(final_response)
+                                agent.stream_delta_callback(None)
+                            except Exception:
+                                pass
                    break

                # Reset per-turn retry counters after successful tool
@ -3848,7 +4003,11 @@ def run_conversation(
                )

    # Determine if conversation completed successfully
-    completed = final_response is not None and api_call_count < agent.max_iterations
+    completed = (
+        final_response is not None
+        and api_call_count < agent.max_iterations
+        and not failed
+    )

    # Save trajectory if enabled.  ``user_message`` may be a multimodal
    # list of parts; the trajectory format wants a plain string.
@ -3933,6 +4092,8 @@ def run_conversation(
        except Exception as _ver_err:
            logger.debug("file-mutation verifier footer failed: %s", _ver_err)

+    _response_transformed = False
+
    # Plugin hook: transform_llm_output
    # Fired once per turn after the tool-calling loop completes.
    # Plugins can transform the LLM's output text before it's returned.
@ -3950,6 +4111,7 @@ def run_conversation(
            for _hook_result in _transform_results:
                if isinstance(_hook_result, str) and _hook_result:
                    final_response = _hook_result
+                    _response_transformed = True
                    break  # First non-empty string wins
        except Exception as exc:
            logger.warning("transform_llm_output hook failed: %s", exc)
@ -3998,8 +4160,10 @@ def run_conversation(
        "api_calls": api_call_count,
        "completed": completed,
        "turn_exit_reason": _turn_exit_reason,
+        "failed": failed,
        "partial": False,  # True only when stopped due to invalid tool calls
        "interrupted": interrupted,
+        "response_transformed": _response_transformed,
        "response_previewed": getattr(agent, "_response_was_previewed", False),
        "model": agent.model,
        "provider": agent.provider,
--- a/agent/display.py
+++ b/agent/display.py
@ -787,33 +787,65 @@ class KawaiiSpinner:
 # Cute tool message (completion line that replaces the spinner)
 # =========================================================================

+_ERROR_SUFFIX_MAX_LEN = 48
+
+
+def _trim_error(msg: str) -> str:
+    """Shrink an error message for inline display in a tool status line.
+
+    Strips overly long absolute paths down to just the filename so the
+    suffix stays readable on narrow terminals.
+    """
+    msg = msg.strip()
+    # Common case: "File not found: /very/long/absolute/path/foo.py"
+    if "File not found:" in msg:
+        _, _, tail = msg.partition("File not found:")
+        tail = tail.strip()
+        if "/" in tail:
+            msg = f"File not found: {tail.rsplit('/', 1)[-1]}"
+    if len(msg) > _ERROR_SUFFIX_MAX_LEN:
+        msg = msg[: _ERROR_SUFFIX_MAX_LEN - 3] + "..."
+    return msg
+
+
 def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]:
    """Inspect a tool result string for signs of failure.

-    Returns ``(is_failure, suffix)`` where *suffix* is an informational tag
-    like ``" [exit 1]"`` for terminal failures, or ``" [error]"`` for generic
-    failures.  On success, returns ``(False, "")``.
+    Returns ``(is_failure, suffix)`` where *suffix* is a short informational
+    tag like ``" [exit 1]"`` for terminal failures, ``" [full]"`` for memory
+    overflow, or a trimmed error message (``" [File not found: foo.py]"``).
+    On success returns ``(False, "")``.
    """
    if result is None:
        return False, ""
    if file_mutation_result_landed(tool_name, result):
        return False, ""

+    data = safe_json_loads(result)
+
+    # Terminal: non-zero exit code is the canonical failure signal.
    if tool_name == "terminal":
-        data = safe_json_loads(result)
        if isinstance(data, dict):
            exit_code = data.get("exit_code")
            if exit_code is not None and exit_code != 0:
+                err_msg = data.get("error")
+                if err_msg:
+                    return True, f" [{_trim_error(str(err_msg))}]"
                return True, f" [exit {exit_code}]"
        return False, ""

-    # Memory-specific: distinguish "full" from real errors
+    # Memory: distinguish "store full" from real errors.
    if tool_name == "memory":
-        data = safe_json_loads(result)
        if isinstance(data, dict):
            if data.get("success") is False and "exceed the limit" in data.get("error", ""):
                return True, " [full]"

+    # Structured error in JSON result (any tool that surfaces {"error": ...}).
+    if isinstance(data, dict):
+        err = data.get("error") or data.get("message")
+        if err and (data.get("success") is False or "error" in data):
+            return True, f" [{_trim_error(str(err))}]"
+
    # Generic heuristic for non-terminal tools
    # Multimodal tool results (dicts with _multimodal=True) are not strings —
    # treat them as successes since failures would be JSON-encoded strings.
@ -921,11 +953,29 @@ def get_cute_tool_message(
    if tool_name == "todo":
        todos_arg = args.get("todos")
        merge = args.get("merge", False)
+        # Parse result for completion progress
+        total = 0
+        done = 0
+        if result:
+            try:
+                data = safe_json_loads(result)
+                if data:
+                    s = data.get("summary", {})
+                    total = s.get("total", 0)
+                    done = s.get("completed", 0)
+            except Exception:
+                pass
        if todos_arg is None:
+            if total > 0:
+                return _wrap(f"┊ 📋 plan      {done}/{total} task(s)  {dur}")
            return _wrap(f"┊ 📋 plan      reading tasks  {dur}")
        elif merge:
+            if total > 0 and done > 0:
+                return _wrap(f"┊ 📋 plan      update {done}/{total} ✓  {dur}")
            return _wrap(f"┊ 📋 plan      update {len(todos_arg)} task(s)  {dur}")
        else:
+            if total > 0 and done > 0:
+                return _wrap(f"┊ 📋 plan      {done}/{total} task(s)  {dur}")
            return _wrap(f"┊ 📋 plan      {len(todos_arg)} task(s)  {dur}")
    if tool_name == "session_search":
        return _wrap(f"┊ 🔍 recall    \"{_trunc(args.get('query', ''), 35)}\"  {dur}")
--- a/agent/error_classifier.py
+++ b/agent/error_classifier.py
@ -50,6 +50,7 @@ class FailoverReason(enum.Enum):

    # Request format
    format_error = "format_error"        # 400 bad request — abort or strip + retry
+    multimodal_tool_content_unsupported = "multimodal_tool_content_unsupported"  # Provider rejected list-type content in tool messages (e.g. Xiaomi MiMo) — downgrade to text and retry

    # Provider-specific
    thinking_signature = "thinking_signature"  # Anthropic thinking block sig invalid
@ -165,6 +166,32 @@ _IMAGE_TOO_LARGE_PATTERNS = [
    # the likely culprit; we still try the shrink path before giving up.
 ]

+# Providers that follow the OpenAI spec strictly require tool message
+# ``content`` to be a string.  Some (Anthropic native, Codex Responses,
+# Gemini native, first-party OpenAI) extend this to accept a content-parts
+# list (text + image_url) so screenshots from computer_use survive.  Others
+# (Xiaomi MiMo, some Alibaba endpoints, a long tail of OpenAI-compatible
+# providers) reject the list with a 400 — the patterns below are the most
+# common error shapes we see.  Recovery: strip image parts from tool
+# messages in-place, record the (provider, model) for the rest of the
+# session so we don't waste another call learning the same lesson, retry.
+#
+# See: https://github.com/NousResearch/hermes-agent/issues/27344
+_MULTIMODAL_TOOL_CONTENT_PATTERNS = [
+    # Xiaomi MiMo: {"error":{"code":"400","message":"Param Incorrect","param":"text is not set"}}
+    "text is not set",
+    # Generic "tool message must be string" shapes
+    "tool message content must be a string",
+    "tool content must be a string",
+    "tool message must be a string",
+    # OpenAI-compat servers that reject list-type tool content with a
+    # schema-validation message
+    "expected string, got list",
+    "expected string, got array",
+    # Alibaba/DashScope variant
+    "tool_call.content must be string",
+]
+
 # Context overflow patterns
 _CONTEXT_OVERFLOW_PATTERNS = [
    "context length",
@ -213,6 +240,24 @@ _MODEL_NOT_FOUND_PATTERNS = [
    "unsupported model",
 ]

+# Request-validation patterns — the request is malformed and will fail
+# identically on every retry. Some OpenAI-compatible gateways (notably
+# codex.nekos.me) return these as 5xx instead of the standard 4xx, which
+# makes the generic "5xx → retryable server_error" rule misfire: the retry
+# loop hammers the same deterministic rejection 3+ times, then the
+# transport-recovery path resets the counter and does it again, producing
+# a request flood. When a 5xx body carries one of these unambiguous
+# request-validation signals, classify as a non-retryable format_error so
+# the loop fails fast and falls back instead of looping.
+_REQUEST_VALIDATION_PATTERNS = [
+    "unknown parameter",
+    "unsupported parameter",
+    "unrecognized request argument",
+    "invalid_request_error",
+    "unknown_parameter",
+    "unsupported_parameter",
+]
+
 # OpenRouter aggregator policy-block patterns.
 #
 # When a user's OpenRouter account privacy setting (or a per-request
@ -718,6 +763,23 @@ def _classify_by_status(
        )

    if status_code in {500, 502}:
+        # Some OpenAI-compatible gateways return request-validation errors
+        # with a 5xx status (codex.nekos.me returns 502 for unknown/
+        # unsupported parameters). These are deterministic — every retry
+        # gets the identical rejection — so the generic "5xx → retryable
+        # server_error" rule turns one bad request into a retry flood.
+        # Detect the unambiguous request-validation signals (in either the
+        # message text or the structured error code) and fail fast.
+        if (
+            any(p in error_msg for p in _REQUEST_VALIDATION_PATTERNS)
+            or error_code.lower() in {"invalid_request_error", "unknown_parameter",
+                                      "unsupported_parameter"}
+        ):
+            return result_fn(
+                FailoverReason.format_error,
+                retryable=False,
+                should_fallback=True,
+            )
        return result_fn(FailoverReason.server_error, retryable=True)

    if status_code in {503, 529}:
@ -781,6 +843,19 @@ def _classify_400(
 ) -> ClassifiedError:
    """Classify 400 Bad Request — context overflow, format error, or generic."""

+    # Multimodal tool content rejected from 400.  Must be checked BEFORE
+    # image_too_large because the recovery is different (strip image parts
+    # from tool messages, mark the model as no-list-tool-content for the
+    # rest of the session) and BEFORE context_overflow because some of the
+    # patterns ("text is not set") are ambiguous in isolation but become
+    # specific when combined with a 400 on a request known to contain
+    # multimodal tool content.
+    if any(p in error_msg for p in _MULTIMODAL_TOOL_CONTENT_PATTERNS):
+        return result_fn(
+            FailoverReason.multimodal_tool_content_unsupported,
+            retryable=True,
+        )
+
    # Image-too-large from 400 (Anthropic's 5 MB per-image check fires this way).
    # Must be checked BEFORE context_overflow because messages can trip both
    # patterns ("exceeds" + "image") and image-shrink is a cheaper recovery.
@ -922,6 +997,13 @@ def _classify_by_message(
            should_compress=True,
        )

+    # Multimodal tool content patterns (from message text when no status_code)
+    if any(p in error_msg for p in _MULTIMODAL_TOOL_CONTENT_PATTERNS):
+        return result_fn(
+            FailoverReason.multimodal_tool_content_unsupported,
+            retryable=True,
+        )
+
    # Image-too-large patterns (from message text when no status_code)
    if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
        return result_fn(
--- a/agent/file_safety.py
+++ b/agent/file_safety.py
@ -97,6 +97,43 @@ def is_write_denied(path: str) -> bool:
        if resolved.startswith(prefix):
            return True

+    # Hermes control-plane files: block both the ACTIVE profile's view
+    # (hermes_home) AND the global root view. Without the root pass, a
+    # profile-mode session leaves <root>/auth.json + <root>/config.yaml
+    # writable — letting a prompt-injected write_file overwrite the global
+    # files that every profile inherits from (same shape as #15981).
+    control_file_names = ("auth.json", "config.yaml", "webhook_subscriptions.json")
+    mcp_tokens_dir_name = "mcp-tokens"
+
+    hermes_dirs = []
+    for base in (_hermes_home_path(), _hermes_root_path()):
+        try:
+            real = os.path.realpath(base)
+            if real not in hermes_dirs:
+                hermes_dirs.append(real)
+        except Exception:
+            continue
+
+    for base_real in hermes_dirs:
+        for name in control_file_names:
+            try:
+                if resolved == os.path.realpath(os.path.join(base_real, name)):
+                    return True
+            except Exception:
+                continue
+        try:
+            mcp_real = os.path.realpath(os.path.join(base_real, mcp_tokens_dir_name))
+            if resolved == mcp_real or resolved.startswith(mcp_real + os.sep):
+                return True
+        except Exception:
+            pass
+        try:
+            pairing_real = os.path.realpath(os.path.join(base_real, "pairing"))
+            if resolved == pairing_real or resolved.startswith(pairing_real + os.sep):
+                return True
+        except Exception:
+            pass
+
    safe_root = get_safe_write_root()
    if safe_root and not (resolved == safe_root or resolved.startswith(safe_root + os.sep)):
        return True
@ -105,21 +142,266 @@ def is_write_denied(path: str) -> bool:


 def get_read_block_error(path: str) -> Optional[str]:
-    """Return an error message when a read targets internal Hermes cache files."""
+    """Return an error message when a read targets a denied Hermes path.
+
+    Two categories are blocked:
+
+      * Internal Hermes cache files under ``HERMES_HOME/skills/.hub`` —
+        readable metadata that an attacker could use as a prompt-injection
+        carrier.
+      * Credential / secret stores under HERMES_HOME and the global Hermes
+        root: ``auth.json``, ``auth.lock``, ``.anthropic_oauth.json``,
+        ``.env``, ``webhook_subscriptions.json``, and anything under
+        ``mcp-tokens/``. These hold plaintext provider keys, OAuth tokens,
+        and HMAC secrets that the agent never needs to read directly —
+        provider tools / gateway adapters consume them through internal
+        channels.
+
+    **This is NOT a security boundary.** The terminal tool runs as the
+    same OS user with shell access; the agent can still ``cat auth.json``
+    or ``cat ~/.hermes/.env`` and exfiltrate the file. The read-deny exists
+    as defense-in-depth that:
+
+      * Returns a clear error to models that respect tool denials, which
+        empirically prompts most modern models to stop rather than reach
+        for the shell.
+      * Surfaces a visible audit trail when something tries to read
+        credentials — easier to spot in logs than a generic ``cat``.
+
+    Treat any user-visible framing around this as "may help" rather than
+    "stops attackers." A determined model or malicious instruction can
+    always shell out.
+
+    Callers that resolve relative paths against a non-process cwd
+    (e.g. ``TERMINAL_CWD`` in ``tools/file_tools.py``) MUST pre-resolve
+    and pass the absolute path string.  This function's own ``resolve()``
+    is anchored at the Python process cwd, so a relative input like
+    ``"auth.json"`` would otherwise miss the denylist when the task's
+    terminal cwd differs from the process cwd.
+    """
    resolved = Path(path).expanduser().resolve()
-    hermes_home = _hermes_home_path().resolve()
-    blocked_dirs = [
-        hermes_home / "skills" / ".hub" / "index-cache",
-        hermes_home / "skills" / ".hub",
-    ]
-    for blocked in blocked_dirs:
+
+    # Resolve BOTH the active HERMES_HOME (profile-aware) AND the global
+    # Hermes root so credential stores at <root>/auth.json etc. are also
+    # blocked when running under a profile (HERMES_HOME points at
+    # <root>/profiles/<name> in profile mode). Same shape as the write
+    # deny widening (#15981, #14157).
+    hermes_dirs: list[Path] = []
+    for base in (_hermes_home_path(), _hermes_root_path()):
        try:
-            resolved.relative_to(blocked)
+            real = base.resolve()
+            if real not in hermes_dirs:
+                hermes_dirs.append(real)
+        except Exception:
+            continue
+
+    # Skills .hub: prompt-injection carriers.
+    for hd in hermes_dirs:
+        blocked_dirs = [
+            hd / "skills" / ".hub" / "index-cache",
+            hd / "skills" / ".hub",
+        ]
+        for blocked in blocked_dirs:
+            try:
+                resolved.relative_to(blocked)
+            except ValueError:
+                continue
+            return (
+                f"Access denied: {path} is an internal Hermes cache file "
+                "and cannot be read directly to prevent prompt injection. "
+                "Use the skills_list or skill_view tools instead."
+            )
+
+    # Credential / secret stores. Exact-file matches under either
+    # HERMES_HOME or <root>.
+    credential_file_names = (
+        "auth.json",
+        "auth.lock",
+        ".anthropic_oauth.json",
+        ".env",
+        "webhook_subscriptions.json",
+    )
+    for hd in hermes_dirs:
+        for name in credential_file_names:
+            try:
+                blocked = (hd / name).resolve()
+            except Exception:
+                continue
+            if resolved == blocked:
+                return (
+                    f"Access denied: {path} is a Hermes credential store "
+                    "and cannot be read directly. Provider tools consume "
+                    "these credentials through internal channels. "
+                    "(Defense-in-depth — not a security boundary; the "
+                    "terminal tool can still bypass.)"
+                )
+
+    # mcp-tokens/: directory prefix match — anything inside is OAuth
+    # token material.
+    for hd in hermes_dirs:
+        try:
+            mcp_tokens = (hd / "mcp-tokens").resolve()
+        except Exception:
+            continue
+        if resolved == mcp_tokens:
+            return (
+                f"Access denied: {path} is the Hermes MCP token directory "
+                "and cannot be read directly. (Defense-in-depth — not a "
+                "security boundary; the terminal tool can still bypass.)"
+            )
+        try:
+            resolved.relative_to(mcp_tokens)
        except ValueError:
            continue
        return (
-            f"Access denied: {path} is an internal Hermes cache file "
-            "and cannot be read directly to prevent prompt injection. "
-            "Use the skills_list or skill_view tools instead."
+            f"Access denied: {path} is a Hermes MCP token file "
+            "and cannot be read directly. (Defense-in-depth — not a "
+            "security boundary; the terminal tool can still bypass.)"
        )
+
    return None
+
+
+# ---------------------------------------------------------------------------
+# Cross-profile write guard (#TBD)
+#
+# Hermes profiles are separate HERMES_HOME dirs under
+# ``<root>/profiles/<name>/``. Each profile has its own skills/, plugins/,
+# cron/, memories/. When an agent runs under one profile, writing into
+# ANOTHER profile's directories is almost always wrong — those skills /
+# plugins / cron jobs / memories affect a different session the user runs
+# from a different shell.
+#
+# Soft guard, NOT a security boundary: the agent runs as the same OS user
+# and has unrestricted terminal access, so this returns a warning the model
+# can choose to honor or override with ``cross_profile=True``. Same shape
+# as the dangerous-command approval flow — the agent is told the boundary
+# exists, and explicit user direction is required to cross it.
+#
+# Reference: May 2026 incident where a hermes-security profile session
+# edited skills under both ``~/.hermes/profiles/hermes-security/skills/``
+# AND ``~/.hermes/skills/`` (the default profile's skills) without realizing
+# the second path belonged to a different profile.
+# ---------------------------------------------------------------------------
+
+# Profile-scoped directories under HERMES_HOME / <root> / <root>/profiles/<X>/
+# that should be guarded. Adding a new area here extends the guard with no
+# other code change.
+PROFILE_SCOPED_AREAS = ("skills", "plugins", "cron", "memories")
+
+
+def _resolve_active_profile_name() -> str:
+    """Return the active profile name derived from HERMES_HOME.
+
+    ``~/.hermes``              -> ``"default"``
+    ``~/.hermes/profiles/X``  -> ``"X"``
+
+    Falls back to ``"default"`` on any resolution failure so the guard
+    never raises into the tool path.
+    """
+    try:
+        home_real = _hermes_home_path().resolve()
+        root_real = _hermes_root_path().resolve()
+    except (OSError, RuntimeError):
+        return "default"
+    profiles_dir = root_real / "profiles"
+    try:
+        rel = home_real.relative_to(profiles_dir)
+        parts = rel.parts
+        if len(parts) >= 1:
+            return parts[0]
+    except ValueError:
+        pass
+    return "default"
+
+
+def classify_cross_profile_target(path: str) -> Optional[dict]:
+    """Classify a write target as cross-profile if it lands in another
+    profile's scoped area (skills/plugins/cron/memories).
+
+    Returns ``None`` when the target is outside Hermes scope, or is inside
+    the ACTIVE profile, or doesn't hit a profile-scoped area. Otherwise
+    returns a dict with:
+
+      * ``active_profile``: name of the profile the agent is running as
+      * ``target_profile``: name of the profile the path belongs to
+      * ``area``: which scoped area (``"skills"``, ``"plugins"``, etc.)
+      * ``target_path``: the resolved path string
+
+    The caller decides what to do with the result — surface a warning to
+    the model, prompt the user, or (with explicit consent /
+    ``cross_profile=True``) proceed anyway.
+    """
+    try:
+        target = Path(os.path.expanduser(str(path))).resolve()
+        root_real = _hermes_root_path().resolve()
+    except (OSError, RuntimeError):
+        return None
+
+    target_profile: Optional[str] = None
+    area: Optional[str] = None
+
+    try:
+        rel = target.relative_to(root_real)
+    except ValueError:
+        return None
+
+    parts = rel.parts
+    if not parts:
+        return None
+
+    if parts[0] in PROFILE_SCOPED_AREAS:
+        # ``<root>/<area>/...`` → default profile.
+        target_profile = "default"
+        area = parts[0]
+    elif (
+        parts[0] == "profiles"
+        and len(parts) >= 3
+        and parts[2] in PROFILE_SCOPED_AREAS
+    ):
+        # ``<root>/profiles/<name>/<area>/...`` → named profile.
+        target_profile = parts[1]
+        area = parts[2]
+    else:
+        return None
+
+    active_profile = _resolve_active_profile_name()
+    if target_profile == active_profile:
+        # In-profile write — not a cross-profile event.
+        return None
+
+    return {
+        "active_profile": active_profile,
+        "target_profile": target_profile,
+        "area": area,
+        "target_path": str(target),
+    }
+
+
+def get_cross_profile_warning(path: str) -> Optional[str]:
+    """Return a model-facing warning string when ``path`` is cross-profile.
+
+    Returns ``None`` when the write is in-scope (same profile) or outside
+    Hermes entirely. Caller is expected to surface the warning to the
+    agent as a tool-result error, NOT to silently allow the write — the
+    agent must either get explicit user direction to proceed, or pass
+    ``cross_profile=True`` to its write tool.
+
+    This is defense-in-depth: the terminal tool runs as the same OS user
+    and can write any of these paths without going through this guard.
+    Treat the guard as a confusion-reducer, not a security boundary.
+    """
+    info = classify_cross_profile_target(path)
+    if info is None:
+        return None
+    return (
+        f"Cross-profile write blocked by soft guard: {info['target_path']} "
+        f"belongs to Hermes profile {info['target_profile']!r}, but the "
+        f"agent is running under profile {info['active_profile']!r}. "
+        f"Editing another profile's {info['area']}/ will affect that "
+        f"profile's future sessions, not the one you are currently in. "
+        f"Confirm with the user before proceeding. To bypass this guard "
+        f"after explicit user direction, retry the call with "
+        f"``cross_profile=True``. (Defense-in-depth — not a security "
+        f"boundary; the terminal tool can still bypass.)"
+    )
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@ -209,6 +209,7 @@ DEFAULT_CONTEXT_LENGTHS = {
    # via a custom provider. Values sourced from models.dev (2026-04).
    # Keys use substring matching (longest-first), so e.g. "grok-4.20"
    # matches "grok-4.20-0309-reasoning" / "-non-reasoning" / "-multi-agent-0309".
+    "grok-build": 256000,       # grok-build-0.1
    "grok-code-fast": 256000,   # grok-code-fast-1
    "grok-4-1-fast": 2000000,   # grok-4-1-fast-(non-)reasoning
    "grok-2-vision": 8192,      # grok-2-vision, -1212, -latest
@ -640,7 +641,7 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any
        return cache

    except Exception as e:
-        logging.warning(f"Failed to fetch model metadata from OpenRouter: {e}")
+        logger.warning(f"Failed to fetch model metadata from OpenRouter: {e}")
        return _model_metadata_cache or {}


--- a/agent/models_dev.py
+++ b/agent/models_dev.py
@ -167,6 +167,9 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
    "gemini": "google",
    "google": "google",
    "xai": "xai",
+    # xAI OAuth is an authentication/transport path for the same xAI model
+    # catalog, so model metadata should resolve through the xAI provider.
+    "xai-oauth": "xai",
    "xiaomi": "xiaomi",
    "nvidia": "nvidia",
    "groq": "groq",
--- a/agent/redact.py
+++ b/agent/redact.py
@ -176,6 +176,15 @@ _URL_USERINFO_RE = re.compile(
    r"(https?|wss?|ftp)://([^/\s:@]+):([^/\s@]+)@",
 )

+# HTTP access logs often use a relative request target rather than a full URL:
+# `"POST /webhook?password=... HTTP/1.1"`. The full-URL redactor above only
+# sees strings containing `://`, so handle request-target query strings too.
+_HTTP_REQUEST_TARGET_QUERY_RE = re.compile(
+    r"\b((?:GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS|TRACE|CONNECT)\s+[^ \t\r\n\"']*?)"
+    r"\?([^ \t\r\n\"']+)",
+    re.IGNORECASE,
+)
+
 # Form-urlencoded body detection: conservative — only applies when the entire
 # text looks like a query string (k=v&k=v pattern with no newlines).
 _FORM_BODY_RE = re.compile(
@ -293,6 +302,15 @@ def _redact_url_userinfo(text: str) -> str:
    )


+def _redact_http_request_target_query_params(text: str) -> str:
+    """Redact sensitive query params in HTTP access-log request targets."""
+    def _sub(m: re.Match) -> str:
+        prefix = m.group(1)
+        query = _redact_query_string(m.group(2))
+        return f"{prefix}?{query}"
+    return _HTTP_REQUEST_TARGET_QUERY_RE.sub(_sub, text)
+
+
 def _redact_form_body(text: str) -> str:
    """Redact sensitive values in a form-urlencoded body.

@ -397,6 +415,11 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
        if "?" in text:
            text = _redact_url_query_params(text)

+    # HTTP access logs can contain relative request targets with query params
+    # and no URL scheme, e.g. `"POST /hook?password=... HTTP/1.1"`.
+    if "?" in text and "=" in text and _has_http_method_substring(text):
+        text = _redact_http_request_target_query_params(text)
+
    # Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
    if "&" in text and "=" in text:
        text = _redact_form_body(text)
@ -456,6 +479,25 @@ def _has_known_prefix_substring(text: str) -> bool:
    return any(p in text for p in _PREFIX_SUBSTRINGS)


+_HTTP_METHOD_SUBSTRINGS = (
+    "GET ",
+    "POST ",
+    "PUT ",
+    "PATCH ",
+    "DELETE ",
+    "HEAD ",
+    "OPTIONS ",
+    "TRACE ",
+    "CONNECT ",
+)
+
+
+def _has_http_method_substring(text: str) -> bool:
+    """Cheap pre-check before scanning for access-log request targets."""
+    upper = text.upper()
+    return any(method in upper for method in _HTTP_METHOD_SUBSTRINGS)
+
+
 class RedactingFormatter(logging.Formatter):
    """Log formatter that redacts secrets from all log messages."""

--- a/agent/secret_sources/bitwarden.py
+++ b/agent/secret_sources/bitwarden.py
@ -70,7 +70,7 @@ _BWS_RUN_TIMEOUT = 30

 # In-process cache so repeated load_hermes_dotenv() calls (CLI startup,
 # gateway hot-reload, test suites) don't re-fetch from BSM.
-_CacheKey = Tuple[str, str]  # (access_token_fingerprint, project_id)
+_CacheKey = Tuple[str, str, str]  # (access_token_fingerprint, project_id, server_url)
 _CACHE: Dict[_CacheKey, "_CachedFetch"] = {}


@ -317,11 +317,18 @@ def fetch_bitwarden_secrets(
    binary: Optional[Path] = None,
    cache_ttl_seconds: float = 300,
    use_cache: bool = True,
+    server_url: str = "",
 ) -> Tuple[Dict[str, str], List[str]]:
    """Pull the secrets for ``project_id`` from Bitwarden Secrets Manager.

    Returns ``(secrets_dict, warnings_list)``.

+    Set ``server_url`` to point at a non-default Bitwarden region or a
+    self-hosted instance — e.g. ``https://vault.bitwarden.eu`` for EU
+    Cloud accounts.  When empty, ``bws`` uses its built-in default
+    (``https://vault.bitwarden.com``, US Cloud).  This is plumbed into
+    the subprocess as ``BWS_SERVER_URL``.
+
    Raises :class:`RuntimeError` for fatal conditions (missing binary,
    auth failure, unparseable output).  Callers in the env_loader path
    catch this and emit a single warning; callers in the user-facing
@ -332,7 +339,7 @@ def fetch_bitwarden_secrets(
    if not project_id:
        raise RuntimeError("Bitwarden project_id is empty")

-    cache_key = (_token_fingerprint(access_token), project_id)
+    cache_key = (_token_fingerprint(access_token), project_id, server_url or "")
    if use_cache:
        cached = _CACHE.get(cache_key)
        if cached and cached.is_fresh(cache_ttl_seconds):
@ -347,19 +354,26 @@ def fetch_bitwarden_secrets(
            "`hermes secrets bitwarden setup`."
        )

-    secrets, warnings = _run_bws_list(bws, access_token, project_id)
+    secrets, warnings = _run_bws_list(bws, access_token, project_id, server_url)
    _CACHE[cache_key] = _CachedFetch(secrets=secrets, fetched_at=time.time())
    return secrets, warnings


 def _run_bws_list(
-    bws: Path, access_token: str, project_id: str
+    bws: Path, access_token: str, project_id: str, server_url: str = ""
 ) -> Tuple[Dict[str, str], List[str]]:
    cmd = [str(bws), "secret", "list", project_id, "--output", "json"]
    env = os.environ.copy()
    env["BWS_ACCESS_TOKEN"] = access_token
    # Make sure we're not echoing telemetry / colour codes into json.
    env.setdefault("NO_COLOR", "1")
+    # Region / self-hosted support.  bws defaults to https://vault.bitwarden.com
+    # (US Cloud); EU Cloud users need https://vault.bitwarden.eu, and
+    # self-hosted users need their own URL.  When unset, fall back to whatever
+    # BWS_SERVER_URL the caller already had in their shell env (preserved by
+    # the copy above) so manual overrides keep working too.
+    if server_url:
+        env["BWS_SERVER_URL"] = server_url

    try:
        proc = subprocess.run(  # noqa: S603 — bws path is trusted
@ -437,6 +451,7 @@ def apply_bitwarden_secrets(
    override_existing: bool = False,
    cache_ttl_seconds: float = 300,
    auto_install: bool = True,
+    server_url: str = "",
 ) -> FetchResult:
    """Pull secrets from BSM and set them on ``os.environ``.

@ -444,6 +459,10 @@ def apply_bitwarden_secrets(
    files have loaded.  It is intentionally defensive — any failure
    returns a :class:`FetchResult` with ``error`` set; it never raises.

+    ``server_url`` selects the Bitwarden region or self-hosted endpoint
+    (e.g. ``https://vault.bitwarden.eu`` for EU Cloud).  Empty string
+    means use ``bws``'s default (US Cloud).
+
    Parameters mirror the ``secrets.bitwarden.*`` config keys so the
    caller can just splat the dict in.
    """
@ -482,6 +501,7 @@ def apply_bitwarden_secrets(
            project_id=project_id,
            binary=binary,
            cache_ttl_seconds=cache_ttl_seconds,
+            server_url=server_url,
        )
    except RuntimeError as exc:
        result.error = str(exc)
--- a/agent/skill_utils.py
+++ b/agent/skill_utils.py
@ -12,7 +12,7 @@ import sys
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Set, Tuple

-from hermes_constants import get_config_path, get_skills_dir
+from hermes_constants import get_config_path, get_skills_dir, is_termux

 logger = logging.getLogger(__name__)

@ -136,6 +136,14 @@ def skill_matches_platform(frontmatter: Dict[str, Any]) -> bool:

    If the field is absent or empty the skill is compatible with **all**
    platforms (backward-compatible default).
+
+    Termux note: on Termux/Android, ``sys.platform`` is ``"linux"`` on
+    older Pythons but became ``"android"`` on Python 3.13+. Termux is a
+    Linux userland riding on the Android kernel, so skills tagged
+    ``linux`` are treated as compatible in Termux regardless of which
+    ``sys.platform`` value Python reports. Individual Linux commands
+    inside a skill may still misbehave (no systemd, BusyBox utils, no
+    apt/dnf, etc.) but that is on the skill, not on platform gating.
    """
    platforms = frontmatter.get("platforms")
    if not platforms:
@ -143,11 +151,21 @@ def skill_matches_platform(frontmatter: Dict[str, Any]) -> bool:
    if not isinstance(platforms, list):
        platforms = [platforms]
    current = sys.platform
+    running_in_termux = is_termux()
    for platform in platforms:
        normalized = str(platform).lower().strip()
        mapped = PLATFORM_MAP.get(normalized, normalized)
        if current.startswith(mapped):
            return True
+        # Termux runs a Linux userland on Android. Accept linux-tagged
+        # skills regardless of whether sys.platform is "linux" (pre-3.13
+        # Termux) or "android" (Python 3.13+ Termux, and any other
+        # Android runtime).
+        if running_in_termux and mapped == "linux":
+            return True
+        # Explicit termux/android tags match a Termux session too.
+        if running_in_termux and mapped in ("termux", "android"):
+            return True
    return False


--- a/agent/system_prompt.py
+++ b/agent/system_prompt.py
@ -205,6 +205,40 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
    if _env_hints:
        stable_parts.append(_env_hints)

+    # Active-profile hint — names the Hermes profile the agent is running
+    # under so it doesn't conflate ~/.hermes/skills/ (default profile) with
+    # ~/.hermes/profiles/<active>/skills/ (this profile's). Deterministic
+    # for the lifetime of the agent — profile name doesn't change
+    # mid-session, so this doesn't break the prompt cache.
+    # See file_safety._resolve_active_profile_name + classify_cross_profile_target
+    # for the matching tool-side guard.
+    try:
+        from agent.file_safety import _resolve_active_profile_name
+        active_profile = _resolve_active_profile_name()
+    except Exception:
+        active_profile = "default"
+    if active_profile == "default":
+        stable_parts.append(
+            "Active Hermes profile: default. Other profiles (if any) live "
+            "under ~/.hermes/profiles/<name>/. Each profile has its own "
+            "skills/, plugins/, cron/, and memories/ that affect a different "
+            "session than this one. Do not modify another profile's "
+            "skills/plugins/cron/memories unless the user explicitly directs "
+            "you to."
+        )
+    else:
+        stable_parts.append(
+            f"Active Hermes profile: {active_profile}. This session reads "
+            f"and writes ~/.hermes/profiles/{active_profile}/. The default "
+            f"profile's data lives at ~/.hermes/skills/, ~/.hermes/plugins/, "
+            f"~/.hermes/cron/, ~/.hermes/memories/ — those belong to a "
+            f"different session run from a different shell. Do NOT modify "
+            f"another profile's skills/plugins/cron/memories unless the user "
+            f"explicitly directs you to. The cross-profile write guard will "
+            f"refuse such writes by default; pass cross_profile=True only "
+            f"after explicit direction."
+        )
+
    platform_key = (agent.platform or "").lower().strip()
    if platform_key in PLATFORM_HINTS:
        stable_parts.append(PLATFORM_HINTS[platform_key])
--- a/agent/tool_executor.py
+++ b/agent/tool_executor.py
@ -388,6 +388,7 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
                    agent.tool_progress_callback(
                        "tool.completed", function_name, None, None,
                        duration=tool_duration, is_error=is_error,
+                        result=function_result,
                    )
                except Exception as cb_err:
                    logging.debug(f"Tool progress callback error: {cb_err}")
@ -491,7 +492,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
        try:
            function_args = json.loads(tool_call.function.arguments)
        except json.JSONDecodeError as e:
-            logging.warning(f"Unexpected JSON error after validation: {e}")
+            logger.warning(f"Unexpected JSON error after validation: {e}")
            function_args = {}
        if not isinstance(function_args, dict):
            function_args = {}
@ -822,6 +823,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
                agent.tool_progress_callback(
                    "tool.completed", function_name, None, None,
                    duration=tool_duration, is_error=_is_error_result,
+                    result=function_result,
                )
            except Exception as cb_err:
                logging.debug(f"Tool progress callback error: {cb_err}")
--- a/agent/transports/anthropic.py
+++ b/agent/transports/anthropic.py
@ -106,7 +106,17 @@ class AnthropicTransport(ProviderTransport):
            elif block.type == "tool_use":
                name = block.name
                if strip_tool_prefix and name.startswith(_MCP_PREFIX):
-                    name = name[len(_MCP_PREFIX):]
+                    stripped = name[len(_MCP_PREFIX):]
+                    # Only strip the mcp_ prefix for OAuth-injected tools
+                    # (where Hermes adds the prefix when sending to Anthropic
+                    # and must remove it on the way back).  Native MCP server
+                    # tools (from mcp_servers: in config.yaml) are registered
+                    # in the tool registry under their FULL mcp_<server>_<tool>
+                    # name and must NOT be stripped.  GH-25255.
+                    from tools.registry import registry as _tool_registry
+                    if (_tool_registry.get_entry(stripped)
+                            and not _tool_registry.get_entry(name)):
+                        name = stripped
                tool_calls.append(
                    ToolCall(
                        id=block.id,
--- a/agent/transports/chat_completions.py
+++ b/agent/transports/chat_completions.py
@ -113,9 +113,8 @@ class ChatCompletionsTransport(ProviderTransport):
        self, messages: list[dict[str, Any]], **kwargs
    ) -> list[dict[str, Any]]:
        """Messages are already in OpenAI format — strip internal fields
-        that strict chat-completions providers reject with HTTP 400/422.
-
-        Strips:
+        that strict chat-completions providers reject with HTTP 400/422
+        (or, in the case of some OpenAI-compatible gateways, 5xx):

        - Codex Responses API fields: ``codex_reasoning_items`` /
          ``codex_message_items`` on the message, ``call_id`` /
@ -127,6 +126,16 @@ class ChatCompletionsTransport(ProviderTransport):
          ``Extra inputs are not permitted, field: 'messages[N].tool_name'``.
          Permissive providers (OpenRouter, MiniMax) silently ignore the
          field, which masked the bug for months.
+        - Hermes-internal scaffolding markers — any top-level message key
+          starting with ``_`` (e.g. ``_empty_recovery_synthetic``,
+          ``_empty_terminal_sentinel``, ``_thinking_prefill``). These are
+          bookkeeping flags the agent loop attaches to messages so the
+          persistence layer can later strip its own scaffolding; they must
+          never reach the wire. Permissive providers (real OpenAI,
+          Anthropic) silently drop unknown message keys, but strict
+          gateways (e.g. opencode-go, codex.nekos.me) reject with
+          ``Extra inputs are not permitted, field: 'messages[N]._empty_recovery_synthetic'``,
+          which then poisons every subsequent request in the session.
        """
        needs_sanitize = False
        for msg in messages:
@ -139,6 +148,9 @@ class ChatCompletionsTransport(ProviderTransport):
            ):
                needs_sanitize = True
                break
+            if any(isinstance(k, str) and k.startswith("_") for k in msg):
+                needs_sanitize = True
+                break
            tool_calls = msg.get("tool_calls")
            if isinstance(tool_calls, list):
                for tc in tool_calls:
@ -160,6 +172,11 @@ class ChatCompletionsTransport(ProviderTransport):
            msg.pop("codex_reasoning_items", None)
            msg.pop("codex_message_items", None)
            msg.pop("tool_name", None)
+            # Drop all Hermes-internal scaffolding markers (``_``-prefixed).
+            # OpenAI's message schema has no ``_``-prefixed fields, so this
+            # is safe and future-proofs against new markers being added.
+            for key in [k for k in msg if isinstance(k, str) and k.startswith("_")]:
+                msg.pop(key, None)
            tool_calls = msg.get("tool_calls")
            if isinstance(tool_calls, list):
                for tc in tool_calls:
--- a/agent/transports/codex_app_server_session.py
+++ b/agent/transports/codex_app_server_session.py
@ -87,6 +87,39 @@ class TurnResult:
 _TURN_ABORTED_MARKERS = ("<turn_aborted>", "<turn_aborted/>")


+def _coerce_turn_input_text(user_input: Any) -> str:
+    """Collapse Hermes/OpenAI rich content into app-server text input.
+
+    The current `turn/start` path sends text items only. TUI image attachment
+    can hand us OpenAI-style content parts, so keep the text/path hints and
+    replace opaque image payloads with a small marker instead of putting a
+    Python list into the `text` field.
+    """
+    if isinstance(user_input, str):
+        return user_input
+    if isinstance(user_input, list):
+        parts: list[str] = []
+        for item in user_input:
+            if isinstance(item, str):
+                if item.strip():
+                    parts.append(item)
+                continue
+            if not isinstance(item, dict):
+                if item is not None:
+                    parts.append(str(item))
+                continue
+            item_type = item.get("type")
+            if item_type in {"text", "input_text"}:
+                text = item.get("text") or item.get("content") or ""
+                if text:
+                    parts.append(str(text))
+            elif item_type in {"image", "image_url", "input_image"}:
+                parts.append("[image attached]")
+        text = "\n\n".join(p for p in parts if p).strip()
+        return text or "What do you see in this image?"
+    return "" if user_input is None else str(user_input)
+
+
 # Substrings in codex stderr / JSON-RPC error messages that signal the
 # subprocess died because its OAuth credentials are no longer valid.
 # Kept conservative: we only redirect users to `codex login` when we're
@ -327,7 +360,7 @@ class CodexAppServerSession:

    def run_turn(
        self,
-        user_input: str,
+        user_input: Any,
        *,
        turn_timeout: float = 600.0,
        notification_poll_timeout: float = 0.25,
@ -365,6 +398,8 @@ class CodexAppServerSession:
        self._interrupt_event.clear()
        projector = CodexEventProjector()

+        user_input_text = _coerce_turn_input_text(user_input)
+
        # Send turn/start with the user input. Text-only for now (codex
        # supports rich content but Hermes' text path is the common case).
        try:
@ -372,7 +407,7 @@ class CodexAppServerSession:
                "turn/start",
                {
                    "threadId": self._thread_id,
-                    "input": [{"type": "text", "text": user_input}],
+                    "input": [{"type": "text", "text": user_input_text}],
                },
                timeout=10,
            )
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@ -39,7 +39,7 @@ model:
  #   LM Studio is first-class and uses provider: "lmstudio".
  #   It works with both no-auth and auth-enabled server modes.
  #
-  # Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
+  # Can also be overridden for a single invocation with the --provider flag.
  provider: "auto"
  
  # API configuration (falls back to OPENROUTER_API_KEY env var)
--- a/cli.py
+++ b/cli.py
@ -51,6 +51,8 @@ os.environ["HERMES_QUIET"] = "1"  # Our own modules

 import yaml

+from hermes_cli.fallback_config import get_fallback_chain
+
 # prompt_toolkit for fixed input area TUI
 from prompt_toolkit.history import FileHistory
 from prompt_toolkit.styles import Style as PTStyle
@ -81,17 +83,73 @@ except Exception:
 import threading
 import queue

-from agent.usage_pricing import (
-    CanonicalUsage,
-    estimate_usage_cost,
-    format_duration_compact,
-    format_token_count_compact,
-)
-from agent.markdown_tables import (
-    is_table_divider,
-    looks_like_table_row,
-    realign_markdown_tables,
-)
+def CanonicalUsage(*args, **kwargs):
+    from agent.usage_pricing import CanonicalUsage as _CanonicalUsage
+
+    return _CanonicalUsage(*args, **kwargs)
+
+
+def estimate_usage_cost(*args, **kwargs):
+    from agent.usage_pricing import estimate_usage_cost as _estimate_usage_cost
+
+    return _estimate_usage_cost(*args, **kwargs)
+
+
+def format_duration_compact(*args, **kwargs):
+    seconds = float(args[0] if args else kwargs.get("seconds", 0.0))
+    if seconds < 60:
+        return f"{seconds:.0f}s"
+    minutes = seconds / 60
+    if minutes < 60:
+        return f"{minutes:.0f}m"
+    hours = minutes / 60
+    if hours < 24:
+        remaining_min = int(minutes % 60)
+        return f"{int(hours)}h {remaining_min}m" if remaining_min else f"{int(hours)}h"
+    days = hours / 24
+    return f"{days:.1f}d"
+
+
+def format_token_count_compact(*args, **kwargs):
+    value = int(args[0] if args else kwargs.get("value", 0))
+    abs_value = abs(value)
+    if abs_value < 1_000:
+        return str(value)
+
+    sign = "-" if value < 0 else ""
+    units = ((1_000_000_000, "B"), (1_000_000, "M"), (1_000, "K"))
+    for threshold, suffix in units:
+        if abs_value >= threshold:
+            scaled = abs_value / threshold
+            if scaled < 10:
+                text = f"{scaled:.2f}"
+            elif scaled < 100:
+                text = f"{scaled:.1f}"
+            else:
+                text = f"{scaled:.0f}"
+            if "." in text:
+                text = text.rstrip("0").rstrip(".")
+            return f"{sign}{text}{suffix}"
+
+    return f"{value:,}"
+
+
+def is_table_divider(*args, **kwargs):
+    from agent.markdown_tables import is_table_divider as _is_table_divider
+
+    return _is_table_divider(*args, **kwargs)
+
+
+def looks_like_table_row(*args, **kwargs):
+    from agent.markdown_tables import looks_like_table_row as _looks_like_table_row
+
+    return _looks_like_table_row(*args, **kwargs)
+
+
+def realign_markdown_tables(*args, **kwargs):
+    from agent.markdown_tables import realign_markdown_tables as _realign_markdown_tables
+
+    return _realign_markdown_tables(*args, **kwargs)
 # NOTE: `from agent.account_usage import ...` is deliberately NOT at module
 # top — it transitively pulls the OpenAI SDK chain (~230 ms cold) and is only
 # needed when the user runs `/limits`. Lazy-imported inside the handler below.
@ -357,6 +415,12 @@ def load_cli_config() -> Dict[str, Any]:
        "display": {
            "compact": False,
            "resume_display": "full",
+            # Recap tuning for /resume — see hermes_cli/config.py DEFAULT_CONFIG.
+            "resume_exchanges": 10,
+            "resume_max_user_chars": 300,
+            "resume_max_assistant_chars": 200,
+            "resume_max_assistant_lines": 3,
+            "resume_skip_tool_only": True,
            "show_reasoning": False,
            "streaming": True,
            "busy_input_mode": "interrupt",
@ -410,7 +474,9 @@ def load_cli_config() -> Dict[str, Any]:
    if config_path.exists():
        try:
            with open(config_path, "r", encoding="utf-8") as f:
-                file_config = yaml.safe_load(f) or {}
+                from hermes_cli.config import _normalize_root_model_keys
+
+                file_config = _normalize_root_model_keys(yaml.safe_load(f) or {})
            
            _file_has_terminal_config = "terminal" in file_config

@ -431,21 +497,6 @@ def load_cli_config() -> Dict[str, Any]:
                    if "model" in file_config["model"] and "default" not in file_config["model"]:
                        defaults["model"]["default"] = file_config["model"]["model"]

-            # Legacy root-level provider/base_url fallback.
-            # Some users (or old code) put provider: / base_url: at the
-            # config root instead of inside the model: section.  These are
-            # only used as a FALLBACK when model.provider / model.base_url
-            # is not already set — never as an override.  The canonical
-            # location is model.provider (written by `hermes model`).
-            if not defaults["model"].get("provider"):
-                root_provider = file_config.get("provider")
-                if root_provider:
-                    defaults["model"]["provider"] = root_provider
-            if not defaults["model"].get("base_url"):
-                root_base_url = file_config.get("base_url")
-                if root_base_url:
-                    defaults["model"]["base_url"] = root_base_url
-            
            # Deep merge file_config into defaults.
            # First: merge keys that exist in both (deep-merge dicts, overwrite scalars)
            for key in defaults:
@ -717,31 +768,142 @@ from rich.markup import escape as _escape
 from rich.panel import Panel
 from rich.text import Text as _RichText

-import fire
+# Import agent and tool systems lazily. Bare interactive startup only needs the
+# prompt; the full agent/tool registry is initialized on first use.
+def AIAgent(*args, **kwargs):
+    from run_agent import AIAgent as _AIAgent

-# Import the agent and tool systems
-from run_agent import AIAgent
-from model_tools import get_tool_definitions, get_toolset_for_tool
+    return _AIAgent(*args, **kwargs)
+
+
+def get_tool_definitions(*args, **kwargs):
+    from model_tools import get_tool_definitions as _get_tool_definitions
+
+    return _get_tool_definitions(*args, **kwargs)
+
+
+def get_toolset_for_tool(*args, **kwargs):
+    from model_tools import get_toolset_for_tool as _get_toolset_for_tool
+
+    return _get_toolset_for_tool(*args, **kwargs)

 # Extracted CLI modules (Phase 3)
 from hermes_cli.banner import build_welcome_banner
 from hermes_cli.commands import SlashCommandCompleter, SlashCommandAutoSuggest
-from toolsets import get_all_toolsets, get_toolset_info, validate_toolset
+
+
+def get_all_toolsets(*args, **kwargs):
+    from toolsets import get_all_toolsets as _get_all_toolsets
+
+    return _get_all_toolsets(*args, **kwargs)
+
+
+def get_toolset_info(*args, **kwargs):
+    from toolsets import get_toolset_info as _get_toolset_info
+
+    return _get_toolset_info(*args, **kwargs)
+
+
+def validate_toolset(*args, **kwargs):
+    from toolsets import validate_toolset as _validate_toolset
+
+    return _validate_toolset(*args, **kwargs)
+
+
+def _sync_process_session_id(session_id: str) -> None:
+    """Keep process-local session-id consumers aligned after CLI switches."""
+    from gateway.session_context import set_current_session_id
+
+    set_current_session_id(session_id)

 # Cron job system for scheduled tasks (execution is handled by the gateway)
-from cron import get_job
+def get_job(*args, **kwargs):
+    from cron import get_job as _get_job
+
+    return _get_job(*args, **kwargs)

 # Resource cleanup imports for safe shutdown (terminal VMs, browser sessions)
-from tools.terminal_tool import cleanup_all_environments as _cleanup_all_terminals
-from tools.terminal_tool import set_sudo_password_callback, set_approval_callback
-from tools.skills_tool import set_secret_capture_callback
 from hermes_cli.callbacks import prompt_for_secret
-from tools.browser_tool import _emergency_cleanup_all_sessions as _cleanup_all_browsers
+
+
+def _cleanup_all_terminals(*args, **kwargs):
+    from tools.terminal_tool import cleanup_all_environments
+
+    return cleanup_all_environments(*args, **kwargs)
+
+
+def set_sudo_password_callback(*args, **kwargs):
+    from tools.terminal_tool import set_sudo_password_callback as _set_sudo_password_callback
+
+    return _set_sudo_password_callback(*args, **kwargs)
+
+
+def set_approval_callback(*args, **kwargs):
+    from tools.terminal_tool import set_approval_callback as _set_approval_callback
+
+    return _set_approval_callback(*args, **kwargs)
+
+
+def set_secret_capture_callback(*args, **kwargs):
+    from tools.skills_tool import set_secret_capture_callback as _set_secret_capture_callback
+
+    return _set_secret_capture_callback(*args, **kwargs)
+
+
+def _cleanup_all_browsers(*args, **kwargs):
+    from tools.browser_tool import _emergency_cleanup_all_sessions
+
+    return _emergency_cleanup_all_sessions(*args, **kwargs)

 # Guard to prevent cleanup from running multiple times on exit
 _cleanup_done = False
 # Weak reference to the active AIAgent for memory provider shutdown at exit
 _active_agent_ref = None
+_deferred_agent_startup_done = False
+
+
+def _prepare_deferred_agent_startup() -> None:
+    """Run Termux-deferred agent discovery before the first real agent turn."""
+    global _deferred_agent_startup_done
+    if _deferred_agent_startup_done:
+        return
+    if os.environ.get("HERMES_DEFER_AGENT_STARTUP") != "1":
+        return
+    _deferred_agent_startup_done = True
+    _accept_hooks = os.environ.get("HERMES_ACCEPT_HOOKS", "").lower() in {
+        "1",
+        "true",
+        "yes",
+        "on",
+    }
+    try:
+        from hermes_cli.plugins import discover_plugins
+
+        discover_plugins()
+    except Exception:
+        logger.warning(
+            "plugin discovery failed at deferred CLI startup",
+            exc_info=True,
+        )
+    try:
+        from tools.mcp_tool import discover_mcp_tools
+
+        discover_mcp_tools()
+    except Exception:
+        logger.debug(
+            "MCP tool discovery failed at deferred CLI startup",
+            exc_info=True,
+        )
+    try:
+        from agent.shell_hooks import register_from_config
+        from hermes_cli.config import load_config
+
+        register_from_config(load_config(), accept_hooks=_accept_hooks)
+    except Exception:
+        logger.debug(
+            "shell-hook registration failed at deferred CLI startup",
+            exc_info=True,
+        )

 def _run_cleanup():
    """Run resource cleanup exactly once."""
@ -2455,7 +2617,13 @@ def _build_compact_banner() -> str:
        line1 = f"{agent_name} - AI Agent Framework"
        tiny_line = agent_name

-    version_line = format_banner_version_label()
+    if os.environ.get("HERMES_FAST_STARTUP_BANNER") == "1":
+        from hermes_cli import __release_date__ as _release_date
+        from hermes_cli import __version__ as _version
+
+        version_line = f"Hermes Agent v{_version} ({_release_date})"
+    else:
+        version_line = format_banner_version_label()

    w = min(shutil.get_terminal_size().columns - 2, 88)
    if w < 30:
@ -2504,19 +2672,48 @@ def _looks_like_slash_command(text: str) -> bool:
 # Skill Slash Commands — dynamic commands generated from installed skills
 # ============================================================================

-from agent.skill_commands import (
-    scan_skill_commands,
-    get_skill_commands,
-    build_skill_invocation_message,
-    build_preloaded_skills_prompt,
-)
-from agent.skill_bundles import (
-    get_skill_bundles,
-    build_bundle_invocation_message,
-)
+_skill_commands = None
+_skill_bundles = None

-_skill_commands = scan_skill_commands()
-_skill_bundles = get_skill_bundles()
+
+def _ensure_skill_commands() -> dict:
+    global _skill_commands
+    if _skill_commands is None:
+        from agent.skill_commands import scan_skill_commands
+
+        _skill_commands = scan_skill_commands()
+    return _skill_commands
+
+
+def get_skill_commands() -> dict:
+    return _ensure_skill_commands()
+
+
+def build_skill_invocation_message(*args, **kwargs):
+    from agent.skill_commands import build_skill_invocation_message as _impl
+
+    return _impl(*args, **kwargs)
+
+
+def build_preloaded_skills_prompt(*args, **kwargs):
+    from agent.skill_commands import build_preloaded_skills_prompt as _impl
+
+    return _impl(*args, **kwargs)
+
+
+def get_skill_bundles() -> dict:
+    global _skill_bundles
+    if _skill_bundles is None:
+        from agent.skill_bundles import get_skill_bundles as _impl
+
+        _skill_bundles = _impl()
+    return _skill_bundles
+
+
+def build_bundle_invocation_message(*args, **kwargs):
+    from agent.skill_bundles import build_bundle_invocation_message as _impl
+
+    return _impl(*args, **kwargs)


 def _get_plugin_cmd_handler_names() -> set:
@ -2615,7 +2812,7 @@ class HermesCLI:
        api_key: str = None,
        base_url: str = None,
        max_turns: int = None,
-        verbose: bool = False,
+        verbose: Optional[bool] = None,
        compact: bool = False,
        resume: str = None,
        checkpoints: bool = False,
@ -2666,7 +2863,12 @@ class HermesCLI:
        else:
            self.busy_input_mode = "interrupt"

-        self.verbose = verbose if verbose is not None else (self.tool_progress_mode == "verbose")
+        # self.verbose ONLY controls global DEBUG logging (root logger level).
+        # display.tool_progress="verbose" controls tool-call rendering (full args,
+        # results, think blocks) and is independent — see _apply_logging_levels.
+        # Coupling the two (PR #6a1aa420e) caused all module DEBUG logs to spew
+        # to console whenever a user set tool_progress: verbose in config.
+        self.verbose = bool(verbose) if verbose is not None else False
        
        # streaming: stream tokens to the terminal as they arrive (display.streaming in config.yaml)
        self.streaming_enabled = CLI_CONFIG["display"].get("streaming", False)
@ -2852,12 +3054,9 @@ class HermesCLI:
                pass
        
        # Fallback provider chain — tried in order when primary fails after retries.
-        # Supports new list format (fallback_providers) and legacy single-dict (fallback_model).
-        fb = CLI_CONFIG.get("fallback_providers") or CLI_CONFIG.get("fallback_model") or []
-        # Normalize legacy single-dict to a one-element list
-        if isinstance(fb, dict):
-            fb = [fb] if fb.get("provider") and fb.get("model") else []
-        self._fallback_model = fb
+        # Merge new ``fallback_providers`` entries with any legacy
+        # ``fallback_model`` entries so old configs still participate.
+        self._fallback_model = get_fallback_chain(CLI_CONFIG)

        # Signature of the currently-initialised agent's runtime.  Used to
        # rebuild the agent when provider / model / base_url changes across
@ -2865,7 +3064,9 @@ class HermesCLI:
        self._active_agent_route_signature = None

        # Agent will be initialized on first use
-        self.agent: Optional[AIAgent] = None
+        self.agent: Optional[Any] = None
+        self._tool_callbacks_installed = False
+        self._tirith_security_checked = False
        self._app = None  # prompt_toolkit Application (set in run())
        
        # Conversation state
@ -4488,6 +4689,41 @@ class HermesCLI:
        route["request_overrides"] = overrides
        return route

+    def _install_tool_callbacks(self) -> None:
+        """Install tool callbacks that need the live prompt UI."""
+        if getattr(self, "_tool_callbacks_installed", False):
+            return
+        set_sudo_password_callback(self._sudo_password_callback)
+        set_approval_callback(self._approval_callback)
+        set_secret_capture_callback(self._secret_capture_callback)
+        try:
+            from tools.computer_use_tool import set_approval_callback as _set_cu_cb
+
+            _set_cu_cb(self._computer_use_approval_callback)
+        except ImportError:
+            pass
+        self._tool_callbacks_installed = True
+
+    def _ensure_tirith_security(self) -> None:
+        """Check tirith availability once before tools can run terminal commands."""
+        if getattr(self, "_tirith_security_checked", False):
+            return
+        self._tirith_security_checked = True
+        try:
+            from tools.tirith_security import ensure_installed, is_platform_supported
+
+            tirith_path = ensure_installed(log_failures=False)
+            if tirith_path is None and is_platform_supported():
+                security_cfg = self.config.get("security", {}) or {}
+                tirith_enabled = security_cfg.get("tirith_enabled", True)
+                if tirith_enabled:
+                    _cprint(
+                        f"  {_DIM}⚠ tirith security scanner enabled but not available "
+                        f"— command scanning will use pattern matching only{_RST}"
+                    )
+        except Exception:
+            pass
+
    def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, request_overrides: dict | None = None) -> bool:
        """
        Initialize the agent on first use.
@ -4499,6 +4735,10 @@ class HermesCLI:
        if self.agent is not None:
            return True

+        _prepare_deferred_agent_startup()
+        self._install_tool_callbacks()
+        self._ensure_tirith_security()
+
        if not self._ensure_runtime_credentials():
            return False

@ -4713,8 +4953,10 @@ class HermesCLI:
                context_length=ctx_len,
            )
        
-        # Show tool availability warnings if any tools are disabled
-        self._show_tool_availability_warnings()
+        # Tool discovery is intentionally deferred on the Termux bare prompt
+        # path; availability warnings are shown once tools are initialized.
+        if os.environ.get("HERMES_DEFER_AGENT_STARTUP") != "1":
+            self._show_tool_availability_warnings()

        # Warn about very low context lengths (common with local servers)
        if ctx_len and ctx_len <= 8192:
@ -4852,10 +5094,13 @@ class HermesCLI:
        if self.resume_display == "minimal":
            return

-        MAX_DISPLAY_EXCHANGES = 10   # max user+assistant pairs to show
-        MAX_USER_LEN = 300           # truncate user messages
-        MAX_ASST_LEN = 200           # truncate assistant text
-        MAX_ASST_LINES = 3           # max lines of assistant text
+        # Read limits from config (with hardcoded defaults)
+        _disp = CLI_CONFIG.get("display", {})
+        MAX_DISPLAY_EXCHANGES = int(_disp.get("resume_exchanges", 10))
+        MAX_USER_LEN = int(_disp.get("resume_max_user_chars", 300))
+        MAX_ASST_LEN = int(_disp.get("resume_max_assistant_chars", 200))
+        MAX_ASST_LINES = int(_disp.get("resume_max_assistant_lines", 3))
+        SKIP_TOOL_ONLY = _disp.get("resume_skip_tool_only", True)

        # Collect displayable entries (skip system, tool-result messages)
        entries = []  # list of (role, display_text)
@ -4918,6 +5163,10 @@ class HermesCLI:
                if not parts:
                    # Skip pure-reasoning messages that have no visible output
                    continue
+                # Skip tool-call-only entries when SKIP_TOOL_ONLY is enabled
+                has_text = bool(text)
+                if SKIP_TOOL_ONLY and not has_text and tool_calls:
+                    continue
                entries.append(("assistant", " ".join(parts)))
                _last_asst_idx = len(entries) - 1
                _last_asst_full = " ".join(full_parts)
@ -5491,9 +5740,13 @@ class HermesCLI:
    
    def _show_status(self):
        """Show compact startup status line."""
-        # Get tool count
-        tools = get_tool_definitions(enabled_toolsets=self.enabled_toolsets, quiet_mode=True)
-        tool_count = len(tools) if tools else 0
+        # Avoid pulling the full tool registry into the bare Termux prompt path.
+        if os.environ.get("HERMES_DEFER_AGENT_STARTUP") == "1":
+            tool_status = "tools deferred"
+        else:
+            tools = get_tool_definitions(enabled_toolsets=self.enabled_toolsets, quiet_mode=True)
+            tool_count = len(tools) if tools else 0
+            tool_status = f"{tool_count} tools"

        # Format model name (shorten if needed)
        model_short = self.model.split("/")[-1] if "/" in self.model else self.model
@ -5525,7 +5778,7 @@ class HermesCLI:

        self._console_print(
            f"  {api_indicator} [{accent_color}]{model_short}[/] "
-            f"[dim {separator_color}]·[/] [bold {label_color}]{tool_count} tools[/]"
+            f"[dim {separator_color}]·[/] [bold {label_color}]{tool_status}[/]"
            f"{toolsets_info}{provider_info}"
        )

@ -5638,9 +5891,10 @@ class HermesCLI:
                    continue
                ChatConsole().print(f"    [bold {_accent_hex()}]{cmd:<15}[/] [dim]-[/] {_escape(desc)}")

-        if _skill_commands:
-            _cprint(f"\n  ⚡ {_BOLD}Skill Commands{_RST} ({len(_skill_commands)} installed):")
-            for cmd, info in sorted(_skill_commands.items()):
+        skill_commands = _ensure_skill_commands()
+        if skill_commands:
+            _cprint(f"\n  ⚡ {_BOLD}Skill Commands{_RST} ({len(skill_commands)} installed):")
+            for cmd, info in sorted(skill_commands.items()):
                ChatConsole().print(
                    f"    [bold {_accent_hex()}]{cmd:<22}[/] [dim]-[/] {_escape(info['description'])}"
                )
@ -5918,15 +6172,16 @@ class HermesCLI:
        else:
            print("  Recent sessions:")
        print()
-        print(f"  {'Title':<32} {'Preview':<40} {'Last Active':<13} {'ID'}")
-        print(f"  {'─' * 32} {'─' * 40} {'─' * 13} {'─' * 24}")
-        for session in sessions:
-            title = (session.get("title") or "—")[:30]
+        print(f"  {'#':<3} {'Title':<32} {'Preview':<40} {'Last Active':<13} {'ID'}")
+        print(f"  {'─' * 3} {'─' * 32} {'─' * 40} {'─' * 13} {'─' * 24}")
+        for idx, session in enumerate(sessions, start=1):
+            title = session.get("title") or "—"
            preview = (session.get("preview") or "")[:38]
            last_active = _relative_time(session.get("last_active"))
-            print(f"  {title:<32} {preview:<40} {last_active:<13} {session['id']}")
+            print(f"  {idx:<3} {title:<32} {preview:<40} {last_active:<13} {session['id']}")
        print()
-        print("  Use /resume <session id or title> to continue where you left off.")
+        print("  Use /resume <number>, /resume <session id>, or /resume <session title> to continue.")
+        print("  Example: /resume 2")
        print()
        return True

@ -6037,6 +6292,7 @@ class HermesCLI:
        self.conversation_history = []
        self._pending_title = None
        self._resumed = False
+        _sync_process_session_id(self.session_id)

        if self.agent:
            self.agent.session_id = self.session_id
@ -6270,7 +6526,7 @@ class HermesCLI:
        target = parts[1].strip() if len(parts) > 1 else ""

        if not target:
-            _cprint("  Usage: /resume <session_id_or_title>")
+            _cprint("  Usage: /resume <number|session_id_or_title>")
            if self._show_recent_sessions(reason="resume"):
                return
            _cprint("  Tip:   Use /history or `hermes sessions list` to find sessions.")
@ -6281,10 +6537,20 @@ class HermesCLI:
            _cprint(f"  {format_session_db_unavailable()}")
            return

-        # Resolve title or ID
-        from hermes_cli.main import _resolve_session_by_name_or_id
-        resolved = _resolve_session_by_name_or_id(target)
-        target_id = resolved or target
+        # Resolve numbered selection, title, or ID
+        if target.isdigit():
+            sessions = self._list_recent_sessions(limit=10)
+            index = int(target)
+            if index < 1 or index > len(sessions):
+                _cprint(f"  Resume index {index} is out of range.")
+                _cprint("  Use /resume with no arguments to see available sessions.")
+                return
+            selected = sessions[index - 1]
+            target_id = selected["id"]
+        else:
+            from hermes_cli.main import _resolve_session_by_name_or_id
+            resolved = _resolve_session_by_name_or_id(target)
+            target_id = resolved or target

        session_meta = self._session_db.get_session(target_id)
        if not session_meta:
@ -6323,6 +6589,7 @@ class HermesCLI:
        self.session_id = target_id
        self._resumed = True
        self._pending_title = None
+        _sync_process_session_id(target_id)

        # Load conversation history (strip transcript-only metadata entries)
        restored = self._session_db.get_messages_as_conversation(target_id)
@ -6374,6 +6641,7 @@ class HermesCLI:
                f" ({msg_count} user message{'s' if msg_count != 1 else ''},"
                f" {len(self.conversation_history)} total)"
            )
+            self._display_resumed_history()
        else:
            _cprint(f"  ↻ Resumed session {target_id}{title_part} — no messages, starting fresh.")

@ -6496,6 +6764,7 @@ class HermesCLI:
        self.session_start = now
        self._pending_title = None
        self._resumed = True  # Prevents auto-title generation
+        _sync_process_session_id(new_session_id)

        # Sync the agent
        if self.agent:
@ -7857,6 +8126,7 @@ class HermesCLI:
                "clear",
                "This clears the screen and starts a new session.\n"
                "The current conversation history will be discarded.",
+                cmd_original=cmd_original,
            ) is None:
                return
            self.new_session(silent=True)
@ -7981,12 +8251,16 @@ class HermesCLI:
            if not self._handle_handoff_command(cmd_original):
                return False
        elif canonical == "new":
-            parts = cmd_original.split(maxsplit=1)
-            title = parts[1].strip() if len(parts) > 1 else None
+            # Strip inline-skip tokens (now/--yes/-y) before deriving the title
+            # so "/new now My Session" yields title="My Session" instead of
+            # title="now My Session". See _split_destructive_skip.
+            _new_args, _ = self._split_destructive_skip(cmd_original)
+            title = _new_args.strip() or None
            if self._confirm_destructive_slash(
                "new",
                "This starts a fresh session.\n"
                "The current conversation history will be discarded.",
+                cmd_original=cmd_original,
            ) is None:
                return
            self.new_session(title=title)
@ -8013,6 +8287,7 @@ class HermesCLI:
            if self._confirm_destructive_slash(
                "undo",
                "This removes the last user/assistant exchange from history.",
+                cmd_original=cmd_original,
            ) is None:
                return
            self.undo_last()
@ -8161,6 +8436,8 @@ class HermesCLI:
        else:
            # Check for user-defined quick commands (bypass agent loop, no LLM call)
            base_cmd = cmd_lower.split()[0]
+            skill_commands = _ensure_skill_commands()
+            skill_bundles = get_skill_bundles()
            quick_commands = self.config.get("quick_commands", {})
            if base_cmd.lstrip("/") in quick_commands:
                qcmd = quick_commands[base_cmd.lstrip("/")]
@ -8216,14 +8493,14 @@ class HermesCLI:
                        _cprint(f"\033[1;31mPlugin command error: {e}{_RST}")
            # Skill bundles take precedence over individual skills — /<bundle>
            # loads multiple skills at once. Rescans cheaply when files change.
-            elif base_cmd in get_skill_bundles():
+            elif base_cmd in skill_bundles:
                user_instruction = cmd_original[len(base_cmd):].strip()
                bundle_result = build_bundle_invocation_message(
                    base_cmd, user_instruction, task_id=self.session_id
                )
                if bundle_result:
                    msg, loaded_names, missing = bundle_result
-                    bundle_info = get_skill_bundles()[base_cmd]
+                    bundle_info = skill_bundles[base_cmd]
                    print(
                        f"\n⚡ Loading bundle: {bundle_info['name']} "
                        f"({len(loaded_names)} skills)"
@ -8239,13 +8516,13 @@ class HermesCLI:
                        f"[bold red]Failed to load bundle for {base_cmd}[/]"
                    )
            # Check for skill slash commands (/gif-search, /axolotl, etc.)
-            elif base_cmd in _skill_commands:
+            elif base_cmd in skill_commands:
                user_instruction = cmd_original[len(base_cmd):].strip()
                msg = build_skill_invocation_message(
                    base_cmd, user_instruction, task_id=self.session_id
                )
                if msg:
-                    skill_name = _skill_commands[base_cmd]["name"]
+                    skill_name = skill_commands[base_cmd]["name"]
                    print(f"\n⚡ Loading skill: {skill_name}")
                    if hasattr(self, '_pending_input'):
                        self._pending_input.put(msg)
@ -8257,7 +8534,7 @@ class HermesCLI:
                # that execution-time resolution agrees with tab-completion.
                from hermes_cli.commands import COMMANDS
                typed_base = cmd_lower.split()[0]
-                all_known = set(COMMANDS) | set(_skill_commands) | set(get_skill_bundles())
+                all_known = set(COMMANDS) | set(skill_commands) | set(skill_bundles)
                matches = [c for c in all_known if c.startswith(typed_base)]
                if len(matches) > 1:
                    # Prefer an exact match (typed the full command name)
@ -9088,18 +9365,23 @@ class HermesCLI:
            _cprint("  Failed to save runtime_footer setting to config.yaml")

    def _toggle_verbose(self):
-        """Cycle tool progress mode: off → new → all → verbose → off."""
+        """Cycle tool progress mode: off → new → all → verbose → off.
+
+        Tool-progress display (full args / results / think blocks at the
+        ``verbose`` step) is INDEPENDENT of global DEBUG logging.  Cycling
+        through here does not change ``self.verbose`` or the agent's
+        ``verbose_logging`` / ``quiet_mode`` — those remain under the
+        explicit ``-v``/``--verbose`` flag and the ``/verbose-logging``
+        toggle.  See PR #6a1aa420e for the history that decoupled them.
+        """
        cycle = ["off", "new", "all", "verbose"]
        try:
            idx = cycle.index(self.tool_progress_mode)
        except ValueError:
            idx = 2  # default to "all"
        self.tool_progress_mode = cycle[(idx + 1) % len(cycle)]
-        self.verbose = self.tool_progress_mode == "verbose"

        if self.agent:
-            self.agent.verbose_logging = self.verbose
-            self.agent.quiet_mode = not self.verbose
            self.agent.reasoning_callback = self._current_reasoning_callback()

        # Use raw ANSI codes via _cprint so the output is routed through
@ -9111,7 +9393,7 @@ class HermesCLI:
            "off": f"{_Colors.DIM}Tool progress: OFF{_Colors.RESET} — silent mode, just the final response.",
            "new": f"{_Colors.YELLOW}Tool progress: NEW{_Colors.RESET} — show each new tool (skip repeats).",
            "all": f"{_Colors.GREEN}Tool progress: ALL{_Colors.RESET} — show every tool call.",
-            "verbose": f"{_Colors.BOLD}{_Colors.GREEN}Tool progress: VERBOSE{_Colors.RESET} — full args, results, think blocks, and debug logs.",
+            "verbose": f"{_Colors.BOLD}{_Colors.GREEN}Tool progress: VERBOSE{_Colors.RESET} — full args, results, and think blocks.",
        }
        _cprint(labels.get(self.tool_progress_mode, ""))

@ -9657,7 +9939,49 @@ class HermesCLI:
        if _reload_thread.is_alive():
            print("  ⚠️  MCP reload timed out (30s). Some servers may not have reconnected.")

-    def _confirm_destructive_slash(self, command: str, detail: str) -> Optional[str]:
+    # Inline-skip tokens that bypass the destructive-slash confirmation modal.
+    # Matches the escape-hatch pattern users on broken modal platforms
+    # (currently native Windows PowerShell — issue #30768) need to self-serve
+    # without having to flip approvals.destructive_slash_confirm in config.
+    _DESTRUCTIVE_SKIP_TOKENS = frozenset({"now", "--yes", "-y"})
+
+    @classmethod
+    def _split_destructive_skip(cls, cmd_text: Optional[str]) -> tuple[str, bool]:
+        """Split inline-skip tokens out of a destructive slash command.
+
+        Returns ``(remainder, skip)`` where ``remainder`` is the original
+        text with the command word and any recognized skip tokens removed,
+        and ``skip`` is True iff at least one skip token was found.
+
+        Examples:
+            "/reset now"            -> ("", True)
+            "/reset --yes My title" -> ("My title", True)
+            "/new My title"         -> ("My title", False)
+            "/clear"                -> ("", False)
+        """
+        if not cmd_text:
+            return "", False
+        tokens = cmd_text.strip().split()
+        if not tokens:
+            return "", False
+        # Drop leading "/cmd" word — callers pass the full command text.
+        if tokens[0].startswith("/"):
+            tokens = tokens[1:]
+        skip = False
+        kept: list[str] = []
+        for tok in tokens:
+            if tok.lower() in cls._DESTRUCTIVE_SKIP_TOKENS:
+                skip = True
+                continue
+            kept.append(tok)
+        return " ".join(kept), skip
+
+    def _confirm_destructive_slash(
+        self,
+        command: str,
+        detail: str,
+        cmd_original: Optional[str] = None,
+    ) -> Optional[str]:
        """Prompt the user to confirm a destructive session slash command.

        Used by ``/clear``, ``/new``/``/reset``, and ``/undo`` before they
@ -9673,9 +9997,24 @@ class HermesCLI:
        gate is off the function returns ``"once"`` immediately without
        prompting.

+        Inline-skip: if ``cmd_original`` contains ``now``, ``--yes``, or
+        ``-y`` as an argument (e.g. ``/reset now``, ``/new --yes My title``),
+        the modal is bypassed and ``"once"`` is returned immediately. This is
+        an escape hatch for platforms where the prompt_toolkit modal hangs
+        (issue #30768 — native Windows PowerShell). Callers are responsible
+        for stripping the skip tokens from any remaining argument parsing
+        (see :meth:`_split_destructive_skip`).
+
        Returns ``"once"``, ``"always"``, or ``None`` (cancelled).  Callers
        proceed with the destructive action when the result is non-None.
        """
+        # Inline-skip escape hatch — works regardless of platform/modal state.
+        # See class-level _DESTRUCTIVE_SKIP_TOKENS for the accepted tokens.
+        if cmd_original:
+            _, _skip = self._split_destructive_skip(cmd_original)
+            if _skip:
+                return "once"
+
        # Gate check — respects prior "Always Approve" clicks.
        try:
            cfg = load_cli_config()
@ -10010,9 +10349,7 @@ class HermesCLI:
                self._last_scrollback_tool = function_name
                try:
                    from agent.display import get_cute_tool_message
-                    line = get_cute_tool_message(function_name, stored_args, duration)
-                    if is_error:
-                        line = f"{line} [error]"
+                    line = get_cute_tool_message(function_name, stored_args, duration, result=kwargs.get("result"))
                    _cprint(f"  {line}")
                except Exception:
                    pass
@ -12023,37 +12360,11 @@ class HermesCLI:
        self._voice_tts_done = threading.Event()  # Signals TTS playback finished
        self._voice_tts_done.set()  # Initially "done" (no TTS pending)

-        # Register callbacks so terminal_tool prompts route through our UI
-        set_sudo_password_callback(self._sudo_password_callback)
-        set_approval_callback(self._approval_callback)
-        set_secret_capture_callback(self._secret_capture_callback)
+        if os.environ.get("HERMES_DEFER_AGENT_STARTUP") != "1":
+            self._install_tool_callbacks()

-        # Computer-use shares the same approval UI (prompt_toolkit dialog).
-        # The tool handler expects a 3-arg callback (action, args, summary)
-        # and returns "approve_once" | "approve_session" | "always_approve"
-        # | "deny". Adapt our existing generic callback.
-        try:
-            from tools.computer_use_tool import set_approval_callback as _set_cu_cb
-            _set_cu_cb(self._computer_use_approval_callback)
-        except ImportError:
-            pass  # computer_use extras not installed
-
-        # Ensure tirith security scanner is available (downloads if needed).
-        # Warn the user if tirith is enabled in config but not available,
-        # so they know command security scanning is degraded.  Suppressed
-        # on platforms where tirith ships no binary (Windows etc.) — the
-        # user can't act on it and pattern-matching guards still run.
-        try:
-            from tools.tirith_security import ensure_installed, is_platform_supported
-            tirith_path = ensure_installed(log_failures=False)
-            if tirith_path is None and is_platform_supported():
-                security_cfg = self.config.get("security", {}) or {}
-                tirith_enabled = security_cfg.get("tirith_enabled", True)
-                if tirith_enabled:
-                    _cprint(f"  {_DIM}⚠ tirith security scanner enabled but not available "
-                            f"— command scanning will use pattern matching only{_RST}")
-        except Exception:
-            pass  # Non-fatal — fail-open at scan time if unavailable
+        if os.environ.get("HERMES_DEFER_AGENT_STARTUP") != "1":
+            self._ensure_tirith_security()
        
        # Key bindings for the input area
        kb = KeyBindings()
@ -14211,7 +14522,7 @@ def main(
    api_key: str = None,
    base_url: str = None,
    max_turns: int = None,
-    verbose: bool = False,
+    verbose: Optional[bool] = None,
    quiet: bool = False,
    compact: bool = False,
    list_tools: bool = False,
@ -14557,4 +14868,6 @@ def main(


 if __name__ == "__main__":
+    import fire
+
    fire.Fire(main)
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@ -529,7 +529,9 @@ def _send_media_via_adapter(
    """
    from pathlib import Path

-    from gateway.platforms.base import should_send_media_as_audio
+    from gateway.platforms.base import BasePlatformAdapter, should_send_media_as_audio
+
+    media_files = BasePlatformAdapter.filter_media_delivery_paths(media_files)

    for media_path, _is_voice in media_files:
        try:
@ -614,6 +616,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
    # Extract MEDIA: tags so attachments are forwarded as files, not raw text
    from gateway.platforms.base import BasePlatformAdapter
    media_files, cleaned_delivery_content = BasePlatformAdapter.extract_media(delivery_content)
+    media_files = BasePlatformAdapter.filter_media_delivery_paths(media_files)

    try:
        config = load_gateway_config()
--- a/gateway/config.py
+++ b/gateway/config.py
@ -424,7 +424,9 @@ _PLATFORM_CONNECTED_CHECKERS: dict[Platform, Callable[[PlatformConfig], bool]] =
    Platform.SMS: lambda cfg: bool(os.getenv("TWILIO_ACCOUNT_SID")),
    Platform.API_SERVER: lambda cfg: True,
    Platform.WEBHOOK: lambda cfg: True,
-    Platform.MSGRAPH_WEBHOOK: lambda cfg: True,
+    Platform.MSGRAPH_WEBHOOK: lambda cfg: bool(
+        str(cfg.extra.get("client_state") or "").strip()
+    ),
    Platform.FEISHU: lambda cfg: bool(cfg.extra.get("app_id")),
    Platform.WECOM: lambda cfg: bool(cfg.extra.get("bot_id")),
    Platform.WECOM_CALLBACK: lambda cfg: bool(
@ -926,73 +928,6 @@ def load_gateway_config() -> GatewayConfig:
                        ac = ",".join(str(v) for v in ac)
                    os.environ["SLACK_ALLOWED_CHANNELS"] = str(ac)

-            # Discord settings → env vars (env vars take precedence)
-            discord_cfg = yaml_cfg.get("discord", {})
-            if isinstance(discord_cfg, dict):
-                if "require_mention" in discord_cfg and not os.getenv("DISCORD_REQUIRE_MENTION"):
-                    os.environ["DISCORD_REQUIRE_MENTION"] = str(discord_cfg["require_mention"]).lower()
-                if "thread_require_mention" in discord_cfg and not os.getenv("DISCORD_THREAD_REQUIRE_MENTION"):
-                    os.environ["DISCORD_THREAD_REQUIRE_MENTION"] = str(discord_cfg["thread_require_mention"]).lower()
-                frc = discord_cfg.get("free_response_channels")
-                if frc is not None and not os.getenv("DISCORD_FREE_RESPONSE_CHANNELS"):
-                    if isinstance(frc, list):
-                        frc = ",".join(str(v) for v in frc)
-                    os.environ["DISCORD_FREE_RESPONSE_CHANNELS"] = str(frc)
-                if "auto_thread" in discord_cfg and not os.getenv("DISCORD_AUTO_THREAD"):
-                    os.environ["DISCORD_AUTO_THREAD"] = str(discord_cfg["auto_thread"]).lower()
-                if "reactions" in discord_cfg and not os.getenv("DISCORD_REACTIONS"):
-                    os.environ["DISCORD_REACTIONS"] = str(discord_cfg["reactions"]).lower()
-                # ignored_channels: channels where bot never responds (even when mentioned)
-                ic = discord_cfg.get("ignored_channels")
-                if ic is not None and not os.getenv("DISCORD_IGNORED_CHANNELS"):
-                    if isinstance(ic, list):
-                        ic = ",".join(str(v) for v in ic)
-                    os.environ["DISCORD_IGNORED_CHANNELS"] = str(ic)
-                # allowed_channels: if set, bot ONLY responds in these channels (whitelist)
-                ac = discord_cfg.get("allowed_channels")
-                if ac is not None and not os.getenv("DISCORD_ALLOWED_CHANNELS"):
-                    if isinstance(ac, list):
-                        ac = ",".join(str(v) for v in ac)
-                    os.environ["DISCORD_ALLOWED_CHANNELS"] = str(ac)
-                # no_thread_channels: channels where bot responds directly without creating thread
-                ntc = discord_cfg.get("no_thread_channels")
-                if ntc is not None and not os.getenv("DISCORD_NO_THREAD_CHANNELS"):
-                    if isinstance(ntc, list):
-                        ntc = ",".join(str(v) for v in ntc)
-                    os.environ["DISCORD_NO_THREAD_CHANNELS"] = str(ntc)
-                # history_backfill: recover missed channel messages for shared sessions
-                # when require_mention is active.  Fetches messages between bot turns
-                # and prepends them to the user message for context.
-                if "history_backfill" in discord_cfg and not os.getenv("DISCORD_HISTORY_BACKFILL"):
-                    os.environ["DISCORD_HISTORY_BACKFILL"] = str(discord_cfg["history_backfill"]).lower()
-                hbl = discord_cfg.get("history_backfill_limit")
-                if hbl is not None and not os.getenv("DISCORD_HISTORY_BACKFILL_LIMIT"):
-                    os.environ["DISCORD_HISTORY_BACKFILL_LIMIT"] = str(hbl)
-                # allow_mentions: granular control over what the bot can ping.
-                # Safe defaults (no @everyone/roles) are applied in the adapter;
-                # these YAML keys only override when set and let users opt back
-                # into unsafe modes (e.g. roles=true) if they actually want it.
-                allow_mentions_cfg = discord_cfg.get("allow_mentions")
-                if isinstance(allow_mentions_cfg, dict):
-                    for yaml_key, env_key in (
-                        ("everyone", "DISCORD_ALLOW_MENTION_EVERYONE"),
-                        ("roles", "DISCORD_ALLOW_MENTION_ROLES"),
-                        ("users", "DISCORD_ALLOW_MENTION_USERS"),
-                        ("replied_user", "DISCORD_ALLOW_MENTION_REPLIED_USER"),
-                    ):
-                        if yaml_key in allow_mentions_cfg and not os.getenv(env_key):
-                            os.environ[env_key] = str(allow_mentions_cfg[yaml_key]).lower()
-                # reply_to_mode: top-level preferred, falls back to extra.reply_to_mode
-                # YAML 1.1 parses bare 'off' as boolean False — coerce to string "off".
-                _discord_extra = discord_cfg.get("extra") if isinstance(discord_cfg.get("extra"), dict) else {}
-                _discord_rtm = (
-                    discord_cfg["reply_to_mode"] if "reply_to_mode" in discord_cfg
-                    else _discord_extra.get("reply_to_mode")
-                )
-                if _discord_rtm is not None and not os.getenv("DISCORD_REPLY_TO_MODE"):
-                    _rtm_str = "off" if _discord_rtm is False else str(_discord_rtm).lower()
-                    os.environ["DISCORD_REPLY_TO_MODE"] = _rtm_str
-
            # Bridge top-level require_mention to Telegram when the telegram: section
            # does not already provide one.  Users often write "require_mention: true"
            # at the top level alongside group_sessions_per_user, expecting it to work
@ -1878,6 +1813,17 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
    # need to seed ``PlatformConfig.extra`` from env vars (e.g. Google Chat's
    # project_id / subscription_name) can supply ``env_enablement_fn`` on
    # their PlatformEntry — called here BEFORE adapter construction.
+    #
+    # Enablement gate (#31116): when a plugin registers ``is_connected``
+    # (the "has the user actually configured credentials for this?" check),
+    # we MUST consult it before flipping ``enabled = True``.  Otherwise
+    # ``check_fn`` alone — which for adapter plugins typically just
+    # verifies the SDK is importable / lazy-installs it — silently enables
+    # platforms the user never opted into, and the gateway then tries to
+    # connect to Discord / Teams / Google Chat with no token and emits
+    # noisy retry-forever errors.  ``_platform_status`` was already fixed
+    # for the same bug class in commit 7849a3d73; this is the runtime
+    # counterpart.
    try:
        from hermes_cli.plugins import discover_plugins
        discover_plugins()  # idempotent
@ -1890,34 +1836,99 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
                logger.debug("check_fn for %s raised: %s", entry.name, e)
                continue
            platform = Platform(entry.name)
-            if platform not in config.platforms:
-                config.platforms[platform] = PlatformConfig()
-            config.platforms[platform].enabled = True
-            # Seed extras from env if the plugin opted in.
+            existing_cfg = config.platforms.get(platform)
+            # Seed candidate extras from ``env_enablement_fn`` so plugins
+            # whose ``is_connected`` reads ``config.extra`` (e.g. Google
+            # Chat's ``_is_connected`` checks ``config.extra["project_id"]``)
+            # see the same state they will after enablement. Without this,
+            # Google-Chat-on-env-vars-only setups silently fail the gate
+            # below even though the user is configured.  Plugins whose
+            # ``is_connected`` reads env vars directly (Discord, IRC,
+            # Teams, LINE, ntfy, Simplex) are unaffected; this only
+            # restores Google Chat.
+            seed_for_probe = None
            if entry.env_enablement_fn is not None:
                try:
-                    seed = entry.env_enablement_fn()
+                    seed_for_probe = entry.env_enablement_fn()
                except Exception as e:
                    logger.debug(
                        "env_enablement_fn for %s raised: %s", entry.name, e
                    )
-                    seed = None
-                if isinstance(seed, dict) and seed:
-                    # Extract the home_channel dict (if provided) so we wire it
-                    # up as a proper HomeChannel dataclass.  Everything else is
-                    # merged into ``extra``.
-                    home = seed.pop("home_channel", None)
-                    config.platforms[platform].extra.update(seed)
-                    if isinstance(home, dict) and home.get("chat_id"):
-                        config.platforms[platform].home_channel = HomeChannel(
-                            platform=platform,
-                            chat_id=str(home["chat_id"]),
-                            name=str(home.get("name") or "Home"),
-                            thread_id=(
-                                str(home["thread_id"])
-                                if home.get("thread_id")
-                                else None
-                            ),
+                    seed_for_probe = None
+
+            # Only consult is_connected for platforms that are NOT already
+            # explicitly configured in YAML / env (existing_cfg with
+            # enabled=True means the user wrote it themselves or another
+            # env-var bridge enabled it — keep that decision).
+            if existing_cfg is None or not existing_cfg.enabled:
+                if entry.is_connected is not None:
+                    try:
+                        # Probe with ``enabled=True`` since we're asking
+                        # "would this plugin BE configured if we enabled
+                        # it?" not "is it currently enabled?". Google
+                        # Chat's ``_is_connected`` short-circuits on
+                        # ``config.enabled`` being False, which on the
+                        # default ``PlatformConfig()`` would fail the
+                        # gate even with proper env vars set.
+                        if existing_cfg is not None:
+                            probe_cfg = existing_cfg
+                            if not probe_cfg.enabled:
+                                probe_cfg = PlatformConfig(
+                                    enabled=True,
+                                    extra=dict(probe_cfg.extra or {}),
+                                )
+                        else:
+                            probe_cfg = PlatformConfig(enabled=True)
+                        if isinstance(seed_for_probe, dict) and seed_for_probe:
+                            # Don't mutate ``existing_cfg``; the probe gets
+                            # a transient view with env-seeded extras layered
+                            # on top of whatever's already there.
+                            probe_extra = dict(getattr(probe_cfg, "extra", {}) or {})
+                            for k, v in seed_for_probe.items():
+                                if k == "home_channel":
+                                    continue
+                                probe_extra.setdefault(k, v)
+                            probe_cfg = PlatformConfig(
+                                enabled=True,
+                                extra=probe_extra,
+                            )
+                        configured = bool(entry.is_connected(probe_cfg))
+                    except Exception as exc:
+                        logger.debug(
+                            "is_connected for %s raised: %s — skipping enablement",
+                            entry.name, exc,
                        )
+                        configured = False
+                    if not configured:
+                        logger.debug(
+                            "Plugin platform '%s' available but not configured "
+                            "(is_connected returned False) — skipping enable",
+                            entry.name,
+                        )
+                        continue
+            if platform not in config.platforms:
+                config.platforms[platform] = PlatformConfig()
+            config.platforms[platform].enabled = True
+            # Commit env-seeded extras onto the now-enabled platform.
+            # We've already called ``env_enablement_fn`` above (for the
+            # probe); reuse that result instead of calling it twice.
+            if isinstance(seed_for_probe, dict) and seed_for_probe:
+                seed = dict(seed_for_probe)
+                # Extract the home_channel dict (if provided) so we wire it
+                # up as a proper HomeChannel dataclass.  Everything else is
+                # merged into ``extra``.
+                home = seed.pop("home_channel", None)
+                config.platforms[platform].extra.update(seed)
+                if isinstance(home, dict) and home.get("chat_id"):
+                    config.platforms[platform].home_channel = HomeChannel(
+                        platform=platform,
+                        chat_id=str(home["chat_id"]),
+                        name=str(home.get("name") or "Home"),
+                        thread_id=(
+                            str(home["thread_id"])
+                            if home.get("thread_id")
+                            else None
+                        ),
+                    )
    except Exception as e:
        logger.debug("Plugin platform enable pass failed: %s", e)
--- a/gateway/pairing.py
+++ b/gateway/pairing.py
@ -18,6 +18,7 @@ Security features (based on OWASP + NIST SP 800-63-4 guidance):
 Storage: ~/.hermes/pairing/
 """

+import hashlib
 import json
 import os
 import secrets
@ -27,6 +28,10 @@ import time
 from pathlib import Path
 from typing import Optional

+from gateway.whatsapp_identity import (
+    expand_whatsapp_aliases,
+    normalize_whatsapp_identifier,
+)
 from hermes_constants import get_hermes_dir
 from utils import atomic_replace

@ -109,12 +114,40 @@ class PairingStore:
    def _save_json(self, path: Path, data: dict) -> None:
        _secure_write(path, json.dumps(data, indent=2, ensure_ascii=False))

+    def _normalize_user_id(self, platform: str, user_id: str) -> str:
+        """Normalize platform-specific user IDs before persisting them."""
+        raw_user_id = str(user_id or "").strip()
+        if platform == "whatsapp":
+            return normalize_whatsapp_identifier(raw_user_id) or raw_user_id
+        return raw_user_id
+
+    def _user_id_aliases(self, platform: str, user_id: str) -> set[str]:
+        """Return all known equivalent user IDs for auth/rate-limit checks."""
+        raw_user_id = str(user_id or "").strip()
+        if not raw_user_id:
+            return set()
+
+        aliases = {raw_user_id, self._normalize_user_id(platform, raw_user_id)}
+        if platform == "whatsapp":
+            aliases.update(expand_whatsapp_aliases(raw_user_id))
+        aliases.discard("")
+        return aliases
+
+    def _user_ids_match(self, platform: str, left: str, right: str) -> bool:
+        """Return True when two user IDs represent the same principal."""
+        left_aliases = self._user_id_aliases(platform, left)
+        right_aliases = self._user_id_aliases(platform, right)
+        return bool(left_aliases and right_aliases and (left_aliases & right_aliases))
+
    # ----- Approved users -----

    def is_approved(self, platform: str, user_id: str) -> bool:
        """Check if a user is approved (paired) on a platform."""
        approved = self._load_json(self._approved_path(platform))
-        return user_id in approved
+        for approved_user_id in approved:
+            if self._user_ids_match(platform, approved_user_id, user_id):
+                return True
+        return False

    def list_approved(self, platform: str = None) -> list:
        """List approved users, optionally filtered by platform."""
@ -129,7 +162,16 @@ class PairingStore:
    def _approve_user(self, platform: str, user_id: str, user_name: str = "") -> None:
        """Add a user to the approved list. Must be called under self._lock."""
        approved = self._load_json(self._approved_path(platform))
-        approved[user_id] = {
+        normalized_user_id = self._normalize_user_id(platform, user_id)
+        duplicate_ids = [
+            approved_user_id
+            for approved_user_id in approved
+            if self._user_ids_match(platform, approved_user_id, normalized_user_id)
+        ]
+        for approved_user_id in duplicate_ids:
+            del approved[approved_user_id]
+
+        approved[normalized_user_id] = {
            "user_name": user_name,
            "approved_at": time.time(),
        }
@ -140,14 +182,25 @@ class PairingStore:
        path = self._approved_path(platform)
        with self._lock:
            approved = self._load_json(path)
-            if user_id in approved:
-                del approved[user_id]
+            matching_ids = [
+                approved_user_id
+                for approved_user_id in approved
+                if self._user_ids_match(platform, approved_user_id, user_id)
+            ]
+            if matching_ids:
+                for approved_user_id in matching_ids:
+                    del approved[approved_user_id]
                self._save_json(path, approved)
                return True
        return False

    # ----- Pending codes -----

+    @staticmethod
+    def _hash_code(code: str, salt: bytes) -> str:
+        """Hash a pairing code with the given salt using SHA-256."""
+        return hashlib.sha256(salt + code.encode("utf-8")).hexdigest()
+
    def generate_code(
        self, platform: str, user_id: str, user_name: str = ""
    ) -> Optional[str]:
@ -158,9 +211,13 @@ class PairingStore:
          - User is rate-limited (too recent request)
          - Max pending codes reached for this platform
          - User/platform is in lockout due to failed attempts
+
+        The code is NOT stored in plaintext.  Only a salted SHA-256 hash is
+        persisted so that reading the pending file does not reveal codes.
        """
        with self._lock:
            self._cleanup_expired(platform)
+            normalized_user_id = self._normalize_user_id(platform, user_id)

            # Check lockout
            if self._is_locked_out(platform):
@ -178,9 +235,18 @@ class PairingStore:
            # Generate cryptographically random code
            code = "".join(secrets.choice(ALPHABET) for _ in range(CODE_LENGTH))

-            # Store pending request
-            pending[code] = {
-                "user_id": user_id,
+            # Hash the code with a random salt before storing
+            salt = os.urandom(16)
+            code_hash = self._hash_code(code, salt)
+
+            # Use a unique entry id as the key (not the code itself)
+            entry_id = secrets.token_hex(8)
+
+            # Store pending request with hashed code
+            pending[entry_id] = {
+                "hash": code_hash,
+                "salt": salt.hex(),
+                "user_id": normalized_user_id,
                "user_name": user_name,
                "created_at": time.time(),
            }
@ -195,10 +261,16 @@ class PairingStore:
        """
        Approve a pairing code. Adds the user to the approved list.

-        Returns {user_id, user_name} on success, None if code is
+        Returns ``{user_id, user_name}`` on success, ``None`` if the code is
        invalid/expired OR the platform is currently locked out after
        ``MAX_FAILED_ATTEMPTS`` failed approvals (#10195). Callers can
        disambiguate with ``_is_locked_out(platform)``.
+
+        Verification: the user-provided code is hashed with each stored
+        entry's salt and compared to the stored hash using constant-time
+        comparison. Pre-hash entries (legacy plaintext-key format from
+        pre-upgrade pending.json files) are silently ignored — they get
+        pruned at TTL by ``_cleanup_expired``.
        """
        with self._lock:
            self._cleanup_expired(platform)
@ -213,37 +285,77 @@ class PairingStore:
                return None

            pending = self._load_json(self._pending_path(platform))
-            if code not in pending:
+
+            # Find the entry whose hash matches the provided code.
+            # Tolerate legacy plaintext-key entries (no salt/hash) and
+            # malformed entries — skip them rather than KeyError, so an
+            # in-place upgrade across an existing pending.json doesn't
+            # crash on the first approve call. Legacy entries get pruned
+            # at their TTL by _cleanup_expired.
+            matched_key = None
+            matched_entry = None
+            for entry_id, entry in pending.items():
+                if not isinstance(entry, dict):
+                    continue
+                if "salt" not in entry or "hash" not in entry:
+                    continue
+                try:
+                    salt = bytes.fromhex(entry["salt"])
+                except ValueError:
+                    continue
+                candidate_hash = self._hash_code(code, salt)
+                if secrets.compare_digest(candidate_hash, entry["hash"]):
+                    matched_key = entry_id
+                    matched_entry = entry
+                    break
+
+            if matched_key is None:
                self._record_failed_attempt(platform)
                return None

-            entry = pending.pop(code)
+            del pending[matched_key]
            self._save_json(self._pending_path(platform), pending)

            # Add to approved list
-            self._approve_user(platform, entry["user_id"], entry.get("user_name", ""))
+            self._approve_user(platform, matched_entry["user_id"],
+                               matched_entry.get("user_name", ""))

            return {
-                "user_id": entry["user_id"],
-                "user_name": entry.get("user_name", ""),
+                "user_id": matched_entry["user_id"],
+                "user_name": matched_entry.get("user_name", ""),
            }

    def list_pending(self, platform: str = None) -> list:
-        """List pending pairing requests, optionally filtered by platform."""
+        """List pending pairing requests, optionally filtered by platform.
+
+        Codes are stored hashed — the ``code`` field is replaced with the
+        first 8 hex characters of the hash so admins can distinguish entries
+        without revealing the original code. Legacy plaintext-key entries
+        (pre-hash format) are shown with a "legacy" placeholder so admins
+        can see them age out without crashing on a missing ``hash`` field.
+        """
        results = []
-        platforms = [platform] if platform else self._all_platforms("pending")
-        for p in platforms:
-            self._cleanup_expired(p)
-            pending = self._load_json(self._pending_path(p))
-            for code, info in pending.items():
-                age_min = int((time.time() - info["created_at"]) / 60)
-                results.append({
-                    "platform": p,
-                    "code": code,
-                    "user_id": info["user_id"],
-                    "user_name": info.get("user_name", ""),
-                    "age_minutes": age_min,
-                })
+        with self._lock:
+            platforms = [platform] if platform else self._all_platforms("pending")
+            for p in platforms:
+                self._cleanup_expired(p)
+                pending = self._load_json(self._pending_path(p))
+                for entry_id, info in pending.items():
+                    if not isinstance(info, dict):
+                        continue
+                    created_at = info.get("created_at")
+                    if not isinstance(created_at, (int, float)):
+                        continue
+                    age_min = int((time.time() - created_at) / 60)
+                    hash_val = info.get("hash")
+                    code_display = hash_val[:8] if isinstance(hash_val, str) else "legacy"
+                    results.append({
+                        "platform": p,
+                        "code": code_display,
+                        "user_id": info.get("user_id", ""),
+                        "user_name": info.get("user_name", ""),
+                        "age_minutes": age_min,
+                    })
        return results

    def clear_pending(self, platform: str = None) -> int:
@ -262,15 +374,20 @@ class PairingStore:
    def _is_rate_limited(self, platform: str, user_id: str) -> bool:
        """Check if a user has requested a code too recently."""
        limits = self._load_json(self._rate_limit_path())
-        key = f"{platform}:{user_id}"
-        last_request = limits.get(key, 0)
-        return (time.time() - last_request) < RATE_LIMIT_SECONDS
+        for alias in self._user_id_aliases(platform, user_id):
+            key = f"{platform}:{alias}"
+            last_request = limits.get(key, 0)
+            if (time.time() - last_request) < RATE_LIMIT_SECONDS:
+                return True
+        return False

    def _record_rate_limit(self, platform: str, user_id: str) -> None:
        """Record the time of a pairing request for rate limiting."""
        limits = self._load_json(self._rate_limit_path())
-        key = f"{platform}:{user_id}"
-        limits[key] = time.time()
+        now = time.time()
+        for alias in self._user_id_aliases(platform, user_id):
+            key = f"{platform}:{alias}"
+            limits[key] = now
        self._save_json(self._rate_limit_path(), limits)

    def _is_locked_out(self, platform: str) -> bool:
@ -297,17 +414,29 @@ class PairingStore:
    # ----- Cleanup -----

    def _cleanup_expired(self, platform: str) -> None:
-        """Remove expired pending codes."""
+        """Remove expired pending codes.
+
+        Tolerant of malformed / legacy entries — anything without a numeric
+        ``created_at`` is treated as expired (it's effectively unusable
+        with the new hash-keyed schema anyway).
+        """
        path = self._pending_path(platform)
        pending = self._load_json(path)
        now = time.time()
-        expired = [
-            code for code, info in pending.items()
-            if (now - info["created_at"]) > CODE_TTL_SECONDS
-        ]
+        expired = []
+        for entry_id, info in pending.items():
+            if not isinstance(info, dict):
+                expired.append(entry_id)
+                continue
+            created_at = info.get("created_at")
+            if not isinstance(created_at, (int, float)):
+                expired.append(entry_id)
+                continue
+            if (now - created_at) > CODE_TTL_SECONDS:
+                expired.append(entry_id)
        if expired:
-            for code in expired:
-                del pending[code]
+            for entry_id in expired:
+                del pending[entry_id]
            self._save_json(path, pending)

    def _all_platforms(self, suffix: str) -> list:
--- a/gateway/platforms/api_server.py
+++ b/gateway/platforms/api_server.py
@ -35,6 +35,7 @@ import re
 import sqlite3
 import time
 import uuid
+from pathlib import Path
 from typing import Any, Dict, List, Optional

 try:
@ -337,10 +338,12 @@ class ResponseStore:
                db_path = str(get_hermes_home() / "response_store.db")
            except Exception:
                db_path = ":memory:"
+        self._db_path: Optional[str] = db_path if db_path != ":memory:" else None
        try:
            self._conn = sqlite3.connect(db_path, check_same_thread=False)
        except Exception:
            self._conn = sqlite3.connect(":memory:", check_same_thread=False)
+            self._db_path = None
        # Use shared WAL-fallback helper so response_store.db degrades
        # gracefully on NFS/SMB/FUSE-mounted HERMES_HOME (same filesystem
        # issue addressed for state.db/kanban.db — see
@ -361,6 +364,31 @@ class ResponseStore:
            )"""
        )
        self._conn.commit()
+        # response_store.db contains conversation history (tool payloads,
+        # prompts, results). Tighten to owner-only after creation so other
+        # local users on a shared box can't read it. Run once at __init__
+        # rather than after every commit — chmod-on-every-write is wasted
+        # syscalls on a hot path.
+        self._tighten_file_permissions()
+
+    def _tighten_file_permissions(self) -> None:
+        """Force owner-only permissions on the DB and SQLite sidecars."""
+        if not self._db_path:
+            return
+        for candidate in (
+            Path(self._db_path),
+            Path(f"{self._db_path}-wal"),
+            Path(f"{self._db_path}-shm"),
+        ):
+            try:
+                if candidate.exists():
+                    candidate.chmod(0o600)
+            except OSError:
+                logger.debug(
+                    "Failed to restrict response store permissions for %s",
+                    candidate,
+                    exc_info=True,
+                )

    def get(self, response_id: str) -> Optional[Dict[str, Any]]:
        """Retrieve a stored response by ID (updates access time for LRU)."""
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@ -15,6 +15,7 @@ import re
 import socket as _socket
 import subprocess
 import sys
+import time
 import uuid
 from abc import ABC, abstractmethod
 from urllib.parse import urlsplit
@ -40,6 +41,16 @@ def _platform_name(platform) -> str:
    return str(value or "").lower()


+def _float_env(name: str, default: float) -> float:
+    raw = os.environ.get(name, "").strip()
+    if not raw:
+        return default
+    try:
+        return float(raw)
+    except (TypeError, ValueError):
+        return default
+
+
 def _thread_metadata_for_source(source, reply_to_message_id: str | None = None) -> dict | None:
    """Build platform-aware thread metadata for adapter sends.

@ -472,7 +483,7 @@ sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))

 from gateway.config import Platform, PlatformConfig
 from gateway.session import SessionSource, build_session_key
-from hermes_constants import get_hermes_dir
+from hermes_constants import get_hermes_dir, get_hermes_home


 GATEWAY_SECRET_CAPTURE_UNSUPPORTED_MESSAGE = (
@ -813,6 +824,86 @@ def cache_video_from_bytes(data: bytes, ext: str = ".mp4") -> str:
 # ---------------------------------------------------------------------------

 DOCUMENT_CACHE_DIR = get_hermes_dir("cache/documents", "document_cache")
+SCREENSHOT_CACHE_DIR = get_hermes_dir("cache/screenshots", "browser_screenshots")
+_HERMES_HOME = get_hermes_home()
+MEDIA_DELIVERY_ALLOW_DIRS_ENV = "HERMES_MEDIA_ALLOW_DIRS"
+MEDIA_DELIVERY_SAFE_ROOTS = (
+    IMAGE_CACHE_DIR,
+    AUDIO_CACHE_DIR,
+    VIDEO_CACHE_DIR,
+    DOCUMENT_CACHE_DIR,
+    SCREENSHOT_CACHE_DIR,
+    _HERMES_HOME / "image_cache",
+    _HERMES_HOME / "audio_cache",
+    _HERMES_HOME / "video_cache",
+    _HERMES_HOME / "document_cache",
+    _HERMES_HOME / "browser_screenshots",
+)
+
+
+def _media_delivery_allowed_roots() -> List[Path]:
+    """Return roots from which model-emitted local media may be delivered."""
+    roots = [Path(root) for root in MEDIA_DELIVERY_SAFE_ROOTS]
+    extra_roots = os.environ.get(MEDIA_DELIVERY_ALLOW_DIRS_ENV, "")
+    for chunk in extra_roots.split(os.pathsep):
+        for raw_root in chunk.split(","):
+            raw_root = raw_root.strip()
+            if not raw_root:
+                continue
+            root = Path(os.path.expanduser(raw_root))
+            if root.is_absolute():
+                roots.append(root)
+    return roots
+
+
+def _path_is_within(path: Path, root: Path) -> bool:
+    try:
+        path.relative_to(root)
+        return True
+    except ValueError:
+        return False
+
+
+def validate_media_delivery_path(path: str) -> Optional[str]:
+    """Return a safe absolute file path for native media delivery, else None.
+
+    MEDIA tags and bare local paths in model output are untrusted text. Only
+    existing regular files under Hermes-managed media caches, or roots the
+    operator explicitly allowlists, may be uploaded as native attachments.
+    Symlinks are resolved before the containment check.
+    """
+    if not path:
+        return None
+
+    candidate = str(path).strip()
+    if len(candidate) >= 2 and candidate[0] == candidate[-1] and candidate[0] in "`\"'":
+        candidate = candidate[1:-1].strip()
+    candidate = candidate.lstrip("`\"'").rstrip("`\"',.;:)}]")
+    if not candidate:
+        return None
+
+    expanded = Path(os.path.expanduser(candidate))
+    if not expanded.is_absolute():
+        return None
+
+    try:
+        resolved = expanded.resolve(strict=True)
+    except (OSError, RuntimeError, ValueError):
+        return None
+
+    if not resolved.is_file():
+        return None
+
+    for root in _media_delivery_allowed_roots():
+        try:
+            resolved_root = root.expanduser().resolve(strict=False)
+        except (OSError, RuntimeError, ValueError):
+            continue
+        if _path_is_within(resolved, resolved_root):
+            return str(resolved)
+
+    return None
+

 SUPPORTED_DOCUMENT_TYPES = {
    ".pdf": "application/pdf",
@ -1023,6 +1114,14 @@ class MessageEvent:
        return args


+@dataclass
+class TextDebounceState:
+    event: MessageEvent
+    task: asyncio.Task | None
+    first_ts: float
+    last_ts: float
+
+
 _PLAINTEXT_GATEWAY_RESTART_PATTERNS: tuple[re.Pattern[str], ...] = (
    re.compile(r"^(?:please\s+)?restart\s+(?:the\s+)?gateway[.!?\s]*$", re.IGNORECASE),
    re.compile(r"^(?:please\s+)?restart\s+(?:the\s+)?hermes\s+gateway[.!?\s]*$", re.IGNORECASE),
@ -1318,6 +1417,17 @@ class BasePlatformAdapter(ABC):
        self._active_sessions: Dict[str, asyncio.Event] = {}
        self._pending_messages: Dict[str, MessageEvent] = {}
        self._session_tasks: Dict[str, asyncio.Task] = {}
+        self._busy_text_mode: str = (
+            os.environ.get("HERMES_GATEWAY_BUSY_TEXT_MODE", "queue").strip().lower()
+            or "queue"
+        )
+        self._busy_text_debounce_seconds: float = _float_env(
+            "HERMES_GATEWAY_BUSY_TEXT_DEBOUNCE_SECONDS", 0.35
+        )
+        self._busy_text_hard_cap_seconds: float = _float_env(
+            "HERMES_GATEWAY_BUSY_TEXT_HARD_CAP_SECONDS", 1.0
+        )
+        self._text_debounce: dict[str, TextDebounceState] = {}
        # Background message-processing tasks spawned by handle_message().
        # Gateway shutdown cancels these so an old gateway instance doesn't keep
        # working on a task after --replace or manual restarts.
@ -2119,6 +2229,35 @@ class BasePlatformAdapter(ABC):
            text = f"{caption}\n{text}"
        return await self.send(chat_id=chat_id, content=text, reply_to=reply_to, metadata=metadata)

+    @staticmethod
+    def validate_media_delivery_path(path: str) -> Optional[str]:
+        """Return a resolved path if it is safe for native attachment upload."""
+        return validate_media_delivery_path(path)
+
+    @staticmethod
+    def filter_media_delivery_paths(media_files) -> List[Tuple[str, bool]]:
+        """Drop unsafe MEDIA paths and normalize accepted paths."""
+        safe_media: List[Tuple[str, bool]] = []
+        for media_path, is_voice in media_files or []:
+            safe_path = validate_media_delivery_path(str(media_path))
+            if safe_path:
+                safe_media.append((safe_path, bool(is_voice)))
+            else:
+                logger.warning("Skipping unsafe MEDIA directive path outside allowed roots")
+        return safe_media
+
+    @staticmethod
+    def filter_local_delivery_paths(file_paths) -> List[str]:
+        """Drop unsafe bare local file paths and normalize accepted paths."""
+        safe_paths: List[str] = []
+        for file_path in file_paths or []:
+            safe_path = validate_media_delivery_path(str(file_path))
+            if safe_path:
+                safe_paths.append(safe_path)
+            else:
+                logger.warning("Skipping unsafe local file path outside allowed roots")
+        return safe_paths
+
    @staticmethod
    def extract_media(content: str) -> Tuple[List[Tuple[str, bool]], str]:
        """
@ -2616,6 +2755,161 @@ class BasePlatformAdapter(ABC):
            return f"{existing_text}\n\n{new_text}".strip()
        return existing_text

+    def _text_debounce_store(self) -> dict[str, TextDebounceState]:
+        store = getattr(self, "_text_debounce", None)
+        if store is None:
+            store = {}
+            self._text_debounce = store
+        return store
+
+    def _is_queue_text_debounce_candidate(self, event: MessageEvent) -> bool:
+        """Return True for normal text eligible for queue-mode debounce."""
+        result = (
+            getattr(self, "_busy_text_mode", "queue") == "queue"
+            and event.message_type == MessageType.TEXT
+            and not getattr(event, "internal", False)
+            and not event.is_command()
+            and bool((event.text or "").strip())
+        )
+        if result:
+            logger.debug(
+                "[%s] Queue-text debounce candidate accepted: session=%s text_len=%d",
+                self.name,
+                getattr(event, "session_key", "?"),
+                len(event.text or ""),
+            )
+        return result
+
+    def _can_merge_text_debounce_events(self, existing: MessageEvent, event: MessageEvent) -> bool:
+        """Return True when two text debounce events came from the same sender."""
+
+        def _identity(candidate: MessageEvent) -> tuple[str, ...] | None:
+            source = getattr(candidate, "source", None)
+            if source is None:
+                return None
+            platform = _platform_name(getattr(source, "platform", None))
+            sender = getattr(source, "user_id_alt", None) or getattr(source, "user_id", None)
+            if sender:
+                return (platform, str(sender))
+            if getattr(source, "chat_type", None) in {"dm", "private"} and getattr(source, "chat_id", None):
+                return (platform, "dm", str(source.chat_id))
+            return None
+
+        existing_sender = _identity(existing)
+        incoming_sender = _identity(event)
+        return existing_sender is not None and existing_sender == incoming_sender
+
+    def _text_debounce_delay(self, session_key: str) -> float:
+        """Return bounded busy-text debounce delay for ``session_key``."""
+        state = self._text_debounce_store().get(session_key)
+        if state is None:
+            return 0.0
+        now = time.monotonic()
+        window_deadline = state.last_ts + self._busy_text_debounce_seconds
+        hard_cap_deadline = state.first_ts + self._busy_text_hard_cap_seconds
+        return max(0.0, min(window_deadline, hard_cap_deadline) - now)
+
+    async def _queue_text_debounce(self, session_key: str, event: MessageEvent) -> None:
+        """Buffer normal queue-mode busy text and schedule a bounded flush."""
+        store = self._text_debounce_store()
+        state = store.get(session_key)
+
+        if state is not None and not self._can_merge_text_debounce_events(state.event, event):
+            # Preserve sender attribution in shared sessions. The current
+            # buffer becomes the next pending turn; the new sender starts a
+            # fresh debounce burst when the pending slot allows it.
+            await self._flush_text_debounce_now(session_key)
+            state = store.get(session_key)
+            if state is not None and not self._can_merge_text_debounce_events(state.event, event):
+                existing_pending = self._pending_messages.get(session_key)
+                if existing_pending is not None and self._can_merge_text_debounce_events(existing_pending, event):
+                    merge_pending_message_event(
+                        self._pending_messages,
+                        session_key,
+                        event,
+                        merge_text=True,
+                    )
+                return
+
+        now = time.monotonic()
+        if state is None:
+            state = TextDebounceState(
+                event=event,
+                task=None,
+                first_ts=now,
+                last_ts=now,
+            )
+            store[session_key] = state
+        else:
+            if event.text:
+                state.event.text = (
+                    f"{state.event.text}\n{event.text}"
+                    if state.event.text
+                    else event.text
+                )
+            latest_message_id = getattr(event, "message_id", None)
+            latest_anchor = latest_message_id or getattr(event, "reply_to_message_id", None)
+            if latest_message_id is not None:
+                state.event.message_id = str(latest_message_id)
+            if latest_anchor is not None and hasattr(state.event, "reply_to_message_id"):
+                state.event.reply_to_message_id = str(latest_anchor)
+            state.last_ts = now
+
+        if state.task is not None and not state.task.done():
+            state.task.cancel()
+
+        delay = self._text_debounce_delay(session_key)
+        state.task = asyncio.create_task(self._flush_text_debounce(session_key, delay))
+
+    async def _flush_text_debounce(self, session_key: str, delay: float) -> None:
+        """Timer task that flushes the debounced text buffer."""
+        try:
+            await asyncio.sleep(delay)
+            await self._flush_text_debounce_now(session_key)
+        except asyncio.CancelledError:
+            return
+        finally:
+            current = asyncio.current_task()
+            state = self._text_debounce_store().get(session_key)
+            if state is not None and state.task is current:
+                state.task = None
+
+    async def _flush_text_debounce_now(self, session_key: str) -> bool:
+        """Force-flush one debounced busy-text burst into the pending slot."""
+        store = self._text_debounce_store()
+        state = store.get(session_key)
+        if state is None:
+            return False
+
+        current = asyncio.current_task()
+        if state.task is not None and state.task is not current and not state.task.done():
+            state.task.cancel()
+        state.task = None
+
+        existing_pending = self._pending_messages.get(session_key)
+        if (
+            existing_pending is not None
+            and not self._can_merge_text_debounce_events(existing_pending, state.event)
+        ):
+            return False
+
+        state = store.pop(session_key, None)
+        if state is None:
+            return False
+        merge_pending_message_event(
+            self._pending_messages,
+            session_key,
+            state.event,
+            merge_text=True,
+        )
+        return True
+
+    def _discard_text_debounce(self, session_key: str) -> None:
+        """Cancel and drop pending text debounce state for control commands."""
+        state = self._text_debounce_store().pop(session_key, None)
+        if state is not None and state.task is not None and not state.task.done():
+            state.task.cancel()
+
    # ------------------------------------------------------------------
    # Session task + guard ownership helpers
    # ------------------------------------------------------------------
@ -2685,6 +2979,7 @@ class BasePlatformAdapter(ABC):
        self._active_sessions.pop(session_key, None)
        self._pending_messages.pop(session_key, None)
        self._session_tasks.pop(session_key, None)
+        self._discard_text_debounce(session_key)
        return True

    def _start_session_processing(
@ -2766,6 +3061,7 @@ class BasePlatformAdapter(ABC):
                )
        if discard_pending:
            self._pending_messages.pop(session_key, None)
+            self._discard_text_debounce(session_key)
        if release_guard:
            self._release_session_guard(session_key)

@ -2780,6 +3076,7 @@ class BasePlatformAdapter(ABC):
        command-scoped guard, then — if a follow-up message landed while the
        command was running — spawns a fresh processing task for it.
        """
+        await self._flush_text_debounce_now(session_key)
        pending_event = self._pending_messages.pop(session_key, None)
        self._release_session_guard(session_key, guard=command_guard)
        if pending_event is None:
@ -2911,6 +3208,7 @@ class BasePlatformAdapter(ABC):
                # through the dedicated handoff path that serializes
                # cancellation + runner response + pending drain.
                if cmd in {"stop", "new", "reset"}:
+                    self._discard_text_debounce(session_key)
                    try:
                        await self._dispatch_active_session_command(event, session_key, cmd)
                    except Exception as e:
@ -2955,8 +3253,9 @@ class BasePlatformAdapter(ABC):
            # clarify-intercept can resolve it and unblock the agent.
            #
            # Without this bypass: the message gets queued in
-            # _pending_messages AND triggers an interrupt, killing the
-            # agent run mid-clarify and discarding the user's answer.
+            # _pending_messages as a follow-up turn instead of reaching the
+            # clarify resolver, leaving the agent blocked and discarding the
+            # user's answer.
            # Same shape as the /approve deadlock fix (PR #4926) — both
            # cases are "agent thread blocked on Event.wait, message must
            # reach the resolver before being treated as a new turn."
@ -3015,27 +3314,28 @@ class BasePlatformAdapter(ABC):
                merge_pending_message_event(self._pending_messages, session_key, event)
                return  # Don't interrupt now - will run after current task completes

-            # Default behavior for non-photo follow-ups: interrupt the running agent.
-            #
-            # Use merge_text=True so rapid TEXT follow-ups (#4469) accumulate
-            # into the single pending slot instead of clobbering each other.
-            # Without merging, three rapid messages "A", "B", "C" land like:
-            #   _pending_messages[k] = A  (interrupts)
-            #   _pending_messages[k] = B  (replaces A before consumer reads)
-            #   _pending_messages[k] = C  (replaces B)
-            # ...and only "C" reaches the next turn.  merge_pending_message_event
-            # already does the right thing for photo/media bursts; the
-            # ``merge_text=True`` flag extends that to plain TEXT events.
-            # Same shape as the Telegram bursty-grace path in gateway/run.py.
-            logger.debug("[%s] New message while session %s is active — triggering interrupt", self.name, session_key)
-            merge_pending_message_event(
-                self._pending_messages,
-                session_key,
-                event,
-                merge_text=True,
-            )
-            # Signal the interrupt (the processing task checks this)
-            self._active_sessions[session_key].set()
+            if self._is_queue_text_debounce_candidate(event):
+                logger.debug(
+                    "[%s] New text message while session %s is active — "
+                    "debouncing follow-up (busy_text_mode=queue, window=%.2fs)",
+                    self.name,
+                    session_key,
+                    self._busy_text_debounce_seconds,
+                )
+                await self._queue_text_debounce(session_key, event)
+            else:
+                logger.debug(
+                    "[%s] New message while session %s is active — queuing follow-up "
+                    "(no interrupt, will cascade after current turn)",
+                    self.name,
+                    session_key,
+                )
+                merge_pending_message_event(
+                    self._pending_messages,
+                    session_key,
+                    event,
+                    merge_text=event.message_type == MessageType.TEXT,
+                )
            return  # Don't process now - will be handled after current task finishes
        
        # Mark session as active BEFORE spawning background task to close
@ -3166,6 +3466,7 @@ class BasePlatformAdapter(ABC):

                # Extract MEDIA:<path> tags (from TTS tool) before other processing
                media_files, response = self.extract_media(response)
+                media_files = self.filter_media_delivery_paths(media_files)

                # Extract image URLs and send them as native platform attachments
                images, text_content = self.extract_images(response)
@ -3179,6 +3480,7 @@ class BasePlatformAdapter(ABC):
                # Auto-detect bare local file paths for native media delivery
                # (helps small models that don't use MEDIA: syntax)
                local_files, text_content = self.extract_local_files(text_content)
+                local_files = self.filter_local_delivery_paths(local_files)
                if local_files:
                    logger.info("[%s] extract_local_files found %d file(s) in response", self.name, len(local_files))
                
@ -3387,10 +3689,15 @@ class BasePlatformAdapter(ABC):
                ProcessingOutcome.SUCCESS if processing_ok else ProcessingOutcome.FAILURE,
            )

+            # The active drain owns debounce state. If a queue-mode timer has
+            # not fired yet, force-flush into _pending_messages here and let
+            # this task hand off the follow-up.
+            await self._flush_text_debounce_now(session_key)
+
            # Check if there's a pending message that was queued during our processing
            if session_key in self._pending_messages:
                pending_event = self._pending_messages.pop(session_key)
-                logger.debug("[%s] Processing queued message from interrupt", self.name)
+                logger.debug("[%s] Processing queued follow-up message", self.name)
                # Keep the _active_sessions entry live across the turn chain
                # and only CLEAR the interrupt Event — do NOT delete the entry.
                # If we deleted here, a concurrent inbound message arriving
@ -3399,7 +3706,7 @@ class BasePlatformAdapter(ABC):
                # with the recursive drain below.  Two agents on one
                # session_key = duplicate responses, duplicate tool calls.
                # Clearing the Event keeps the guard live so follow-ups take
-                # the busy-handler path (queue + interrupt) as intended.
+                # the busy-handler path as intended.
                _active = self._active_sessions.get(session_key)
                if _active is not None:
                    _active.clear()
@ -3492,6 +3799,9 @@ class BasePlatformAdapter(ABC):
                    await self.stop_typing(event.source.chat_id)
            except Exception:
                pass
+            # Final drain/release boundary: force-flush any timer that missed
+            # the in-band drain before deciding whether the guard can clear.
+            await self._flush_text_debounce_now(session_key)
            # Late-arrival drain: a message may have arrived during the
            # cleanup awaits above (typing_task cancel, stop_typing).  Such
            # messages passed the Level-1 guard (entry still live, Event
@ -3611,6 +3921,10 @@ class BasePlatformAdapter(ABC):
        self._session_tasks.clear()
        self._pending_messages.clear()
        self._active_sessions.clear()
+        for state in list(self._text_debounce_store().values()):
+            if state.task is not None and not state.task.done():
+                state.task.cancel()
+        self._text_debounce_store().clear()

    def has_pending_interrupt(self, session_key: str) -> bool:
        """Check if there's a pending interrupt for a session."""
--- a/gateway/platforms/bluebubbles.py
+++ b/gateway/platforms/bluebubbles.py
@ -189,7 +189,10 @@ class BlueBubblesAdapter(BasePlatformAdapter):
        app = web.Application()
        app.router.add_get("/health", lambda _: web.Response(text="ok"))
        app.router.add_post(self.webhook_path, self._handle_webhook)
-        self._runner = web.AppRunner(app)
+        # The webhook auth value is carried in the query string because the
+        # BlueBubbles webhook API cannot send custom headers. Do not let
+        # aiohttp access logs write that request target to agent.log.
+        self._runner = web.AppRunner(app, access_log=None)
        await self._runner.setup()
        site = web.TCPSite(self._runner, self.webhook_host, self.webhook_port)
        await site.start()
@ -242,6 +245,14 @@ class BlueBubblesAdapter(BasePlatformAdapter):
            return f"{base}?password={quote(self.password, safe='')}"
        return base

+    @property
+    def _webhook_register_url_for_log(self) -> str:
+        """Webhook registration URL safe for logs."""
+        base = self._webhook_url
+        if self.password:
+            return f"{base}?password=***"
+        return base
+
    async def _find_registered_webhooks(self, url: str) -> list:
        """Return list of BB webhook entries matching *url*."""
        try:
@ -269,7 +280,8 @@ class BlueBubblesAdapter(BasePlatformAdapter):
        existing = await self._find_registered_webhooks(webhook_url)
        if existing:
            logger.info(
-                "[bluebubbles] webhook already registered: %s", webhook_url
+                "[bluebubbles] webhook already registered: %s",
+                self._webhook_register_url_for_log,
            )
            return True

@ -284,7 +296,7 @@ class BlueBubblesAdapter(BasePlatformAdapter):
            if 200 <= status < 300:
                logger.info(
                    "[bluebubbles] webhook registered with server: %s",
-                    webhook_url,
+                    self._webhook_register_url_for_log,
                )
                return True
            else:
@ -324,7 +336,8 @@ class BlueBubblesAdapter(BasePlatformAdapter):
                    removed = True
            if removed:
                logger.info(
-                    "[bluebubbles] webhook unregistered: %s", webhook_url
+                    "[bluebubbles] webhook unregistered: %s",
+                    self._webhook_register_url_for_log,
                )
        except Exception as exc:
            logger.debug(
@ -934,4 +947,3 @@ class BlueBubblesAdapter(BasePlatformAdapter):
            asyncio.create_task(self.mark_read(session_chat_id))

        return web.Response(text="ok")
-
--- a/gateway/platforms/dingtalk.py
+++ b/gateway/platforms/dingtalk.py
@ -358,6 +358,19 @@ class DingTalkAdapter(BasePlatformAdapter):
            await asyncio.gather(*self._bg_tasks, return_exceptions=True)
            self._bg_tasks.clear()

+        # Finalize any open streaming cards before the HTTP client closes so
+        # they don't stay stuck in streaming state on DingTalk's UI after
+        # a gateway restart.  _close_streaming_siblings handles its own
+        # per-card exceptions; the outer try is a safety net for token fetch.
+        for _chat_id in list(self._streaming_cards):
+            try:
+                await self._close_streaming_siblings(_chat_id)
+            except Exception as _exc:
+                logger.debug(
+                    "[%s] Failed to finalize streaming card on disconnect for %s: %s",
+                    self.name, _chat_id, _exc,
+                )
+
        if self._http_client:
            await self._http_client.aclose()
            self._http_client = None
--- a/gateway/platforms/feishu.py
+++ b/gateway/platforms/feishu.py
@ -1514,8 +1514,10 @@ class FeishuAdapter(BasePlatformAdapter):
            connection_mode=str(
                extra.get("connection_mode") or os.getenv("FEISHU_CONNECTION_MODE", "websocket")
            ).strip().lower(),
-            encrypt_key=os.getenv("FEISHU_ENCRYPT_KEY", "").strip(),
-            verification_token=os.getenv("FEISHU_VERIFICATION_TOKEN", "").strip(),
+            encrypt_key=str(extra.get("encrypt_key") or os.getenv("FEISHU_ENCRYPT_KEY", "")).strip(),
+            verification_token=str(
+                extra.get("verification_token") or os.getenv("FEISHU_VERIFICATION_TOKEN", "")
+            ).strip(),
            group_policy=os.getenv("FEISHU_GROUP_POLICY", "allowlist").strip().lower(),
            allowed_group_users=frozenset(
                item.strip()
@ -1642,6 +1644,11 @@ class FeishuAdapter(BasePlatformAdapter):
                self._connection_mode,
            )
            return False
+        if self._connection_mode == "webhook" and not (self._verification_token or self._encrypt_key):
+            logger.error(
+                "[Feishu] Webhook mode requires FEISHU_VERIFICATION_TOKEN or FEISHU_ENCRYPT_KEY."
+            )
+            return False

        try:
            self._app_lock_identity = self._app_id
@ -2563,13 +2570,44 @@ class FeishuAdapter(BasePlatformAdapter):
        if approval_id is None:
            logger.debug("[Feishu] Card action missing approval_id, ignoring")
            return P2CardActionTriggerResponse() if P2CardActionTriggerResponse else None
+        state = self._approval_state.get(approval_id)
+        if not state:
+            logger.debug("[Feishu] Approval %s already resolved or unknown", approval_id)
+            return P2CardActionTriggerResponse() if P2CardActionTriggerResponse else None
        choice = _APPROVAL_CHOICE_MAP.get(action_value.get("hermes_action"), "deny")

        operator = getattr(event, "operator", None)
        open_id = str(getattr(operator, "open_id", "") or "")
+        sender_id = SimpleNamespace(open_id=open_id, user_id=str(getattr(operator, "user_id", "") or ""))
+        if not self._allow_group_message(sender_id, state.get("chat_id", ""), is_bot=False):
+            logger.warning("[Feishu] Unauthorized approval click by %s", open_id or "<unknown>")
+            return P2CardActionTriggerResponse() if P2CardActionTriggerResponse else None
+
+        callback_chat_id = str(getattr(getattr(event, "context", None), "open_chat_id", "") or "")
+        expected_chat_id = str(state.get("chat_id", "") or "")
+        if callback_chat_id and expected_chat_id and callback_chat_id != expected_chat_id:
+            logger.warning(
+                "[Feishu] Approval callback chat mismatch for %s (expected=%s, got=%s)",
+                approval_id,
+                expected_chat_id,
+                callback_chat_id,
+            )
+            return P2CardActionTriggerResponse() if P2CardActionTriggerResponse else None
+
        user_name = self._get_cached_sender_name(open_id) or open_id

-        if not self._submit_on_loop(loop, self._resolve_approval(approval_id, choice, user_name)):
+        chat_context = getattr(event, "context", None)
+        chat_id = str(getattr(chat_context, "open_chat_id", "") or "")
+        if not self._submit_on_loop(
+            loop,
+            self._resolve_approval(
+                approval_id=approval_id,
+                choice=choice,
+                user_name=user_name,
+                open_id=open_id,
+                chat_id=chat_id,
+            ),
+        ):
            return P2CardActionTriggerResponse() if P2CardActionTriggerResponse else None

        if P2CardActionTriggerResponse is None:
@ -2617,12 +2655,34 @@ class FeishuAdapter(BasePlatformAdapter):
            response.card = card
        return response

-    async def _resolve_approval(self, approval_id: Any, choice: str, user_name: str) -> None:
+    async def _resolve_approval(
+        self,
+        approval_id: Any,
+        choice: str,
+        user_name: str,
+        *,
+        open_id: str = "",
+        chat_id: str = "",
+    ) -> None:
        """Pop approval state and unblock the waiting agent thread."""
-        state = self._approval_state.pop(approval_id, None)
+        state = self._approval_state.get(approval_id)
        if not state:
            logger.debug("[Feishu] Approval %s already resolved or unknown", approval_id)
            return
+        if not self._is_interactive_operator_authorized(open_id):
+            logger.warning("[Feishu] Unauthorized approval click by %s for approval %s", open_id or "<unknown>", approval_id)
+            return
+        expected_chat_id = str(state.get("chat_id", "") or "")
+        if expected_chat_id and chat_id and expected_chat_id != chat_id:
+            logger.warning(
+                "[Feishu] Approval %s chat mismatch (expected=%s, got=%s)",
+                approval_id, expected_chat_id, chat_id,
+            )
+            return
+        state = self._approval_state.pop(approval_id, None)
+        if not state:
+            logger.debug("[Feishu] Approval %s already resolved while validating callback", approval_id)
+            return
        try:
            from tools.approval import resolve_gateway_approval
            count = resolve_gateway_approval(state["session_key"], choice)
@ -3229,11 +3289,6 @@ class FeishuAdapter(BasePlatformAdapter):
            self._record_webhook_anomaly(remote_ip, "400")
            return web.json_response({"code": 400, "msg": "invalid json"}, status=400)

-        # URL verification challenge — respond before other checks so that Feishu's
-        # subscription setup works even before encrypt_key is wired.
-        if payload.get("type") == "url_verification":
-            return web.json_response({"challenge": payload.get("challenge", "")})
-
        # Verification token check — second layer of defence beyond signature (matches openclaw).
        if self._verification_token:
            header = payload.get("header") or {}
@ -3243,6 +3298,13 @@ class FeishuAdapter(BasePlatformAdapter):
                self._record_webhook_anomaly(remote_ip, "401-token")
                return web.Response(status=401, text="Invalid verification token")

+        # URL verification challenge — Feishu includes the verification token in
+        # challenge requests. Validate the token (above) before reflecting the
+        # challenge so an unauthenticated remote request cannot prove endpoint
+        # control by getting attacker-supplied challenge data echoed back.
+        if payload.get("type") == "url_verification":
+            return web.json_response({"challenge": payload.get("challenge", "")})
+
        # Timing-safe signature verification (only enforced when encrypt_key is set).
        if self._encrypt_key and not self._is_webhook_signature_valid(request.headers, body_bytes):
            logger.warning("[Feishu] Webhook rejected: invalid signature from %s", remote_ip)
--- a/gateway/platforms/matrix.py
+++ b/gateway/platforms/matrix.py
@ -138,7 +138,8 @@ _OUTBOUND_MENTION_RE = re.compile(
 )

 _E2EE_INSTALL_HINT = (
-    "Install with: pip install 'mautrix[encryption]'  (requires libolm C library)"
+    "Install with: pip install 'mautrix[encryption]' asyncpg aiosqlite  "
+    "(requires libolm C library)"
 )

 _MATRIX_IMAGE_FILENAME_EXTS = frozenset({
@ -214,9 +215,22 @@ def _create_matrix_session(proxy_url: str | None):


 def _check_e2ee_deps() -> bool:
-    """Return True if mautrix E2EE dependencies (python-olm) are available."""
+    """Return True if mautrix E2EE dependencies are available.
+
+    Verifies python-olm (via mautrix.crypto.OlmMachine), the SQLite crypto
+    store backend (mautrix.crypto.store.asyncpg.PgCryptoStore — yes, the
+    PgCryptoStore class also drives the sqlite backend in mautrix 0.21),
+    and the database drivers actually used at connect time (``asyncpg`` for
+    the underlying upgrade_table machinery, ``aiosqlite`` for the
+    ``sqlite:///`` URL we pass to ``Database.create``).  Without all four,
+    encrypted rooms fail at connect time with a confusing
+    ``No module named 'asyncpg'`` (#31116).
+    """
    try:
        from mautrix.crypto import OlmMachine  # noqa: F401
+        from mautrix.crypto.store.asyncpg import PgCryptoStore  # noqa: F401
+        import asyncpg  # noqa: F401
+        import aiosqlite  # noqa: F401

        return True
    except (ImportError, AttributeError):
@ -226,8 +240,13 @@ def _check_e2ee_deps() -> bool:
 def check_matrix_requirements() -> bool:
    """Return True if the Matrix adapter can be used.

-    Lazy-installs mautrix via ``tools.lazy_deps.ensure("platform.matrix")``
-    on first call if not present. Rebinds all module-level type globals on success.
+    Lazy-installs the full ``platform.matrix`` feature group via
+    ``tools.lazy_deps.ensure_and_bind`` whenever any of the declared
+    packages (mautrix, Markdown, aiosqlite, asyncpg, aiohttp-socks) is
+    missing — not just mautrix itself.  Previously this short-circuited on
+    ``import mautrix``, which left the other four packages uninstalled
+    forever and broke E2EE connect with ``No module named 'asyncpg'``
+    (#31116).  Rebinds module-level type globals on success.
    """
    token = os.getenv("MATRIX_ACCESS_TOKEN", "")
    password = os.getenv("MATRIX_PASSWORD", "")
@ -239,9 +258,20 @@ def check_matrix_requirements() -> bool:
    if not homeserver:
        logger.warning("Matrix: MATRIX_HOMESERVER not set")
        return False
+
+    # Check whether any package in the platform.matrix feature group is
+    # missing.  ``feature_missing`` is cheap (per-spec importlib.metadata
+    # lookups) and correctly handles ``mautrix[encryption]`` by stripping
+    # the extras marker before checking the bare package.
    try:
-        import mautrix  # noqa: F401
-    except ImportError:
+        from tools.lazy_deps import feature_missing, ensure_and_bind
+        missing = feature_missing("platform.matrix")
+    except Exception as exc:  # pragma: no cover — defensive
+        logger.debug("Matrix: lazy_deps lookup failed: %s", exc)
+        missing = ()
+        ensure_and_bind = None  # type: ignore[assignment]
+
+    if missing or ensure_and_bind is None:
        def _import():
            from mautrix.types import (
                ContentURI, EventID, EventType, PaginationDirection,
@ -261,10 +291,14 @@ def check_matrix_requirements() -> bool:
                "UserID": UserID,
            }

-        from tools.lazy_deps import ensure_and_bind
+        if ensure_and_bind is None:
+            return False
        if not ensure_and_bind("platform.matrix", _import, globals(), prompt=False):
            logger.warning(
-                "Matrix: mautrix not installed. Run: pip install 'mautrix[encryption]'"
+                "Matrix: required packages not installed (%s). "
+                "Run: pip install 'mautrix[encryption]' asyncpg aiosqlite "
+                "Markdown aiohttp-socks",
+                ", ".join(missing) if missing else "platform.matrix",
            )
            return False

--- a/gateway/platforms/msgraph_webhook.py
+++ b/gateway/platforms/msgraph_webhook.py
@ -133,6 +133,12 @@ class MSGraphWebhookAdapter(BasePlatformAdapter):
        self._notification_scheduler = scheduler

    async def connect(self) -> bool:
+        if self._client_state is None:
+            logger.error(
+                "[msgraph_webhook] Refusing to start without extra.client_state configured"
+            )
+            return False
+
        app = web.Application()
        app.router.add_get(self._health_path, self._handle_health)
        app.router.add_get(self._webhook_path, self._handle_validation)
@ -310,7 +316,7 @@ class MSGraphWebhookAdapter(BasePlatformAdapter):
        """
        expected = self._client_state
        if expected is None:
-            return True
+            return False
        provided = self._string_or_none(notification.get("clientState"))
        if provided is None:
            return False
--- a/gateway/platforms/qqbot/adapter.py
+++ b/gateway/platforms/qqbot/adapter.py
@ -534,9 +534,30 @@ class QQAdapter(BasePlatformAdapter):
                self._mark_transport_disconnected()
                self._fail_pending("Connection closed")

-                # Stop reconnecting for fatal codes
-                if code in {4914, 4915}:
-                    desc = "offline/sandbox-only" if code == 4914 else "banned"
+                # Stop reconnecting for fatal codes (unrecoverable errors)
+                if code in {
+                        4001,  # Invalid opcode
+                        4002,  # Invalid payload
+                        4010,  # Invalid shard
+                        4011,  # Sharding required
+                        4012,  # Invalid API version
+                        4013,  # Invalid intent
+                        4014,  # Intent not authorized
+                        4914,  # Offline/sandbox-only
+                        4915,  # Banned
+                }:
+                    fatal_descriptions = {
+                        4001: "invalid opcode",
+                        4002: "invalid payload",
+                        4010: "invalid shard",
+                        4011: "sharding required",
+                        4012: "invalid API version",
+                        4013: "invalid intent",
+                        4014: "intent not authorized",
+                        4914: "offline/sandbox-only",
+                        4915: "banned",
+                    }
+                    desc = fatal_descriptions.get(code, f"fatal error (code={code})")
                    logger.error(
                        "[%s] Bot is %s. Check QQ Open Platform.", self._log_tag, desc
                    )
@ -573,10 +594,11 @@ class QQAdapter(BasePlatformAdapter):
                    self._token_expires_at = 0.0

                # Session invalid → clear session, will re-identify on next Hello
+                # Note: 4009 (connection timeout) is NOT included here — it is
+                # resumable per the QQ protocol and should preserve session state.
                if code in {
                        4006,
                        4007,
-                        4009,
                        4900,
                        4901,
                        4902,
@ -705,9 +727,8 @@ class QQAdapter(BasePlatformAdapter):
                "token": f"QQBot {token}",
                "intents": (1 << 25)
                           | (1 << 30)
-                           | (
-                                   1 << 12
-                           ),  # C2C_GROUP_AT_MESSAGES + PUBLIC_GUILD_MESSAGES + DIRECT_MESSAGE
+                           | (1 << 12)
+                           | (1 << 26),  # C2C_GROUP_AT_MESSAGES + PUBLIC_GUILD_MESSAGES + DIRECT_MESSAGE + INTERACTION
                "shard": [0, 1],
                "properties": {
                    "$os": "macOS",
@ -826,6 +847,32 @@ class QQAdapter(BasePlatformAdapter):
        if op == 11:
            return

+        # op 7 = Server Reconnect — server asks client to reconnect (e.g.
+        # load-balancing, maintenance).  Close the WS so _read_events raises
+        # and the outer loop triggers a reconnect with Resume.
+        if op == 7:
+            logger.info("[%s] Server requested reconnect (op 7)", self._log_tag)
+            if self._ws and not self._ws.closed:
+                self._create_task(self._ws.close())
+            return
+
+        # op 9 = Invalid Session — d=True means session is resumable,
+        # d=False means we must re-identify from scratch.
+        if op == 9:
+            resumable = bool(d) if d is not None else False
+            if not resumable:
+                logger.info(
+                    "[%s] Invalid session (op 9, not resumable), clearing session",
+                    self._log_tag,
+                )
+                self._session_id = None
+                self._last_seq = None
+            else:
+                logger.info("[%s] Invalid session (op 9, resumable)", self._log_tag)
+            if self._ws and not self._ws.closed:
+                self._create_task(self._ws.close())
+            return
+
        logger.debug("[%s] Unknown op: %s", self._log_tag, op)

    def _handle_ready(self, d: Any) -> None:
@ -1007,6 +1054,46 @@ class QQAdapter(BasePlatformAdapter):
        "deny": "deny",
    }

+    @staticmethod
+    def _parse_gateway_session_key(session_key: str) -> Optional[Dict[str, str]]:
+        """Parse ``agent:main:<platform>:<chat_type>:<chat_id>[:<user_id>]``."""
+        parts = str(session_key or "").split(":")
+        if len(parts) < 5 or parts[0] != "agent" or parts[1] != "main":
+            return None
+        parsed = {
+            "platform": parts[2],
+            "chat_type": parts[3],
+            "chat_id": parts[4],
+        }
+        if len(parts) > 5:
+            parsed["user_id"] = parts[5]
+        return parsed
+
+    def _is_authorized_interaction_for_session(
+            self,
+            event: InteractionEvent,
+            session_key: str,
+    ) -> bool:
+        """Authorize approval/update interactions against session + operator."""
+        parsed = self._parse_gateway_session_key(session_key)
+        operator = str(event.operator_openid or "").strip()
+        if not parsed or parsed.get("platform") != "qqbot" or not operator:
+            return False
+
+        chat_type = parsed.get("chat_type", "")
+        chat_id = parsed.get("chat_id", "")
+        if chat_type == "c2c":
+            return bool(chat_id) and operator == chat_id
+
+        if chat_type in {"group", "guild"}:
+            event_chat = str(event.group_openid or event.guild_id or "").strip()
+            if not event_chat or event_chat != chat_id:
+                return False
+            session_user = str(parsed.get("user_id", "")).strip()
+            return bool(session_user) and operator == session_user
+
+        return False
+
    async def _default_interaction_dispatch(
            self,
            event: InteractionEvent,
@ -1040,6 +1127,13 @@ class QQAdapter(BasePlatformAdapter):
                    self._log_tag, decision, session_key,
                )
                return
+            if not self._is_authorized_interaction_for_session(event, session_key):
+                logger.warning(
+                    "[%s] Rejected unauthorized approval click for session %s "
+                    "(operator=%s)",
+                    self._log_tag, session_key, event.operator_openid,
+                )
+                return
            try:
                # Import lazily to keep the adapter importable in tests that
                # don't exercise the approval subsystem.
@ -1060,6 +1154,13 @@ class QQAdapter(BasePlatformAdapter):

        update_answer = parse_update_prompt_button_data(button_data)
        if update_answer is not None:
+            update_session_key = f"agent:main:qqbot:{event.scene}:{event.group_openid or event.guild_id or event.user_openid}"
+            if not self._is_authorized_interaction_for_session(event, update_session_key):
+                logger.warning(
+                    "[%s] Rejected unauthorized update prompt click (operator=%s)",
+                    self._log_tag, event.operator_openid,
+                )
+                return
            self._write_update_response(update_answer, event.operator_openid)
            return

@ -1607,7 +1708,7 @@ class QQAdapter(BasePlatformAdapter):
            elif ct.startswith("image/"):
                # Image: download and cache locally.
                try:
-                    cached_path = await self._download_and_cache(url, ct)
+                    cached_path = await self._download_and_cache(url, ct, filename)
                    if cached_path and os.path.isfile(cached_path):
                        image_urls.append(cached_path)
                        image_media_types.append(ct or "image/jpeg")
@ -1620,11 +1721,15 @@ class QQAdapter(BasePlatformAdapter):
                except Exception as exc:
                    logger.debug("[%s] Failed to cache image: %s", self._log_tag, exc)
            else:
-                # Other attachments (video, file, etc.): record as text.
+                # Other attachments (video, file, etc.): download and record with path.
                try:
-                    cached_path = await self._download_and_cache(url, ct)
+                    cached_path = await self._download_and_cache(url, ct, filename)
                    if cached_path:
-                        other_attachments.append(f"[Attachment: {filename or ct}]")
+                        name = filename or ct
+                        if ct.startswith("video/"):
+                            other_attachments.append(f"[video: {name} ({cached_path})]")
+                        else:
+                            other_attachments.append(f"[file: {name} ({cached_path})]")
                except Exception as exc:
                    logger.debug("[%s] Failed to cache attachment: %s", self._log_tag, exc)

@ -1636,8 +1741,14 @@ class QQAdapter(BasePlatformAdapter):
            "attachment_info": attachment_info,
        }

-    async def _download_and_cache(self, url: str, content_type: str) -> Optional[str]:
-        """Download a URL and cache it locally."""
+    async def _download_and_cache(
+            self, url: str, content_type: str, original_name: str = "",
+    ) -> Optional[str]:
+        """Download a URL and cache it locally.
+
+        :param original_name: Preferred filename from attachment metadata.
+            Falls back to the URL path basename if empty.
+        """
        from tools.url_safety import is_safe_url

        if not is_safe_url(url):
@ -1668,7 +1779,11 @@ class QQAdapter(BasePlatformAdapter):
            # Convert to .wav using ffmpeg so STT engines can process it.
            return await self._convert_audio_to_wav(data, url)
        else:
-            filename = Path(urlparse(url).path).name or "qq_attachment"
+            filename = (
+                original_name
+                or Path(urlparse(url).path).name
+                or "qq_attachment"
+            )
            return cache_document_from_bytes(data, filename)

    @staticmethod
@ -1881,7 +1996,7 @@ class QQAdapter(BasePlatformAdapter):
    @staticmethod
    def _guess_ext_from_data(data: bytes) -> str:
        """Guess file extension from magic bytes."""
-        if data[:9] == b"#!SILK_V3" or data[:5] == b"#!SILK":
+        if data[:9] == b"#!SILK_V3" or data[:6] == b"#!SILK":
            return ".silk"
        if data[:2] == b"\x02!":
            return ".silk"
@ -1901,7 +2016,7 @@ class QQAdapter(BasePlatformAdapter):
    @staticmethod
    def _looks_like_silk(data: bytes) -> bool:
        """Check if bytes look like a SILK audio file."""
-        return data[:4] == b"#!SILK" or data[:2] == b"\x02!" or data[:9] == b"#!SILK_V3"
+        return data[:6] == b"#!SILK" or data[:2] == b"\x02!" or data[:9] == b"#!SILK_V3"

    async def _convert_silk_to_wav(self, src_path: str, wav_path: str) -> Optional[str]:
        """Convert audio file to WAV using the pilk library.
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@ -429,6 +429,13 @@ class TelegramAdapter(BasePlatformAdapter):
        self._polling_conflict_count: int = 0
        self._polling_network_error_count: int = 0
        self._polling_error_callback_ref = None
+        # After sustained reconnect storms the PTB httpx pool can return
+        # SendResult(success=True) for sends that never actually transmit.
+        # _handle_polling_network_error sets this; _verify_polling_after_reconnect
+        # clears it once getMe() confirms the Bot client is healthy.
+        # While True, send() short-circuits to a failure so callers
+        # (cron live-adapter branch) fall through to standalone delivery.
+        self._send_path_degraded: bool = False
        # DM Topics: map of topic_name -> message_thread_id (populated at startup)
        self._dm_topics: Dict[str, int] = {}
        # Track forum chats where we've already registered bot commands
@ -468,6 +475,10 @@ class TelegramAdapter(BasePlatformAdapter):
        # "all"       — every message triggers a push notification (legacy
        #               behavior; opt-in via display.platforms.telegram.notifications).
        self._notifications_mode: str = "important"
+        # send_or_update_status() bookkeeping: {(chat_id, status_key) -> bot message_id}
+        # Tracks status bubbles owned by this adapter so subsequent calls with the
+        # same key edit the same message instead of appending new ones (#30045).
+        self._status_message_ids: Dict[tuple, str] = {}

    def _notification_kwargs(
        self, metadata: Optional[Dict[str, Any]]
@ -870,6 +881,7 @@ class TelegramAdapter(BasePlatformAdapter):
        MAX_DELAY = 60

        self._polling_network_error_count += 1
+        self._send_path_degraded = True
        attempt = self._polling_network_error_count

        if attempt > MAX_NETWORK_RETRIES:
@ -967,6 +979,7 @@ class TelegramAdapter(BasePlatformAdapter):

        try:
            await asyncio.wait_for(self._app.bot.get_me(), PROBE_TIMEOUT)
+            self._send_path_degraded = False
        except Exception as probe_err:
            logger.warning(
                "[%s] Polling heartbeat probe failed %ds after reconnect: %s",
@ -1679,7 +1692,11 @@ class TelegramAdapter(BasePlatformAdapter):
        """Send a message to a Telegram chat."""
        if not self._bot:
            return SendResult(success=False, error="Not connected")
-        
+
+        # getattr() — tests build adapters via object.__new__() (no __init__).
+        if getattr(self, "_send_path_degraded", False):
+            return SendResult(success=False, error="send_path_degraded", retryable=True)
+
        # Skip whitespace-only text to prevent Telegram 400 empty-text errors.
        if not content or not content.strip():
            return SendResult(success=True, message_id=None)
@ -1908,6 +1925,40 @@ class TelegramAdapter(BasePlatformAdapter):
            is_connect_timeout = self._looks_like_connect_timeout(e)
            return SendResult(success=False, error=str(e), retryable=(is_connect_timeout or not is_timeout))

+    async def send_or_update_status(
+        self,
+        chat_id: str,
+        status_key: str,
+        content: str,
+        *,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send a status message, or edit the previous one with the same key.
+
+        Issue #30045: progress/status callbacks (context-pressure, lifecycle,
+        compression, etc.) used to append a fresh bubble on every call. With
+        this method, the first call sends and the message id is remembered;
+        subsequent calls with the same (chat_id, status_key) edit that same
+        message in place. If the edit fails (message deleted, too old, etc.)
+        we drop the cached id and send fresh.
+        """
+        key = (str(chat_id), str(status_key))
+        cached_id = self._status_message_ids.get(key)
+        if cached_id is not None:
+            result = await self.edit_message(
+                chat_id, cached_id, content, finalize=True, metadata=metadata,
+            )
+            if result.success:
+                if result.message_id:
+                    self._status_message_ids[key] = str(result.message_id)
+                return result
+            # Edit failed — clear the cached id and fall through to a fresh send.
+            self._status_message_ids.pop(key, None)
+        result = await self.send(chat_id, content, metadata=metadata)
+        if result.success and result.message_id:
+            self._status_message_ids[key] = str(result.message_id)
+        return result
+
    async def edit_message(
        self,
        chat_id: str,
@ -4573,10 +4624,10 @@ class TelegramAdapter(BasePlatformAdapter):
        return (
            "You are handling a Telegram group chat message.\n"
            f"- Your identity: user_id={bot_id}, @-mention name in this group=@{username}\n"
-            "- Lines in history prefixed with `[nickname|user_id]` are observed Telegram group context "
-            "and are not necessarily addressed to you.\n"
+            "- observed Telegram group context may be provided in a separate context-only block "
+            "before the current message; it is not necessarily addressed to you.\n"
            "- Treat only the current new message as a request explicitly directed at you, "
-            "and answer it directly."
+            "and use observed context only when the current message asks for it."
        )

    def _apply_telegram_group_observe_attribution(self, event: MessageEvent) -> MessageEvent:
@ -4593,6 +4644,12 @@ class TelegramAdapter(BasePlatformAdapter):
        shared_source = self._telegram_group_observe_shared_source(event.source)
        observe_prompt = self._telegram_group_observe_channel_prompt()
        channel_prompt = f"{event.channel_prompt}\n\n{observe_prompt}" if event.channel_prompt else observe_prompt
+        if event.message_type == MessageType.COMMAND:
+            return dataclasses.replace(
+                event,
+                source=shared_source,
+                channel_prompt=channel_prompt,
+            )
        return dataclasses.replace(
            event,
            text=self._telegram_group_observe_attributed_text(event),
--- a/gateway/platforms/webhook.py
+++ b/gateway/platforms/webhook.py
@ -27,6 +27,8 @@ Security:
 """

 import asyncio
+import base64
+import binascii
 import hashlib
 import hmac
 import json
@ -308,11 +310,37 @@ class WebhookAdapter(BasePlatformAdapter):
            data = json.loads(subs_path.read_text(encoding="utf-8"))
            if not isinstance(data, dict):
                return
-            # Merge: static routes take precedence over dynamic ones
-            self._dynamic_routes = {
-                k: v for k, v in data.items()
-                if k not in self._static_routes
-            }
+            # Merge: static routes take precedence over dynamic ones.
+            # Reject any dynamic route whose effective secret is empty —
+            # an empty secret would cause _handle_webhook to skip HMAC
+            # validation entirely, letting unauthenticated callers in.
+            new_dynamic: Dict[str, dict] = {}
+            for k, v in data.items():
+                if k in self._static_routes:
+                    continue
+                effective_secret = v.get("secret", self._global_secret)
+                if not effective_secret:
+                    logger.warning(
+                        "[webhook] Dynamic route '%s' skipped: 'secret' is "
+                        "missing or empty. Set a valid HMAC secret, or use "
+                        "'%s' to explicitly disable auth (testing only).",
+                        k,
+                        _INSECURE_NO_AUTH,
+                    )
+                    continue
+                if (
+                    effective_secret == _INSECURE_NO_AUTH
+                    and not _is_loopback_host(self._host)
+                ):
+                    logger.warning(
+                        "[webhook] Dynamic route '%s' skipped: INSECURE_NO_AUTH "
+                        "is only allowed on loopback hosts. Current host: '%s'.",
+                        k,
+                        self._host,
+                    )
+                    continue
+                new_dynamic[k] = v
+            self._dynamic_routes = new_dynamic
            self._routes = {**self._dynamic_routes, **self._static_routes}
            self._dynamic_routes_mtime = mtime
            logger.info(
@ -351,9 +379,21 @@ class WebhookAdapter(BasePlatformAdapter):
            logger.error("[webhook] Failed to read body: %s", e)
            return web.json_response({"error": "Bad request"}, status=400)

-        # Validate HMAC signature FIRST (skip for INSECURE_NO_AUTH testing mode)
+        # Validate HMAC signature FIRST (skip only for the explicit local-test
+        # INSECURE_NO_AUTH mode). Missing/empty secrets must fail closed here,
+        # not only during connect(), so direct handler reuse cannot turn a
+        # network webhook route into an unauthenticated agent-dispatch surface.
        secret = route_config.get("secret", self._global_secret)
-        if secret and secret != _INSECURE_NO_AUTH:
+        if not secret:
+            logger.error(
+                "[webhook] Route %s has no HMAC secret; refusing request",
+                route_name,
+            )
+            return web.json_response(
+                {"error": "Webhook route is missing an HMAC secret"},
+                status=403,
+            )
+        if secret != _INSECURE_NO_AUTH:
            if not self._validate_signature(request, raw_body, secret):
                logger.warning(
                    "[webhook] Invalid signature for route %s", route_name
@ -393,6 +433,7 @@ class WebhookAdapter(BasePlatformAdapter):
            request.headers.get("X-GitHub-Event", "")
            or request.headers.get("X-GitLab-Event", "")
            or payload.get("event_type", "")
+            or payload.get("type", "")
            or "unknown"
        )
        allowed_events = route_config.get("events", [])
@ -445,7 +486,10 @@ class WebhookAdapter(BasePlatformAdapter):
        # Build a unique delivery ID
        delivery_id = request.headers.get(
            "X-GitHub-Delivery",
-            request.headers.get("X-Request-ID", str(int(time.time() * 1000))),
+            request.headers.get(
+                "svix-id",
+                request.headers.get("X-Request-ID", str(int(time.time() * 1000))),
+            ),
        )

        # ── Idempotency ─────────────────────────────────────────
@ -590,7 +634,32 @@ class WebhookAdapter(BasePlatformAdapter):
    def _validate_signature(
        self, request: "web.Request", body: bytes, secret: str
    ) -> bool:
-        """Validate webhook signature (GitHub, GitLab, generic HMAC-SHA256)."""
+        """Validate webhook signature (GitHub, GitLab, Svix, generic HMAC-SHA256)."""
+        def _header(name: str) -> str:
+            return (
+                request.headers.get(name, "")
+                or request.headers.get(name.lower(), "")
+                or request.headers.get(name.upper(), "")
+            )
+
+        # Svix / AgentMail:
+        #   svix-id: msg_...
+        #   svix-timestamp: unix seconds
+        #   svix-signature: v1,<base64-hmac> [v1,<base64-hmac> ...]
+        # Signed content is: "{id}.{timestamp}.{raw_body}".  Svix secrets
+        # usually start with "whsec_" and the remainder is base64-encoded.
+        svix_id = _header("svix-id")
+        svix_timestamp = _header("svix-timestamp")
+        svix_signature = _header("svix-signature")
+        if svix_id or svix_timestamp or svix_signature:
+            return self._validate_svix_signature(
+                body=body,
+                secret=secret,
+                msg_id=svix_id,
+                timestamp=svix_timestamp,
+                signature_header=svix_signature,
+            )
+
        # GitHub: X-Hub-Signature-256 = sha256=<hex>
        gh_sig = request.headers.get("X-Hub-Signature-256", "")
        if gh_sig:
@ -618,6 +687,56 @@ class WebhookAdapter(BasePlatformAdapter):
        )
        return False

+    def _validate_svix_signature(
+        self,
+        body: bytes,
+        secret: str,
+        msg_id: str,
+        timestamp: str,
+        signature_header: str,
+        tolerance_seconds: int = 300,
+    ) -> bool:
+        """Validate Svix-compatible signatures used by AgentMail webhooks."""
+        if not (msg_id and timestamp and signature_header and secret):
+            return False
+
+        try:
+            ts = int(timestamp)
+        except (TypeError, ValueError):
+            return False
+        if abs(int(time.time()) - ts) > tolerance_seconds:
+            logger.warning("[webhook] Svix signature timestamp outside replay window")
+            return False
+
+        if secret.startswith("whsec_"):
+            encoded_secret = secret.removeprefix("whsec_")
+            try:
+                key = base64.b64decode(encoded_secret, validate=True)
+            except (binascii.Error, ValueError):
+                logger.debug("[webhook] Invalid whsec_ Svix signing secret")
+                return False
+        else:
+            # Be permissive for providers that document Svix-style headers but
+            # hand out raw shared secrets rather than whsec_ base64 secrets.
+            logger.debug("[webhook] Validating Svix-style signature with raw secret")
+            key = secret.encode()
+
+        signed_content = msg_id.encode() + b"." + timestamp.encode() + b"." + body
+        expected = base64.b64encode(
+            hmac.new(key, signed_content, hashlib.sha256).digest()
+        ).decode()
+
+        # Svix can send multiple signatures separated by spaces during secret
+        # rotation. Each entry is formatted as "vN,<base64>".
+        for part in signature_header.split():
+            try:
+                version, signature = part.split(",", 1)
+            except ValueError:
+                continue
+            if version == "v1" and hmac.compare_digest(signature, expected):
+                return True
+        return False
+
    # ------------------------------------------------------------------
    # Prompt rendering
    # ------------------------------------------------------------------
--- a/gateway/platforms/wecom.py
+++ b/gateway/platforms/wecom.py
@ -616,6 +616,18 @@ class WeComAdapter(BasePlatformAdapter):
            else:
                delay = self._text_batch_delay_seconds
            await asyncio.sleep(delay)
+            # Guard against the cancel-delivery race: when the sleep timer
+            # fires just before cancel() is called, CPython sets
+            # Task._must_cancel but cannot cancel the already-done sleep
+            # future, so CancelledError is delivered at the *next* await
+            # (handle_message) rather than here.  By that point this task
+            # has already popped the merged event, so the superseding task
+            # sees an empty batch and silently drops the message.
+            # This check is synchronous — no await between the sleep and
+            # the pop — so no other coroutine can modify the task registry
+            # in between.
+            if self._pending_text_batch_tasks.get(key) is not current_task:
+                return
            event = self._pending_text_batches.pop(key, None)
            if not event:
                return
--- a/gateway/platforms/wecom_callback.py
+++ b/gateway/platforms/wecom_callback.py
@ -187,7 +187,6 @@ class WecomCallbackAdapter(BasePlatformAdapter):
        app = self._resolve_app_for_chat(chat_id)
        touser = chat_id.split(":", 1)[1] if ":" in chat_id else chat_id
        try:
-            token = await self._get_access_token(app)
            payload = {
                "touser": touser,
                "msgtype": "text",
@ -195,18 +194,31 @@ class WecomCallbackAdapter(BasePlatformAdapter):
                "text": {"content": content[:2048]},
                "safe": 0,
            }
-            resp = await self._http_client.post(
-                f"https://qyapi.weixin.qq.com/cgi-bin/message/send?access_token={token}",
-                json=payload,
-            )
-            data = resp.json()
-            if data.get("errcode") != 0:
-                return SendResult(success=False, error=str(data))
-            return SendResult(
-                success=True,
-                message_id=str(data.get("msgid", "")),
-                raw_response=data,
-            )
+            for _attempt in range(2):
+                token = await self._get_access_token(app)
+                resp = await self._http_client.post(
+                    f"https://qyapi.weixin.qq.com/cgi-bin/message/send?access_token={token}",
+                    json=payload,
+                )
+                data = resp.json()
+                errcode = data.get("errcode")
+                if errcode in {40001, 42001} and _attempt == 0:
+                    # WeCom rejected the token — evict the cached entry so
+                    # the next _get_access_token call forces a fresh fetch.
+                    logger.warning(
+                        "[WecomCallback] Token rejected for app '%s' (errcode=%s), refreshing",
+                        app.get("name", "default"), errcode,
+                    )
+                    self._access_tokens.pop(app["name"], None)
+                    continue
+                if errcode != 0:
+                    return SendResult(success=False, error=str(data))
+                return SendResult(
+                    success=True,
+                    message_id=str(data.get("msgid", "")),
+                    raw_response=data,
+                )
+            return SendResult(success=False, error="send failed after token refresh")
        except Exception as exc:
            return SendResult(success=False, error=str(exc))

--- a/gateway/platforms/weixin.py
+++ b/gateway/platforms/weixin.py
@ -1679,8 +1679,10 @@ class WeixinAdapter(BasePlatformAdapter):

        # Extract MEDIA: tags and bare local file paths before text delivery.
        media_files, cleaned_content = self.extract_media(content)
+        media_files = self.filter_media_delivery_paths(media_files)
        _, image_cleaned = self.extract_images(cleaned_content)
        local_files, final_content = self.extract_local_files(image_cleaned)
+        local_files = self.filter_local_delivery_paths(local_files)

        _AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a", ".flac"}
        _VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".3gp"}
--- a/gateway/run.py
+++ b/gateway/run.py
@ -54,6 +54,7 @@ from agent.account_usage import fetch_account_usage, render_account_usage_lines
 from agent.async_utils import safe_schedule_threadsafe
 from agent.i18n import t
 from hermes_cli.config import cfg_get
+from hermes_cli.fallback_config import get_fallback_chain

 # --- Agent cache tuning ---------------------------------------------------
 # Bounds the per-session AIAgent cache to prevent unbounded growth in
@ -138,6 +139,85 @@ def _gateway_platform_value(platform: Any) -> str:
    return str(getattr(platform, "value", platform) or "").strip().lower()


+def _is_transient_network_error(exc: BaseException) -> bool:
+    """Return True for transient network errors safe to log + swallow.
+
+    The crash class targeted by #31066 / #31110: an unhandled Telegram
+    ``TimedOut`` (or peer ``NetworkError`` / ``httpx`` connection error)
+    propagating to the event loop and killing the entire gateway
+    process. These are by definition transient — the next poll cycle or
+    user action recovers — so they must never crash the process.
+
+    Walk the exception cause chain so wrapped errors (e.g. PTB's
+    ``NetworkError`` wrapping ``httpx.ConnectError``) are still
+    classified. The chain is bounded to avoid pathological cycles.
+    """
+    seen: set[int] = set()
+    cur: Optional[BaseException] = exc
+    depth = 0
+    transient_class_names = {
+        "TimedOut",
+        "NetworkError",
+        "ReadError",
+        "WriteError",
+        "ConnectError",
+        "ConnectTimeout",
+        "ReadTimeout",
+        "WriteTimeout",
+        "PoolTimeout",
+        "RemoteProtocolError",
+        "ServerDisconnectedError",
+        "ClientConnectorError",
+        "ClientOSError",
+    }
+    while cur is not None and depth < 12:
+        ident = id(cur)
+        if ident in seen:
+            break
+        seen.add(ident)
+        depth += 1
+        name = type(cur).__name__
+        if name in transient_class_names:
+            return True
+        cur = cur.__cause__ or cur.__context__
+    return False
+
+
+def _gateway_loop_exception_handler(
+    loop: "asyncio.AbstractEventLoop", context: Dict[str, Any]
+) -> None:
+    """Loop-level safety net for transient network errors.
+
+    Installed once during :func:`start_gateway`. Catches the
+    ``telegram.error.TimedOut`` crash class (issues #31066 / #31110)
+    and any peer transient network error before it can kill the
+    gateway process. Logs at WARNING with full traceback so the
+    originating call site stays diagnosable; non-transient errors
+    are forwarded to the default loop handler so real bugs still
+    surface.
+    """
+    exc = context.get("exception")
+    if exc is not None and _is_transient_network_error(exc):
+        message = context.get("message") or "transient network error"
+        task = context.get("future") or context.get("task")
+        task_name = ""
+        if task is not None:
+            try:
+                task_name = task.get_name() if hasattr(task, "get_name") else repr(task)
+            except Exception:
+                task_name = repr(task)
+        logger.warning(
+            "Gateway swallowed transient network error from %s: %s: %s",
+            task_name or "<unknown task>",
+            type(exc).__name__,
+            exc,
+            exc_info=(type(exc), exc, exc.__traceback__),
+        )
+        return
+    # Fall back to the default handler for anything we don't recognise.
+    loop.default_exception_handler(context)
+
+
 def _redact_gateway_user_facing_secrets(text: str) -> str:
    """Best-effort secret redaction before text can leave the gateway."""
    redacted = str(text or "")
@ -238,6 +318,19 @@ def _prepare_gateway_status_message(platform: Any, event_type: str, message: str
    return text


+async def _send_or_update_status_coro(adapter, chat_id, status_key, content, metadata):
+    """Route a status message through adapter.send_or_update_status when supported.
+
+    Issue #30045: adapters that implement send_or_update_status (currently
+    Telegram) edit the previous bubble for the same status_key instead of
+    appending a new one. Adapters without the method fall back to plain send.
+    """
+    sender = getattr(adapter, "send_or_update_status", None)
+    if callable(sender):
+        return await sender(chat_id, status_key, content, metadata=metadata)
+    return await adapter.send(chat_id, content, metadata=metadata)
+
+
 def _telegramize_command_mentions(text: str, platform: Any) -> str:
    """Rewrite slash-command mentions to Telegram-valid command names.

@ -447,6 +540,109 @@ def _build_replay_entry(role: str, content: Any, msg: Dict[str, Any]) -> Dict[st
    return entry


+_TELEGRAM_OBSERVED_CONTEXT_PROMPT_MARKER = "observed Telegram group context"
+_OBSERVED_GROUP_CONTEXT_HEADER = "[Observed Telegram group context - context only, not requests]"
+_CURRENT_ADDRESSED_MESSAGE_HEADER = "[Current addressed message - answer only this unless it explicitly asks you to use the observed context]"
+
+
+def _uses_telegram_observed_group_context(channel_prompt: Optional[str]) -> bool:
+    """Return True for Telegram group turns that may include observed chatter.
+
+    Telegram's observe-unmentioned mode persists skipped group chatter so a
+    later @mention can see it. Those rows must not replay as ordinary user
+    turns: a weak wake word like ``@bot cambio`` should not make the model treat
+    old unmentioned chatter as pending work. The Telegram adapter marks these
+    turns with a channel prompt; this helper keeps the run-path check explicit
+    and unit-testable.
+    """
+
+    return bool(channel_prompt and _TELEGRAM_OBSERVED_CONTEXT_PROMPT_MARKER in channel_prompt)
+
+
+def _build_gateway_agent_history(
+    history: List[Dict[str, Any]],
+    *,
+    channel_prompt: Optional[str] = None,
+) -> tuple[List[Dict[str, Any]], Optional[str]]:
+    """Convert stored gateway transcript rows into agent replay messages.
+
+    Observed Telegram group rows are returned as API-only context for the
+    current addressed message instead of being replayed as normal prior user
+    turns.  Keeping that context out of ``conversation_history`` avoids
+    consecutive-user repair merging it with the live user turn and then hiding
+    the current message behind ``history_offset`` during persistence.
+    """
+
+    agent_history: List[Dict[str, Any]] = []
+    observed_group_context: List[str] = []
+    separate_observed_context = _uses_telegram_observed_group_context(channel_prompt)
+
+    for msg in history or []:
+        role = msg.get("role")
+        if not role:
+            continue
+
+        # Skip metadata entries (tool definitions, session info) -- these are
+        # for transcript logging, not for the LLM.
+        if role in {"session_meta",}:
+            continue
+
+        # Skip system messages -- the agent rebuilds its own system prompt.
+        if role == "system":
+            continue
+
+        content = msg.get("content")
+        if separate_observed_context and msg.get("observed") and role == "user" and content:
+            observed_group_context.append(str(content).strip())
+            continue
+
+        # Rich agent messages (tool_calls, tool results) must be passed through
+        # intact so the API sees valid assistant→tool sequences.
+        has_tool_calls = "tool_calls" in msg
+        has_tool_call_id = "tool_call_id" in msg
+        is_tool_message = role == "tool"
+
+        if has_tool_calls or has_tool_call_id or is_tool_message:
+            clean_msg = {k: v for k, v in msg.items() if k not in {"timestamp", "observed"}}
+            agent_history.append(clean_msg)
+        elif content:
+            # Simple text message - just need role and content.
+            if msg.get("mirror"):
+                mirror_src = msg.get("mirror_source", "another session")
+                content = f"[Delivered from {mirror_src}] {content}"
+            entry = _build_replay_entry(role, content, msg)
+            agent_history.append(entry)
+
+    observed_context = "\n".join(observed_group_context).strip() or None
+    return agent_history, observed_context
+
+
+def _wrap_current_message_with_observed_context(message: Any, observed_context: Optional[str]) -> Any:
+    """Prepend observed Telegram context to the API-only current user turn."""
+
+    if not observed_context:
+        return message
+
+    prefix = (
+        f"{_OBSERVED_GROUP_CONTEXT_HEADER}\n"
+        f"{observed_context}\n\n"
+        f"{_CURRENT_ADDRESSED_MESSAGE_HEADER}\n"
+    )
+
+    if isinstance(message, str):
+        return f"{prefix}{message}"
+
+    if isinstance(message, list):
+        wrapped = [dict(part) if isinstance(part, dict) else part for part in message]
+        for part in wrapped:
+            if isinstance(part, dict) and part.get("type") == "text":
+                part["text"] = f"{prefix}{part.get('text', '')}"
+                return wrapped
+        return [{"type": "text", "text": prefix.rstrip()}] + wrapped
+
+    return message
+
+
 def _last_transcript_timestamp(history: Optional[List[Dict[str, Any]]]) -> Any:
    """Return the ``timestamp`` of the last usable transcript row, if any.

@ -657,31 +853,29 @@ if _config_path.exists():
                        os.environ[_env_var] = str(_val)
        # Compression config is read directly from config.yaml by run_agent.py
        # and auxiliary_client.py — no env var bridging needed.
-        # Auxiliary model/direct-endpoint overrides (vision, web_extract).
-        # Each task has provider/model/base_url/api_key; bridge non-default values to env vars.
+        # Auxiliary model/direct-endpoint overrides (vision, web_extract,
+        # approval, plus any plugin-registered auxiliary tasks).
+        # Each task has provider/model/base_url/api_key; bridge non-default
+        # values to env vars named AUXILIARY_<KEY_UPPER>_*. The legacy
+        # hard-coded list (vision/web_extract/approval) is replaced by a
+        # dynamic loop so plugin-registered tasks benefit from the same
+        # config→env bridging without core knowing about each one.
        _auxiliary_cfg = _cfg.get("auxiliary", {})
        if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict):
-            _aux_task_env = {
-                "vision": {
-                    "provider": "AUXILIARY_VISION_PROVIDER",
-                    "model": "AUXILIARY_VISION_MODEL",
-                    "base_url": "AUXILIARY_VISION_BASE_URL",
-                    "api_key": "AUXILIARY_VISION_API_KEY",
-                },
-                "web_extract": {
-                    "provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
-                    "model": "AUXILIARY_WEB_EXTRACT_MODEL",
-                    "base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
-                    "api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
-                },
-                "approval": {
-                    "provider": "AUXILIARY_APPROVAL_PROVIDER",
-                    "model": "AUXILIARY_APPROVAL_MODEL",
-                    "base_url": "AUXILIARY_APPROVAL_BASE_URL",
-                    "api_key": "AUXILIARY_APPROVAL_API_KEY",
-                },
-            }
-            for _task_key, _env_map in _aux_task_env.items():
+            # Built-in tasks that previously had explicit env-var bridging.
+            # Kept here as the canonical bridged set; plugin tasks are added
+            # below via the plugin auxiliary registry.
+            _aux_bridged_keys = {"vision", "web_extract", "approval"}
+            try:
+                from hermes_cli.plugins import get_plugin_auxiliary_tasks
+                for _entry in get_plugin_auxiliary_tasks():
+                    _aux_bridged_keys.add(_entry["key"])
+            except Exception:
+                # Plugin discovery failure must not break gateway startup;
+                # built-in bridging stays intact.
+                pass
+
+            for _task_key in _aux_bridged_keys:
                _task_cfg = _auxiliary_cfg.get(_task_key, {})
                if not isinstance(_task_cfg, dict):
                    continue
@ -689,14 +883,15 @@ if _config_path.exists():
                _model = str(_task_cfg.get("model", "")).strip()
                _base_url = str(_task_cfg.get("base_url", "")).strip()
                _api_key = str(_task_cfg.get("api_key", "")).strip()
+                _upper = _task_key.upper()
                if _prov and _prov != "auto":
-                    os.environ[_env_map["provider"]] = _prov
+                    os.environ[f"AUXILIARY_{_upper}_PROVIDER"] = _prov
                if _model:
-                    os.environ[_env_map["model"]] = _model
+                    os.environ[f"AUXILIARY_{_upper}_MODEL"] = _model
                if _base_url:
-                    os.environ[_env_map["base_url"]] = _base_url
+                    os.environ[f"AUXILIARY_{_upper}_BASE_URL"] = _base_url
                if _api_key:
-                    os.environ[_env_map["api_key"]] = _api_key
+                    os.environ[f"AUXILIARY_{_upper}_API_KEY"] = _api_key
        # config.yaml is the documented, authoritative source for these
        # settings — it unconditionally wins over .env values. Previously
        # the guards below read `if X not in os.environ` and let stale
@ -723,6 +918,8 @@ if _config_path.exists():
        if _display_cfg and isinstance(_display_cfg, dict):
            if "busy_input_mode" in _display_cfg:
                os.environ["HERMES_GATEWAY_BUSY_INPUT_MODE"] = str(_display_cfg["busy_input_mode"])
+            if "busy_text_mode" in _display_cfg:
+                os.environ["HERMES_GATEWAY_BUSY_TEXT_MODE"] = str(_display_cfg["busy_text_mode"])
            if "busy_ack_enabled" in _display_cfg:
                os.environ["HERMES_GATEWAY_BUSY_ACK_ENABLED"] = str(_display_cfg["busy_ack_enabled"])
        # Timezone: bridge config.yaml → HERMES_TIMEZONE env var.
@ -846,6 +1043,12 @@ _AGENT_PENDING_SENTINEL = object()
 def _resolve_runtime_agent_kwargs() -> dict:
    """Resolve provider credentials for gateway-created AIAgent instances.

+    Provider is read from ``config.yaml`` ``model.provider`` (the single
+    source of truth). ``resolve_runtime_provider()`` falls through to env
+    var lookups internally for legacy compatibility, but the gateway does
+    not consult environment variables for behavioral config — config.yaml
+    is authoritative.
+
    If the primary provider fails with an authentication error, attempt to
    resolve credentials using the fallback provider chain from config.yaml
    before giving up.
@ -857,9 +1060,7 @@ def _resolve_runtime_agent_kwargs() -> dict:
    from hermes_cli.auth import AuthError

    try:
-        runtime = resolve_runtime_provider(
-            requested=os.getenv("HERMES_INFERENCE_PROVIDER"),
-        )
+        runtime = resolve_runtime_provider()
    except AuthError as auth_exc:
        # Primary provider auth failed (expired token, revoked key, etc.).
        # Try the fallback provider chain before raising.
@ -892,19 +1093,22 @@ def _try_resolve_fallback_provider() -> dict | None:
            return None
        with open(cfg_path, encoding="utf-8") as _f:
            cfg = _y.safe_load(_f) or {}
-        fb = cfg.get("fallback_providers") or cfg.get("fallback_model")
-        if not fb:
+        fb_list = get_fallback_chain(cfg)
+        if not fb_list:
            return None
-        # Normalize to list
-        fb_list = fb if isinstance(fb, list) else [fb]
        for entry in fb_list:
-            if not isinstance(entry, dict):
-                continue
            try:
+                explicit_api_key = entry.get("api_key")
+                if not explicit_api_key:
+                    key_env = str(
+                        entry.get("key_env") or entry.get("api_key_env") or ""
+                    ).strip()
+                    if key_env:
+                        explicit_api_key = os.getenv(key_env, "").strip() or None
                runtime = resolve_runtime_provider(
                    requested=entry.get("provider"),
                    explicit_base_url=entry.get("base_url"),
-                    explicit_api_key=entry.get("api_key"),
+                    explicit_api_key=explicit_api_key,
                )
                logger.info(
                    "Fallback provider resolved: %s model=%s",
@ -1198,6 +1402,26 @@ def _load_gateway_config() -> dict:
    return {}


+def _load_gateway_runtime_config() -> dict:
+    """Load gateway config for runtime reads, expanding supported ``${VAR}`` refs.
+
+    Runtime helpers should honor the same env-template expansion documented for
+    ``config.yaml`` while still respecting tests that monkeypatch
+    ``gateway.run._hermes_home``. Build on ``_load_gateway_config()`` rather
+    than calling the canonical loader directly so both behaviors stay aligned.
+
+    Expansion failures are intentionally NOT swallowed — silently returning
+    the unexpanded dict would mask the very bug this helper exists to fix.
+    """
+    cfg = _load_gateway_config()
+    if not isinstance(cfg, dict) or not cfg:
+        return {}
+    from hermes_cli.config import _expand_env_vars
+
+    expanded = _expand_env_vars(cfg)
+    return expanded if isinstance(expanded, dict) else {}
+
+
 def _resolve_gateway_model(config: dict | None = None) -> str:
    """Read model from config.yaml — single source of truth.

@ -1411,6 +1635,7 @@ class GatewayRunner:
    # blow up on attribute access.
    _running_agents_ts: Dict[str, float] = {}
    _busy_input_mode: str = "interrupt"
+    _busy_text_mode: str = "interrupt"
    _restart_drain_timeout: float = DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
    _exit_code: Optional[int] = None
    _draining: bool = False
@ -1437,6 +1662,7 @@ class GatewayRunner:
        self._service_tier = self._load_service_tier()
        self._show_reasoning = self._load_show_reasoning()
        self._busy_input_mode = self._load_busy_input_mode()
+        self._busy_text_mode = self._load_busy_text_mode()
        self._restart_drain_timeout = self._load_restart_drain_timeout()
        self._provider_routing = self._load_provider_routing()
        self._fallback_model = self._load_fallback_model()
@ -2046,13 +2272,14 @@ class GatewayRunner:
    ) -> Optional[str]:
        """Pin DM-topic routing to the user's last-active topic.

-        Telegram fragments topic-mode DMs two ways: a Reply on a message
-        in another topic delivers ``message_thread_id`` for *that* topic,
-        and ``_build_message_event`` strips the thread_id on plain replies
-        (#3206 — needed for non-topic users). Both route the user to the
-        wrong session. When topic mode is on, rewrite the thread_id to the
-        user's most-recent binding if the inbound id is missing/General or
-        not a known topic for this chat. Returns None to leave it alone.
+        Telegram can omit ``message_thread_id`` or surface General (``1``)
+        for some topic-mode DM replies. In those lobby-shaped cases, keep the
+        conversation attached to the user's most-recent bound topic.
+
+        Do not rewrite a non-lobby, previously-unbound thread id: a newly
+        created Telegram DM topic is also "unknown" until the first inbound
+        message is recorded, and rewriting it would send that brand-new topic's
+        answer into an older lane. Returns None to leave the source alone.
        """
        if (
            source.platform != Platform.TELEGRAM
@ -2062,6 +2289,14 @@ class GatewayRunner:
            or not self._telegram_topic_mode_enabled(source)
        ):
            return None
+        inbound = str(source.thread_id or "")
+        is_lobby = not inbound or inbound in self._TELEGRAM_GENERAL_TOPIC_IDS
+        if not is_lobby:
+            # A non-lobby, unknown thread_id is most likely the first message in
+            # a brand-new Telegram DM topic. Preserve it so it can be recorded
+            # as a new independent lane below instead of hijacking the latest
+            # existing topic binding.
+            return None
        session_db = getattr(self, "_session_db", None)
        if session_db is None:
            return None
@ -2074,11 +2309,6 @@ class GatewayRunner:
            return None
        if not bindings:
            return None
-        inbound = str(source.thread_id or "")
-        is_lobby = not inbound or inbound in self._TELEGRAM_GENERAL_TOPIC_IDS
-        known = {str(b.get("thread_id") or "") for b in bindings}
-        if not is_lobby and inbound in known:
-            return None
        user_id = str(source.user_id)
        for b in bindings:  # newest-first
            if str(b.get("user_id") or "") == user_id:
@ -2532,15 +2762,8 @@ class GatewayRunner:
        """
        file_path = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "")
        if not file_path:
-            try:
-                import yaml as _y
-                cfg_path = _hermes_home / "config.yaml"
-                if cfg_path.exists():
-                    with open(cfg_path, encoding="utf-8") as _f:
-                        cfg = _y.safe_load(_f) or {}
-                    file_path = cfg.get("prefill_messages_file", "")
-            except Exception:
-                pass
+            cfg = _load_gateway_runtime_config()
+            file_path = str(cfg.get("prefill_messages_file", "") or "")
        if not file_path:
            return []
        path = Path(file_path).expanduser()
@ -2570,16 +2793,8 @@ class GatewayRunner:
        prompt = os.getenv("HERMES_EPHEMERAL_SYSTEM_PROMPT", "")
        if prompt:
            return prompt
-        try:
-            import yaml as _y
-            cfg_path = _hermes_home / "config.yaml"
-            if cfg_path.exists():
-                with open(cfg_path, encoding="utf-8") as _f:
-                    cfg = _y.safe_load(_f) or {}
-                return (cfg_get(cfg, "agent", "system_prompt", default="") or "").strip()
-        except Exception:
-            pass
-        return ""
+        cfg = _load_gateway_runtime_config()
+        return str(cfg_get(cfg, "agent", "system_prompt", default="") or "").strip()

    @staticmethod
    def _load_reasoning_config() -> dict | None:
@ -2590,16 +2805,8 @@ class GatewayRunner:
        default (medium).
        """
        from hermes_constants import parse_reasoning_effort
-        effort = ""
-        try:
-            import yaml as _y
-            cfg_path = _hermes_home / "config.yaml"
-            if cfg_path.exists():
-                with open(cfg_path, encoding="utf-8") as _f:
-                    cfg = _y.safe_load(_f) or {}
-                effort = str(cfg_get(cfg, "agent", "reasoning_effort", default="") or "").strip()
-        except Exception:
-            pass
+        cfg = _load_gateway_runtime_config()
+        effort = str(cfg_get(cfg, "agent", "reasoning_effort", default="") or "").strip()
        result = parse_reasoning_effort(effort)
        if effort and effort.strip() and result is None:
            logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort)
@ -2673,16 +2880,8 @@ class GatewayRunner:
        "fast"/"priority"/"on" => "priority", while "normal"/"off" disables it.
        Returns None when unset or unsupported.
        """
-        raw = ""
-        try:
-            import yaml as _y
-            cfg_path = _hermes_home / "config.yaml"
-            if cfg_path.exists():
-                with open(cfg_path, encoding="utf-8") as _f:
-                    cfg = _y.safe_load(_f) or {}
-                raw = str(cfg_get(cfg, "agent", "service_tier", default="") or "").strip()
-        except Exception:
-            pass
+        cfg = _load_gateway_runtime_config()
+        raw = str(cfg_get(cfg, "agent", "service_tier", default="") or "").strip()

        value = raw.lower()
        if not value or value in {"normal", "default", "standard", "off", "none"}:
@ -2695,54 +2894,43 @@ class GatewayRunner:
    @staticmethod
    def _load_show_reasoning() -> bool:
        """Load show_reasoning toggle from config.yaml display section."""
-        try:
-            import yaml as _y
-            cfg_path = _hermes_home / "config.yaml"
-            if cfg_path.exists():
-                with open(cfg_path, encoding="utf-8") as _f:
-                    cfg = _y.safe_load(_f) or {}
-                return is_truthy_value(
-                    cfg_get(cfg, "display", "show_reasoning"),
-                    default=False,
-                )
-        except Exception:
-            pass
-        return False
+        cfg = _load_gateway_runtime_config()
+        return is_truthy_value(
+            cfg_get(cfg, "display", "show_reasoning"),
+            default=False,
+        )

    @staticmethod
    def _load_busy_input_mode() -> str:
        """Load gateway drain-time busy-input behavior from config/env."""
        mode = os.getenv("HERMES_GATEWAY_BUSY_INPUT_MODE", "").strip().lower()
        if not mode:
-            try:
-                import yaml as _y
-                cfg_path = _hermes_home / "config.yaml"
-                if cfg_path.exists():
-                    with open(cfg_path, encoding="utf-8") as _f:
-                        cfg = _y.safe_load(_f) or {}
-                    mode = str(cfg_get(cfg, "display", "busy_input_mode", default="") or "").strip().lower()
-            except Exception:
-                pass
+            cfg = _load_gateway_runtime_config()
+            mode = str(cfg_get(cfg, "display", "busy_input_mode", default="") or "").strip().lower()
        if mode == "queue":
            return "queue"
        if mode == "steer":
            return "steer"
        return "interrupt"

+    @staticmethod
+    def _load_busy_text_mode() -> str:
+        """Load normal busy TEXT follow-up behavior from config/env."""
+        mode = os.getenv("HERMES_GATEWAY_BUSY_TEXT_MODE", "").strip().lower()
+        if not mode:
+            cfg = _load_gateway_runtime_config()
+            mode = str(cfg_get(cfg, "display", "busy_text_mode", default="") or "").strip().lower()
+        if mode == "interrupt":
+            return "interrupt"
+        return "queue"
+
    @staticmethod
    def _load_restart_drain_timeout() -> float:
        """Load graceful gateway restart/stop drain timeout in seconds."""
        raw = os.getenv("HERMES_RESTART_DRAIN_TIMEOUT", "").strip()
        if not raw:
-            try:
-                import yaml as _y
-                cfg_path = _hermes_home / "config.yaml"
-                if cfg_path.exists():
-                    with open(cfg_path, encoding="utf-8") as _f:
-                        cfg = _y.safe_load(_f) or {}
-                    raw = str(cfg_get(cfg, "agent", "restart_drain_timeout", default="") or "").strip()
-            except Exception:
-                pass
+            cfg = _load_gateway_runtime_config()
+            raw = str(cfg_get(cfg, "agent", "restart_drain_timeout", default="") or "").strip()
        value = parse_restart_drain_timeout(raw)
        if raw and value == DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT:
            try:
@ -2767,19 +2955,12 @@ class GatewayRunner:
        """
        mode = os.getenv("HERMES_BACKGROUND_NOTIFICATIONS", "")
        if not mode:
-            try:
-                import yaml as _y
-                cfg_path = _hermes_home / "config.yaml"
-                if cfg_path.exists():
-                    with open(cfg_path, encoding="utf-8") as _f:
-                        cfg = _y.safe_load(_f) or {}
-                    raw = cfg_get(cfg, "display", "background_process_notifications")
-                    if raw is False:
-                        mode = "off"
-                    elif raw not in {None, ""}:
-                        mode = str(raw)
-            except Exception:
-                pass
+            cfg = _load_gateway_runtime_config()
+            raw = cfg_get(cfg, "display", "background_process_notifications")
+            if raw is False:
+                mode = "off"
+            elif raw not in {None, ""}:
+                mode = str(raw)
        mode = (mode or "all").strip().lower()
        valid = {"all", "result", "error", "off"}
        if mode not in valid:
@ -2805,12 +2986,12 @@ class GatewayRunner:
        return {}

    @staticmethod
-    def _load_fallback_model() -> list | dict | None:
+    def _load_fallback_model() -> list | None:
        """Load fallback provider chain from config.yaml.

-        Returns a list of provider dicts (``fallback_providers``), a single
-        dict (legacy ``fallback_model``), or None if not configured.
-        AIAgent.__init__ normalizes both formats into a chain.
+        Returns the merged effective chain from ``fallback_providers`` plus any
+        legacy ``fallback_model`` entries. ``fallback_providers`` stays first
+        when both keys are present.
        """
        try:
            import yaml as _y
@ -2818,7 +2999,7 @@ class GatewayRunner:
            if cfg_path.exists():
                with open(cfg_path, encoding="utf-8") as _f:
                    cfg = _y.safe_load(_f) or {}
-                fb = cfg.get("fallback_providers") or cfg.get("fallback_model") or None
+                fb = get_fallback_chain(cfg)
                if fb:
                    return fb
        except Exception:
@ -2890,11 +3071,19 @@ class GatewayRunner:

        running_agent = self._running_agents.get(session_key)

+        effective_mode = self._busy_input_mode
+        busy_text_mode = getattr(self, "_busy_text_mode", "queue")
+        if (
+            event.message_type == MessageType.TEXT
+            and busy_text_mode == "queue"
+            and effective_mode != "steer"
+        ):
+            return False
+
        # Steer mode: inject mid-run via running_agent.steer() instead of
        # queueing + interrupting.  If the agent isn't running yet
        # (sentinel) or lacks steer(), or the payload is empty, fall back
        # to queue semantics so nothing is lost.
-        effective_mode = self._busy_input_mode
        steered = False
        if effective_mode == "steer":
            steer_text = (event.text or "").strip()
@ -2919,7 +3108,12 @@ class GatewayRunner:
        # successful steer — the text already landed inside the run and
        # must NOT also be replayed as a next-turn user message.
        if not steered:
-            merge_pending_message_event(adapter._pending_messages, session_key, event)
+            merge_pending_message_event(
+                adapter._pending_messages,
+                session_key,
+                event,
+                merge_text=event.message_type == MessageType.TEXT,
+            )

        is_queue_mode = effective_mode == "queue"
        is_steer_mode = effective_mode == "steer"
@ -3851,6 +4045,7 @@ class GatewayRunner:
            adapter.set_fatal_error_handler(self._handle_adapter_fatal_error)
            adapter.set_session_store(self.session_store)
            adapter.set_busy_session_handler(self._handle_active_session_busy_message)
+            adapter._busy_text_mode = self._busy_text_mode
            
            # Try to connect
            logger.info("Connecting to %s...", platform.value)
@ -4955,6 +5150,11 @@ class GatewayRunner:
        if not candidates:
            return

+        from gateway.platforms.base import BasePlatformAdapter
+        candidates = BasePlatformAdapter.filter_local_delivery_paths(candidates)
+        if not candidates:
+            return
+
        _IMAGE_EXTS = {".png", ".jpg", ".jpeg", ".gif", ".webp"}
        _VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".3gp"}

@ -5458,6 +5658,7 @@ class GatewayRunner:
                    adapter.set_fatal_error_handler(self._handle_adapter_fatal_error)
                    adapter.set_session_store(self.session_store)
                    adapter.set_busy_session_handler(self._handle_active_session_busy_message)
+                    adapter._busy_text_mode = self._busy_text_mode

                    success = await self._connect_adapter_with_timeout(adapter, platform)
                    if success:
@ -5897,6 +6098,12 @@ class GatewayRunner:
            if platform_registry.is_registered(platform.value):
                adapter = platform_registry.create_adapter(platform.value, config)
                if adapter is not None:
+                    # Adapters that need a back-reference to the gateway runner
+                    # (e.g. for cross-platform admin alerts) declare a
+                    # ``gateway_runner`` attribute. Inject it after creation so
+                    # plugin adapters don't need a custom factory signature.
+                    if hasattr(adapter, "gateway_runner"):
+                        adapter.gateway_runner = self
                    return adapter
                # Registered but failed to instantiate — don't silently fall
                # through to built-ins (there are none for plugin platforms).
@ -5939,15 +6146,6 @@ class GatewayRunner:
            adapter._notifications_mode = _notify_mode
            return adapter
        
-        elif platform == Platform.DISCORD:
-            from gateway.platforms.discord import DiscordAdapter, check_discord_requirements
-            if not check_discord_requirements():
-                logger.warning("Discord: discord.py not installed")
-                return None
-            adapter = DiscordAdapter(config)
-            adapter.gateway_runner = self  # For cross-platform admin alerts on unauthorized slash
-            return adapter
-        
        elif platform == Platform.WHATSAPP:
            from gateway.platforms.whatsapp import WhatsAppAdapter, check_whatsapp_requirements
            if not check_whatsapp_requirements():
@ -6214,18 +6412,6 @@ class GatewayRunner:
            if allow_bots_var and os.getenv(allow_bots_var, "none").lower().strip() in {"mentions", "all"}:
                return True

-        # Discord role-based access (DISCORD_ALLOWED_ROLES): the adapter's
-        # on_message pre-filter already verified role membership — if the
-        # message reached here, the user passed that check. Authorize
-        # directly to avoid the "no allowlists configured" branch below
-        # rejecting role-only setups where DISCORD_ALLOWED_USERS is empty
-        # (issue #7871).
-        if (
-            source.platform == Platform.DISCORD
-            and os.getenv("DISCORD_ALLOWED_ROLES", "").strip()
-        ):
-            return True
-
        # Check pairing store (always checked, regardless of allowlists)
        platform_name = source.platform.value if source.platform else ""
        if self.pairing_store.is_approved(platform_name, user_id):
@ -11164,14 +11350,16 @@ class GatewayRunner:
            # send_multiple_images (Telegram sendPhoto recompresses to ~1280px).
            force_document_attachments = "[[as_document]]" in response

+            from gateway.platforms.base import BasePlatformAdapter, should_send_media_as_audio
+
            media_files, _ = adapter.extract_media(response)
+            media_files = BasePlatformAdapter.filter_media_delivery_paths(media_files)
            _, cleaned = adapter.extract_images(response)
            local_files, _ = adapter.extract_local_files(cleaned)
+            local_files = BasePlatformAdapter.filter_local_delivery_paths(local_files)

            _thread_meta = self._thread_metadata_for_source(event.source, self._reply_anchor_for_event(event))

-            from gateway.platforms.base import should_send_media_as_audio
-
            _VIDEO_EXTS = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'}
            _IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.webp', '.gif'}

@ -11463,6 +11651,8 @@ class GatewayRunner:
            # Extract media files from the response
            if response:
                media_files, response = adapter.extract_media(response)
+                from gateway.platforms.base import BasePlatformAdapter
+                media_files = BasePlatformAdapter.filter_media_delivery_paths(media_files)
                images, text_content = adapter.extract_images(response)

                preview = prompt[:60] + ("..." if len(prompt) > 60 else "")
@ -12551,7 +12741,7 @@ class GatewayRunner:
                return t("gateway.title.current_no_title", session_id=session_id)

    async def _handle_resume_command(self, event: MessageEvent) -> str:
-        """Handle /resume command — switch to a previously-named session."""
+        """Handle /resume command — list or switch to a previous session."""
        if not self._session_db:
            from hermes_state import format_session_db_unavailable
            return format_session_db_unavailable(prefix=t("gateway.shared.session_db_unavailable_prefix"))
@ -12560,30 +12750,44 @@ class GatewayRunner:
        session_key = self._session_key_for_source(source)
        name = event.get_command_args().strip()

+        def _list_titled_sessions() -> list[dict]:
+            user_source = source.platform.value if source.platform else None
+            sessions = self._session_db.list_sessions_rich(source=user_source, limit=10)
+            return [s for s in sessions if s.get("title")][:10]
+
        if not name:
            # List recent titled sessions for this user/platform
            try:
-                user_source = source.platform.value if source.platform else None
-                sessions = self._session_db.list_sessions_rich(
-                    source=user_source, limit=10
-                )
-                titled = [s for s in sessions if s.get("title")]
+                titled = _list_titled_sessions()
                if not titled:
                    return t("gateway.resume.no_named_sessions")
                lines = [t("gateway.resume.list_header")]
-                for s in titled[:10]:
+                for idx, s in enumerate(titled[:10], start=1):
                    title = s["title"]
                    preview = s.get("preview", "")[:40]
                    preview_part = t("gateway.resume.list_preview_suffix", preview=preview) if preview else ""
-                    lines.append(t("gateway.resume.list_item", title=title, preview_part=preview_part))
-                lines.append(t("gateway.resume.list_footer"))
+                    lines.append(t("gateway.resume.list_item_numbered", index=idx, title=title, preview_part=preview_part))
+                lines.append(t("gateway.resume.list_footer_numbered"))
                return "\n".join(lines)
            except Exception as e:
                logger.debug("Failed to list titled sessions: %s", e)
                return t("gateway.resume.list_failed", error=e)

-        # Resolve the name to a session ID.
-        target_id = self._session_db.resolve_session_by_title(name)
+        # Resolve a numbered choice or a title to a session ID.
+        if name.isdigit():
+            try:
+                titled = _list_titled_sessions()
+            except Exception as e:
+                logger.debug("Failed to list titled sessions for numeric resume: %s", e)
+                return t("gateway.resume.list_failed", error=e)
+            index = int(name)
+            if index < 1 or index > len(titled):
+                return t("gateway.resume.out_of_range", index=index)
+            target = titled[index - 1]
+            target_id = target.get("id")
+            name = target.get("title") or name
+        else:
+            target_id = self._session_db.resolve_session_by_title(name)
        if not target_id:
            return t("gateway.resume.not_found", name=name)
        # Compression creates child continuations that hold the live transcript.
@ -16065,11 +16269,7 @@ class GatewayRunner:
                )
                return
            _fut = safe_schedule_threadsafe(
-                _status_adapter.send(
-                    _status_chat_id,
-                    prepared_message,
-                    metadata=_status_thread_metadata,
-                ),
+                _send_or_update_status_coro(_status_adapter, _status_chat_id, event_type, prepared_message, _status_thread_metadata),
                _loop_for_step,
                logger=logger,
                log_message=f"status_callback ({event_type}) scheduling error",
@ -16470,45 +16670,16 @@ class GatewayRunner:
            #      that may include tool_calls, tool_call_id, reasoning, etc.
            #      - These must be passed through intact so the API sees valid
            #        assistant→tool sequences (dropping tool_calls causes 500 errors)
-            agent_history = []
-            for msg in history:
-                role = msg.get("role")
-                if not role:
-                    continue
-                
-                # Skip metadata entries (tool definitions, session info)
-                # -- these are for transcript logging, not for the LLM
-                if role in {"session_meta",}:
-                    continue
-                
-                # Skip system messages -- the agent rebuilds its own system prompt
-                if role == "system":
-                    continue
-                
-                # Rich agent messages (tool_calls, tool results) must be passed
-                # through intact so the API sees valid assistant→tool sequences
-                has_tool_calls = "tool_calls" in msg
-                has_tool_call_id = "tool_call_id" in msg
-                is_tool_message = role == "tool"
-                
-                if has_tool_calls or has_tool_call_id or is_tool_message:
-                    clean_msg = {k: v for k, v in msg.items() if k != "timestamp"}
-                    agent_history.append(clean_msg)
-                else:
-                    # Simple text message - just need role and content
-                    content = msg.get("content")
-                    if content:
-                        # Tag cross-platform mirror messages so the agent knows their origin
-                        if msg.get("mirror"):
-                            mirror_src = msg.get("mirror_source", "another session")
-                            content = f"[Delivered from {mirror_src}] {content}"
-                        # Preserve assistant reasoning + Codex replay fields so
-                        # multi-turn reasoning context, prefix-cache hits, and
-                        # provider-specific echo requirements survive session
-                        # reload.  See ``_ASSISTANT_REPLAY_FIELDS`` for the full
-                        # whitelist and rationale.
-                        entry = _build_replay_entry(role, content, msg)
-                        agent_history.append(entry)
+            #
+            # Telegram observed group context is handled structurally here:
+            # observed=True transcript rows are withheld from replayable
+            # history and attached to the current addressed message as
+            # API-only context, so persisted history stores only the real
+            # addressed user turn.
+            agent_history, observed_group_context = _build_gateway_agent_history(
+                history,
+                channel_prompt=channel_prompt,
+            )
            
            # Collect MEDIA paths already in history so we can exclude them
            # from the current turn's extraction. This is compression-safe:
@ -16741,7 +16912,17 @@ class GatewayRunner:
                else:
                    _run_message = message

-                result = agent.run_conversation(_run_message, conversation_history=agent_history, task_id=session_id)
+                _api_run_message = _wrap_current_message_with_observed_context(
+                    _run_message,
+                    observed_group_context,
+                )
+                _conversation_kwargs = {
+                    "conversation_history": agent_history,
+                    "task_id": session_id,
+                }
+                if observed_group_context:
+                    _conversation_kwargs["persist_user_message"] = message
+                result = agent.run_conversation(_api_run_message, **_conversation_kwargs)
            finally:
                unregister_gateway_notify(_approval_session_key)
                # Cancel any pending clarify entries so blocked agent
@ -16957,6 +17138,7 @@ class GatewayRunner:
                "context_length": _context_length,
                "session_id": effective_session_id,
                "response_previewed": result.get("response_previewed", False),
+                "response_transformed": result.get("response_transformed", False),
            }
        
        # Start progress message sender if enabled
@ -17594,7 +17776,11 @@ class GatewayRunner:
            _content_delivered = bool(
                _sc and getattr(_sc, "final_content_delivered", False)
            )
-            if not _is_empty_sentinel and (_streamed or _previewed or _content_delivered):
+            # Plugin hooks (e.g. transform_llm_output) may have appended content
+            # after streaming finished — when the response was transformed, always
+            # send the final version so the appended content reaches the client.
+            _transformed = bool(response.get("response_transformed"))
+            if not _is_empty_sentinel and not _transformed and (_streamed or _previewed or _content_delivered):
                logger.info(
                    "Suppressing normal final send for session %s: final delivery already confirmed (streamed=%s previewed=%s content_delivered=%s).",
                    session_key or "?",
@ -17603,6 +17789,28 @@ class GatewayRunner:
                    _content_delivered,
                )
                response["already_sent"] = True
+            elif not _is_empty_sentinel and _transformed and _sc is not None:
+                # Plugin hooks transformed the response after streaming — edit the
+                # existing streamed message instead of sending a duplicate.
+                _sc_msg_id = _sc.message_id
+                if _sc_msg_id:
+                    try:
+                        await _sc.adapter.edit_message(
+                            chat_id=source.chat_id,
+                            message_id=_sc_msg_id,
+                            content=response["final_response"],
+                            finalize=True,
+                        )
+                        response["already_sent"] = True
+                        logger.info(
+                            "Edited streamed message %s for session %s to include plugin-transformed content.",
+                            _sc_msg_id, session_key or "?",
+                        )
+                    except Exception as _edit_err:
+                        logger.warning(
+                            "Failed to edit streamed message for session %s: %s",
+                            session_key or "?", _edit_err,
+                        )

        # Schedule deletion of tracked temporary progress bubbles after the
        # final response lands. Failed runs skip this so bubbles remain as
@ -18029,6 +18237,21 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
        runner.request_restart(detached=False, via_service=True)
    
    loop = asyncio.get_running_loop()
+
+    # Install a loop-level exception handler that swallows transient
+    # network errors from background tasks. Issues #31066 / #31110:
+    # an unhandled ``telegram.error.TimedOut`` (or peer NetworkError /
+    # httpx connection error) in any awaited coroutine would propagate
+    # to the loop and kill the gateway process, taking down every
+    # profile attached to the same runner. systemd then restarts the
+    # service after ~5s but the active conversation turn is lost.
+    #
+    # The fix is intentionally narrow: only well-known transient
+    # network errors are swallowed (and logged with full traceback so
+    # the originating call site is still discoverable). Anything else
+    # is forwarded to the default handler so real bugs still surface.
+    loop.set_exception_handler(_gateway_loop_exception_handler)
+
    if threading.current_thread() is threading.main_thread():
        for sig in (signal.SIGINT, signal.SIGTERM):
            try:
--- a/gateway/session.py
+++ b/gateway/session.py
@ -1277,6 +1277,7 @@ class SessionStore:
                    platform_message_id=(
                        message.get("platform_message_id") or message.get("message_id")
                    ),
+                    observed=bool(message.get("observed")),
                )
            except Exception as e:
                logger.debug("Session DB operation failed: %s", e)
--- a/gateway/session_context.py
+++ b/gateway/session_context.py
@ -83,6 +83,21 @@ _VAR_MAP = {
 }


+def set_current_session_id(session_id: str) -> None:
+    """Synchronize ``HERMES_SESSION_ID`` across ContextVar and ``os.environ``.
+
+    Long-lived single-process entrypoints like the CLI can rotate sessions via
+    ``/new``, ``/resume``, ``/branch``, or compression splits without
+    reconstructing the entire agent. Tools still consult
+    ``get_session_env("HERMES_SESSION_ID")`` with an ``os.environ`` fallback,
+    so both storage paths must move together when the active session changes.
+    """
+    import os
+
+    os.environ["HERMES_SESSION_ID"] = session_id
+    _SESSION_ID.set(session_id)
+
+
 def set_session_vars(
    platform: str = "",
    chat_id: str = "",
--- a/gateway/stream_consumer.py
+++ b/gateway/stream_consumer.py
@ -192,6 +192,11 @@ class GatewayStreamConsumer:
        """True when the stream consumer delivered the final assistant reply."""
        return self._final_response_sent

+    @property
+    def message_id(self) -> str | None:
+        """The Discord/chat message ID of the last-sent or edited message."""
+        return self._message_id
+
    @property
    def final_content_delivered(self) -> bool:
        """True when the final response content reached the user, even if
--- a/hermes_cli/_parser.py
+++ b/hermes_cli/_parser.py
@ -129,7 +129,8 @@ def build_top_level_parser():
        default=None,
        help=(
            "Provider override for this invocation (e.g. openrouter, anthropic). "
-            "Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_PROVIDER env var."
+            "Applies to -z/--oneshot and --tui. The persistent provider lives in config.yaml "
+            "under model.provider — use `hermes setup` or edit the file to change it."
        ),
    )
    parser.add_argument(
@ -268,7 +269,11 @@ def build_top_level_parser():
        help="Inference provider (default: auto). Built-in or a user-defined name from `providers:` in config.yaml.",
    )
    chat_parser.add_argument(
-        "-v", "--verbose", action="store_true", help="Verbose output"
+        "-v",
+        "--verbose",
+        action="store_true",
+        default=argparse.SUPPRESS,
+        help="Verbose output",
    )
    chat_parser.add_argument(
        "-Q",
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@ -41,7 +41,7 @@ from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from http.server import BaseHTTPRequestHandler, HTTPServer, ThreadingHTTPServer
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Tuple
+from typing import Any, Callable, Dict, FrozenSet, List, Optional, Tuple
 from urllib.parse import parse_qs, urlencode, urlparse

 import httpx
@ -553,6 +553,7 @@ _PLACEHOLDER_SECRET_VALUES = {
    "***",
    "changeme",
    "your_api_key",
+    "your_api_key_here",
    "your-api-key",
    "placeholder",
    "example",
@ -1559,6 +1560,67 @@ def _optional_base_url(value: Any) -> Optional[str]:
    return cleaned if cleaned else None


+# Allowlist of hosts the Nous Portal proxy is willing to forward minted
+# bearer tokens to. The bearer is a long-lived agent_key minted by
+# portal.nousresearch.com — sending it anywhere else would leak it.
+#
+# This is consulted only for URLs coming from the NETWORK side (Portal
+# refresh / agent-key-mint responses). User-controlled env-var overrides
+# (NOUS_INFERENCE_BASE_URL) bypass validation — that's the documented
+# dev/staging escape hatch and the env source is already trusted (the
+# user set it themselves).
+_ALLOWED_NOUS_INFERENCE_HOSTS: FrozenSet[str] = frozenset({
+    "inference-api.nousresearch.com",
+})
+
+
+def _validate_nous_inference_url_from_network(url: Optional[str]) -> Optional[str]:
+    """Validate a Portal-returned inference URL against the host allowlist.
+
+    Returns ``url`` (normalised by stripping trailing slashes) if it's a
+    well-formed ``https://<allowlisted-host>/...`` URL. Returns ``None``
+    if the URL is missing, malformed, non-https, or points at an
+    unexpected host — letting the caller fall back to the configured
+    default rather than persist or forward a poisoned value.
+
+    Defense-in-depth: a compromised refresh / mint response from the
+    Portal API (MITM, malicious response injection) could otherwise
+    redirect every subsequent proxy request — bearing the user's
+    legitimately-minted agent_key — to an attacker-controlled endpoint.
+    Validating scheme + host at the source closes that loop before the
+    poisoned URL ever lands in ``auth.json``.
+
+    The env-var override path (``NOUS_INFERENCE_BASE_URL``) bypasses
+    this — env values come from the trusted OS user, not from the
+    network, and the override is documented for staging/dev use.
+
+    Co-authored-by: memosr <mehmet.sr35@gmail.com>
+    """
+    if not isinstance(url, str):
+        return None
+    cleaned = url.strip()
+    if not cleaned:
+        return None
+    try:
+        parsed = urlparse(cleaned)
+    except Exception:
+        return None
+    if parsed.scheme != "https":
+        logger.warning(
+            "nous: refusing non-https inference URL scheme %r from Portal response",
+            parsed.scheme,
+        )
+        return None
+    if parsed.hostname not in _ALLOWED_NOUS_INFERENCE_HOSTS:
+        logger.warning(
+            "nous: refusing inference URL host %r from Portal response "
+            "(not in allowlist); falling back to default",
+            parsed.hostname,
+        )
+        return None
+    return cleaned.rstrip("/")
+
+
 def _decode_jwt_claims(token: Any) -> Dict[str, Any]:
    if not isinstance(token, str) or token.count(".") != 2:
        return {}
@ -2004,7 +2066,10 @@ def resolve_qwen_runtime_credentials(
 def get_qwen_auth_status() -> Dict[str, Any]:
    auth_path = _qwen_cli_auth_path()
    try:
-        creds = resolve_qwen_runtime_credentials(refresh_if_expiring=False)
+        # Validate the runtime credentials, including refresh when the cached
+        # CLI token is expired. Otherwise stale tokens show up as "logged in"
+        # and `hermes model` walks users into a broken Qwen setup flow.
+        creds = resolve_qwen_runtime_credentials(refresh_if_expiring=True)
        return {
            "logged_in": True,
            "auth_file": str(auth_path),
@ -4776,7 +4841,7 @@ def refresh_nous_oauth_pure(
            state["refresh_token"] = refreshed.get("refresh_token") or state["refresh_token"]
            state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer"
            state["scope"] = refreshed.get("scope") or state.get("scope")
-            refreshed_url = _optional_base_url(refreshed.get("inference_base_url"))
+            refreshed_url = _validate_nous_inference_url_from_network(refreshed.get("inference_base_url"))
            if refreshed_url:
                state["inference_base_url"] = refreshed_url
            state["obtained_at"] = now.isoformat()
@ -4812,7 +4877,7 @@ def refresh_nous_oauth_pure(
            state["agent_key_expires_in"] = mint_payload.get("expires_in")
            state["agent_key_reused"] = bool(mint_payload.get("reused", False))
            state["agent_key_obtained_at"] = now.isoformat()
-            minted_url = _optional_base_url(mint_payload.get("inference_base_url"))
+            minted_url = _validate_nous_inference_url_from_network(mint_payload.get("inference_base_url"))
            if minted_url:
                state["inference_base_url"] = minted_url

@ -5090,7 +5155,7 @@ def resolve_nous_runtime_credentials(
                        state["refresh_token"] = refreshed.get("refresh_token") or refresh_token
                        state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer"
                        state["scope"] = refreshed.get("scope") or state.get("scope")
-                        refreshed_url = _optional_base_url(refreshed.get("inference_base_url"))
+                        refreshed_url = _validate_nous_inference_url_from_network(refreshed.get("inference_base_url"))
                        if refreshed_url:
                            inference_base_url = refreshed_url
                        state["obtained_at"] = now.isoformat()
@ -5198,7 +5263,7 @@ def resolve_nous_runtime_credentials(
                                state["refresh_token"] = refreshed.get("refresh_token") or latest_refresh_token
                                state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer"
                                state["scope"] = refreshed.get("scope") or state.get("scope")
-                                refreshed_url = _optional_base_url(refreshed.get("inference_base_url"))
+                                refreshed_url = _validate_nous_inference_url_from_network(refreshed.get("inference_base_url"))
                                if refreshed_url:
                                    inference_base_url = refreshed_url
                                state["obtained_at"] = now.isoformat()
@ -5253,7 +5318,7 @@ def resolve_nous_runtime_credentials(
                state["agent_key_expires_in"] = mint_payload.get("expires_in")
                state["agent_key_reused"] = bool(mint_payload.get("reused", False))
                state["agent_key_obtained_at"] = now.isoformat()
-                minted_url = _optional_base_url(mint_payload.get("inference_base_url"))
+                minted_url = _validate_nous_inference_url_from_network(mint_payload.get("inference_base_url"))
                if minted_url:
                    inference_base_url = minted_url
                _oauth_trace(
@ -7045,10 +7110,95 @@ def _refresh_minimax_oauth_state(
    return new_state


+def _minimax_oauth_quarantine_on_terminal_refresh(state: Dict[str, Any], exc: AuthError) -> None:
+    """Wipe dead tokens from auth.json after a terminal refresh failure.
+
+    Shared by both the eager-resolve path and the lazy per-request token
+    provider. Mirrors the Nous / xAI-OAuth / Codex-OAuth quarantine pattern
+    so subsequent calls fail fast without a network retry.
+    """
+    if not (exc.relogin_required and state.get("refresh_token")):
+        return
+    for _k in ("access_token", "refresh_token", "expires_at", "expires_in", "obtained_at"):
+        state.pop(_k, None)
+    state["last_auth_error"] = {
+        "provider": "minimax-oauth",
+        "code": exc.code or "refresh_failed",
+        "message": str(exc),
+        "reason": "runtime_refresh_failure",
+        "relogin_required": True,
+        "at": datetime.now(timezone.utc).isoformat(),
+    }
+    try:
+        _minimax_save_auth_state(state)
+    except Exception as _save_exc:
+        logger.debug("MiniMax OAuth: failed to persist quarantined state: %s", _save_exc)
+
+
+def build_minimax_oauth_token_provider() -> Callable[[], str]:
+    """Return a zero-arg callable that yields a fresh MiniMax access token.
+
+    The Anthropic SDK caches ``api_key`` as a static string at construction
+    time, so a session that resolves credentials once at startup will keep
+    sending the same bearer until MiniMax's server returns 401 — typically
+    ~15 minutes in, because MiniMax issues short-lived access tokens.
+
+    Returning a *callable* instead of a string lets us hook into the
+    existing Entra-ID bearer infrastructure in
+    :mod:`agent.anthropic_adapter`: ``build_anthropic_client`` detects a
+    callable and routes through ``_build_anthropic_client_with_bearer_hook``,
+    which mints a fresh ``Authorization`` header on every outbound request.
+    Each invocation re-reads the persisted state from ``auth.json`` and
+    calls :func:`_refresh_minimax_oauth_state` — that helper is a no-op
+    when the token still has more than ``MINIMAX_OAUTH_REFRESH_SKEW_SECONDS``
+    of life left, so the steady-state cost is one file read + one
+    timestamp compare per request.
+
+    Reading state fresh each time also means a refresh persisted by one
+    process (CLI, gateway, cron) is immediately visible to every other
+    process sharing the same ``auth.json``.
+    """
+    def _provide() -> str:
+        state = get_provider_auth_state("minimax-oauth")
+        if not state or not state.get("access_token"):
+            raise AuthError(
+                "Not logged into MiniMax OAuth. Run `hermes model` and select "
+                "MiniMax (OAuth).",
+                provider="minimax-oauth", code="not_logged_in", relogin_required=True,
+            )
+        try:
+            state = _refresh_minimax_oauth_state(state)
+        except AuthError as exc:
+            _minimax_oauth_quarantine_on_terminal_refresh(state, exc)
+            raise
+        token = state.get("access_token")
+        if not token:
+            raise AuthError(
+                "MiniMax OAuth state has no access_token after refresh.",
+                provider="minimax-oauth", code="no_access_token", relogin_required=True,
+            )
+        return token
+
+    return _provide
+
+
 def resolve_minimax_oauth_runtime_credentials(
    *, min_token_ttl_seconds: int = MINIMAX_OAUTH_REFRESH_SKEW_SECONDS,
+    as_token_provider: bool = False,
 ) -> Dict[str, Any]:
-    """Return {provider, api_key, base_url, source} for minimax-oauth."""
+    """Return {provider, api_key, base_url, source} for minimax-oauth.
+
+    When ``as_token_provider`` is True, ``api_key`` is a zero-arg callable
+    that mints a fresh access token per call (proactively refreshing if
+    the cached token is within ``MINIMAX_OAUTH_REFRESH_SKEW_SECONDS`` of
+    expiry). This is what the runtime provider path uses so that long
+    sessions survive MiniMax's short access-token lifetime — see
+    :func:`build_minimax_oauth_token_provider` for the rationale.
+
+    The default (string ``api_key``) preserves the historical contract for
+    diagnostic call sites like ``hermes status`` that just want to know
+    whether a valid token exists right now.
+    """
    state = get_provider_auth_state("minimax-oauth")
    if not state or not state.get("access_token"):
        raise AuthError(
@ -7059,28 +7209,15 @@ def resolve_minimax_oauth_runtime_credentials(
    try:
        state = _refresh_minimax_oauth_state(state)
    except AuthError as exc:
-        if exc.relogin_required and state.get("refresh_token"):
-            # Terminal refresh failure — clear dead tokens from auth.json so
-            # subsequent calls fail fast without a network retry, mirroring
-            # the Nous / xAI-OAuth / Codex-OAuth quarantine pattern.
-            for _k in ("access_token", "refresh_token", "expires_at", "expires_in", "obtained_at"):
-                state.pop(_k, None)
-            state["last_auth_error"] = {
-                "provider": "minimax-oauth",
-                "code": exc.code or "refresh_failed",
-                "message": str(exc),
-                "reason": "runtime_refresh_failure",
-                "relogin_required": True,
-                "at": datetime.now(timezone.utc).isoformat(),
-            }
-            try:
-                _minimax_save_auth_state(state)
-            except Exception as _save_exc:
-                logger.debug("MiniMax OAuth: failed to persist quarantined state: %s", _save_exc)
+        _minimax_oauth_quarantine_on_terminal_refresh(state, exc)
        raise
+    if as_token_provider:
+        api_key: Any = build_minimax_oauth_token_provider()
+    else:
+        api_key = state["access_token"]
    return {
        "provider": "minimax-oauth",
-        "api_key": state["access_token"],
+        "api_key": api_key,
        "base_url": state["inference_base_url"].rstrip("/"),
        "source": "oauth",
    }
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@ -164,7 +164,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
               cli_only=True),
    CommandDef("skills", "Search, install, inspect, or manage skills",
               "Tools & Skills", cli_only=True,
-               subcommands=("search", "browse", "inspect", "install")),
+               subcommands=("search", "browse", "inspect", "install", "audit")),
    CommandDef("bundles", "List skill bundles (aliases /<name> for multiple skills)",
               "Tools & Skills"),
    CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
@ -449,7 +449,7 @@ def _iter_plugin_command_entries() -> list[tuple[str, str, str]]:
    :func:`hermes_cli.plugins.PluginContext.register_command`. They behave
    like ``CommandDef`` entries for gateway surfacing: they appear in the
    Telegram command menu, in Slack's ``/hermes`` subcommand mapping, and
-    (via :func:`gateway.platforms.discord._register_slash_commands`) in
+    (via :func:`plugins.platforms.discord.adapter._register_slash_commands`) in
    Discord's native slash command picker.

    Lookup is lazy so importing this module never forces plugin discovery
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@ -1009,6 +1009,19 @@ DEFAULT_CONFIG = {
        "compact": False,
        "personality": "kawaii",
        "resume_display": "full",
+        # Recap tuning for /resume and startup resume. The defaults match the
+        # historical hardcoded values; expose them as config so power users can
+        # widen or tighten the snapshot to taste.
+        "resume_exchanges": 10,            # max user+assistant pairs to show
+        "resume_max_user_chars": 300,      # truncate user message text
+        "resume_max_assistant_chars": 200, # truncate non-last assistant text
+        "resume_max_assistant_lines": 3,   # truncate non-last assistant lines
+        # When True (default), assistant entries that are *only* tool calls
+        # (no visible text) are skipped in the recap. This prevents the recap
+        # from being dominated by `[2 tool calls: terminal, read_file]` lines
+        # when an exchange was tool-heavy. Set False to restore the legacy
+        # behavior of showing tool-call summaries inline.
+        "resume_skip_tool_only": True,
        "busy_input_mode": "interrupt",  # interrupt | queue | steer
        # When true, `hermes --tui` auto-resumes the most recent human-
        # facing session on launch instead of forging a fresh one.
@ -1776,6 +1789,14 @@ DEFAULT_CONFIG = {
            # ~/.hermes/bin/ on first use.  When False you must install
            # bws yourself and have it on PATH.
            "auto_install": True,
+            # Bitwarden region / self-hosted endpoint.  Empty string
+            # means use the bws CLI default (US Cloud,
+            # https://vault.bitwarden.com).  Set to
+            # https://vault.bitwarden.eu for EU Cloud, or your own URL
+            # for self-hosted Bitwarden.  Plumbed into the bws subprocess
+            # as BWS_SERVER_URL.  Prompted for during
+            # `hermes secrets bitwarden setup`.
+            "server_url": "",
        },
    },

--- a/hermes_cli/curses_ui.py
+++ b/hermes_cli/curses_ui.py
@ -71,7 +71,7 @@ def curses_checklist(
                curses.use_default_colors()
                curses.init_pair(1, curses.COLOR_GREEN, -1)
                curses.init_pair(2, curses.COLOR_YELLOW, -1)
-                curses.init_pair(3, 8, -1)  # dim gray
+                curses.init_pair(3, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)  # dim gray
            cursor = 0
            scroll_offset = 0

--- a/hermes_cli/debug.py
+++ b/hermes_cli/debug.py
@ -14,6 +14,7 @@ Currently supports:
 import io
 import json
 import logging
+import re
 import sys
 import time
 import urllib.error
@ -36,6 +37,12 @@ _REDACTION_BANNER = (
    "run with --no-redact to disable]\n"
 )

+_EMAIL_ADDRESS_RE = re.compile(
+    r"(?<![A-Za-z0-9._%+-])"
+    r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}"
+    r"(?![A-Za-z0-9._%+-])"
+)
+

 # ---------------------------------------------------------------------------
 # Paste services — try paste.rs first, dpaste.com as fallback.
@ -398,7 +405,8 @@ def _redact_log_text(text: str) -> str:
        return text
    from agent.redact import redact_sensitive_text

-    return redact_sensitive_text(text, force=True)
+    text = redact_sensitive_text(text, force=True)
+    return _EMAIL_ADDRESS_RE.sub("[REDACTED_EMAIL]", text)


 def _capture_log_snapshot(
--- a/hermes_cli/env_loader.py
+++ b/hermes_cli/env_loader.py
@ -21,6 +21,44 @@ _CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")
 # tests) don't spam the same warning multiple times.
 _WARNED_KEYS: set[str] = set()

+# Map of env-var name → source label ("bitwarden", etc.) for credentials
+# that were injected by an external secret source during load_hermes_dotenv().
+# Used by setup / `hermes model` flows to label detected credentials so
+# users understand WHERE a key came from when their .env doesn't contain it
+# directly (otherwise the "credentials detected ✓" line looks identical to
+# the .env case and they don't know Bitwarden is wired up).
+_SECRET_SOURCES: dict[str, str] = {}
+
+
+def get_secret_source(env_var: str) -> str | None:
+    """Return the label of the secret source that supplied ``env_var``, if any.
+
+    Returns ``"bitwarden"`` for keys pulled from Bitwarden Secrets Manager
+    during the current process's ``load_hermes_dotenv()`` call.  Returns
+    ``None`` for keys that came from ``.env``, the shell environment, or
+    aren't tracked.
+    """
+    return _SECRET_SOURCES.get(env_var)
+
+
+def format_secret_source_suffix(env_var: str) -> str:
+    """Return a human-readable suffix like ``" (from Bitwarden)"`` or ``""``.
+
+    Use this when printing a detected credential so the user can see where
+    it came from.  Empty string when the credential came from ``.env`` or
+    the shell — those are the implicit / "default" cases users already
+    understand.
+    """
+    source = get_secret_source(env_var)
+    if not source:
+        return ""
+    if source == "bitwarden":
+        return " (from Bitwarden)"
+    # Generic fallback — future-proofing for additional secret sources
+    # (e.g. 1Password, HashiCorp Vault) without having to update every
+    # call site.
+    return f" (from {source})"
+

 def _format_offending_chars(value: str, limit: int = 3) -> str:
    """Return a compact 'U+XXXX ('c'), ...' summary of non-ASCII codepoints."""
@ -102,6 +140,10 @@ def _sanitize_env_file_if_needed(path: Path) -> None:
    This produces mangled values — e.g. a bot token duplicated 8×
    (see #8908).

+    Also strips embedded null bytes which crash ``os.environ[k] = v``
+    with ``ValueError: embedded null byte`` — typically introduced by
+    copy-pasting API keys from terminals or rich-text editors.
+
    We delegate to ``hermes_cli.config._sanitize_env_lines`` which
    already knows all valid Hermes env-var names and can split
    concatenated lines correctly.
@ -117,7 +159,11 @@ def _sanitize_env_file_if_needed(path: Path) -> None:
    try:
        with open(path, **read_kw) as f:
            original = f.readlines()
-        sanitized = _sanitize_env_lines(original)
+        # Strip null bytes before _sanitize_env_lines so they never
+        # reach python-dotenv (which passes them to os.environ and
+        # crashes with ValueError).
+        stripped = [line.replace("\x00", "") for line in original]
+        sanitized = _sanitize_env_lines(stripped)
        if sanitized != original:
            import tempfile
            fd, tmp = tempfile.mkstemp(
@ -206,6 +252,7 @@ def _apply_external_secret_sources(home_path: Path) -> None:
        override_existing=bool(bw_cfg.get("override_existing", False)),
        cache_ttl_seconds=float(bw_cfg.get("cache_ttl_seconds", 300)),
        auto_install=bool(bw_cfg.get("auto_install", True)),
+        server_url=str(bw_cfg.get("server_url", "") or "").strip(),
    )

    if result.applied:
@ -213,6 +260,12 @@ def _apply_external_secret_sources(home_path: Path) -> None:
        # and might have the same copy-paste corruption as a manually
        # edited .env (see #6843).
        _sanitize_loaded_credentials()
+        # Remember where these came from so the setup / `hermes model`
+        # flows can label detected credentials with "(from Bitwarden)" —
+        # otherwise users see "credentials ✓" with no hint that the value
+        # came from BSM rather than .env.
+        for name in result.applied:
+            _SECRET_SOURCES[name] = "bitwarden"
        print(
            f"  Bitwarden Secrets Manager: applied {len(result.applied)} "
            f"secret{'s' if len(result.applied) != 1 else ''} "
--- a/hermes_cli/fallback_cmd.py
+++ b/hermes_cli/fallback_cmd.py
@ -21,6 +21,8 @@ from __future__ import annotations
 import copy
 from typing import Any, Dict, List, Optional

+from hermes_cli.fallback_config import get_fallback_chain
+

 # ---------------------------------------------------------------------------
 # Helpers
@ -30,20 +32,11 @@ def _read_chain(config: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Return the normalized fallback chain as a list of dicts.

    Accepts both the new list format (``fallback_providers``) and the legacy
-    single-dict format (``fallback_model``).  The returned list is always a
-    fresh copy — callers can mutate without touching the config dict.
+    ``fallback_model`` format. When both are present, the effective chain is
+    merged with ``fallback_providers`` entries kept first. The returned list is
+    always a fresh copy — callers can mutate without touching the config dict.
    """
-    chain = config.get("fallback_providers") or []
-    if isinstance(chain, list):
-        result = [dict(e) for e in chain if isinstance(e, dict) and e.get("provider") and e.get("model")]
-        if result:
-            return result
-    legacy = config.get("fallback_model")
-    if isinstance(legacy, dict) and legacy.get("provider") and legacy.get("model"):
-        return [dict(legacy)]
-    if isinstance(legacy, list):
-        return [dict(e) for e in legacy if isinstance(e, dict) and e.get("provider") and e.get("model")]
-    return []
+    return get_fallback_chain(config)


 def _write_chain(config: Dict[str, Any], chain: List[Dict[str, Any]]) -> None:
--- a/hermes_cli/fallback_config.py
+++ b/hermes_cli/fallback_config.py
@ -0,0 +1,72 @@
+"""Helpers for reading the effective fallback provider chain from config."""
+
+from __future__ import annotations
+
+from typing import Any
+
+
+def _normalized_base_url(value: Any) -> str:
+    if not isinstance(value, str):
+        return ""
+    return value.strip().rstrip("/")
+
+
+def _iter_fallback_entries(raw: Any) -> list[dict[str, Any]]:
+    if isinstance(raw, dict):
+        candidates = [raw]
+    elif isinstance(raw, list):
+        candidates = raw
+    else:
+        return []
+
+    entries: list[dict[str, Any]] = []
+    for entry in candidates:
+        if not isinstance(entry, dict):
+            continue
+        provider = str(entry.get("provider") or "").strip()
+        model = str(entry.get("model") or "").strip()
+        if not provider or not model:
+            continue
+
+        normalized = dict(entry)
+        normalized["provider"] = provider
+        normalized["model"] = model
+
+        base_url = _normalized_base_url(entry.get("base_url"))
+        if base_url:
+            normalized["base_url"] = base_url
+
+        entries.append(normalized)
+    return entries
+
+
+def _entry_identity(entry: dict[str, Any]) -> tuple[str, str, str]:
+    return (
+        str(entry.get("provider") or "").strip().lower(),
+        str(entry.get("model") or "").strip().lower(),
+        _normalized_base_url(entry.get("base_url")).lower(),
+    )
+
+
+def get_fallback_chain(config: dict[str, Any] | None) -> list[dict[str, Any]]:
+    """Return the effective fallback chain merged across old and new config keys.
+
+    ``fallback_providers`` remains the primary source of truth and keeps its
+    order. Legacy ``fallback_model`` entries are appended afterwards unless
+    they target the same provider/model/base_url route as an earlier entry.
+    The returned list always contains fresh dict copies.
+    """
+
+    config = config or {}
+    chain: list[dict[str, Any]] = []
+    seen: set[tuple[str, str, str]] = set()
+
+    for key in ("fallback_providers", "fallback_model"):
+        for entry in _iter_fallback_entries(config.get(key)):
+            identity = _entry_identity(entry)
+            if identity in seen:
+                continue
+            seen.add(identity)
+            chain.append(entry)
+
+    return chain
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@ -3349,34 +3349,9 @@ _PLATFORMS = [
             "help": "For DMs, this is your user ID. You can set it later by typing /set-home in chat."},
        ],
    },
-    {
-        "key": "discord",
-        "label": "Discord",
-        "emoji": "💬",
-        "token_var": "DISCORD_BOT_TOKEN",
-        "setup_instructions": [
-            "1. Go to https://discord.com/developers/applications → New Application",
-            "2. Go to Bot → Reset Token → copy the bot token",
-            "3. Enable: Bot → Privileged Gateway Intents → Message Content Intent",
-            "4. Invite the bot to your server:",
-            "   OAuth2 → URL Generator → check BOTH scopes:",
-            "     - bot",
-            "     - applications.commands  (required for slash commands!)",
-            "   Bot Permissions: Send Messages, Read Message History, Attach Files",
-            "   Copy the URL and open it in your browser to invite.",
-            "5. Get your user ID: enable Developer Mode in Discord settings,",
-            "   then right-click your name → Copy ID",
-        ],
-        "vars": [
-            {"name": "DISCORD_BOT_TOKEN", "prompt": "Bot token", "password": True,
-             "help": "Paste the token from step 2 above."},
-            {"name": "DISCORD_ALLOWED_USERS", "prompt": "Allowed user IDs or usernames (comma-separated)", "password": False,
-             "is_allowlist": True,
-             "help": "Paste your user ID from step 5 above."},
-            {"name": "DISCORD_HOME_CHANNEL", "prompt": "Home channel ID (for cron/notification delivery, or empty to set later with /set-home)", "password": False,
-             "help": "Right-click a channel → Copy Channel ID (requires Developer Mode)."},
-        ],
-    },
+    # Discord moved to plugins/platforms/discord/ — its setup metadata is
+    # discovered dynamically via _all_platforms() from the platform registry
+    # entry registered by plugins/platforms/discord/adapter.py::register().
    {
        "key": "slack",
        "label": "Slack",
@ -3784,7 +3759,12 @@ def _platform_status(platform: dict) -> str:
                configured = bool(entry.is_connected(synthetic))
            except Exception:
                configured = False
-        if not configured:
+        else:
+            # No is_connected hook — fall back to check_fn as a coarse
+            # "are deps present" gate. Don't fall back when is_connected
+            # is defined and returned False; that would let "SDK is
+            # installed" override "no token configured" and incorrectly
+            # report the platform as ready.
            try:
                configured = bool(entry.check_fn())
            except Exception:
@ -4040,15 +4020,11 @@ def _setup_dingtalk():
        client_id, client_secret = result
        save_env_value("DINGTALK_CLIENT_ID", client_id)
        save_env_value("DINGTALK_CLIENT_SECRET", client_secret)
-        save_env_value("DINGTALK_ALLOW_ALL_USERS", "true")
        print()
        print_success(f"{emoji} {label} configured via QR scan!")
    else:
        # ── Manual entry ──
        _setup_standard_platform(dingtalk_platform)
-        # Also enable allow-all by default for convenience
-        if get_env_value("DINGTALK_CLIENT_ID"):
-            save_env_value("DINGTALK_ALLOW_ALL_USERS", "true")


 def _setup_wecom():
@ -4769,7 +4745,9 @@ def _builtin_setup_fn(key: str):
    from hermes_cli import setup as _s
    return {
        "telegram": _s._setup_telegram,
-        "discord": _s._setup_discord,
+        # discord moved into the plugin: setup_fn is registered by
+        # plugins/platforms/discord/adapter.py::register() and dispatched
+        # via the plugin path in _configure_platform().
        "slack": _s._setup_slack,
        "matrix": _s._setup_matrix,
        "mattermost": _s._setup_mattermost,
--- a/hermes_cli/gateway_windows.py
+++ b/hermes_cli/gateway_windows.py
@ -365,7 +365,9 @@ def _write_task_script() -> Path:

    content = _build_gateway_cmd_script(python_path, working_dir, hermes_home, profile_arg)
    script_path = get_task_script_path()
-    script_path.write_text(content, encoding="utf-8", newline="")
+    tmp = script_path.with_suffix(".tmp")
+    tmp.write_text(content, encoding="utf-8", newline="")
+    tmp.replace(script_path)
    return script_path


@ -436,7 +438,9 @@ def _install_startup_entry(script_path: Path) -> Path:
    """Write the Startup-folder fallback launcher. Returns its path."""
    entry = get_startup_entry_path()
    entry.parent.mkdir(parents=True, exist_ok=True)
-    entry.write_text(_build_startup_launcher(script_path), encoding="utf-8", newline="")
+    tmp = entry.with_suffix(".tmp")
+    tmp.write_text(_build_startup_launcher(script_path), encoding="utf-8", newline="")
+    tmp.replace(entry)
    return entry


--- a/hermes_cli/kanban.py
+++ b/hermes_cli/kanban.py
@ -550,6 +550,39 @@ def build_parser(parent_subparsers: argparse._SubParsersAction) -> argparse.Argu
    p_unblock = sub.add_parser("unblock", help="Return one or more blocked/scheduled tasks to ready")
    p_unblock.add_argument("task_ids", nargs="+")

+    p_promote = sub.add_parser(
+        "promote",
+        help="Manually move one or more todo/blocked tasks to ready (recovery path)",
+    )
+    p_promote.add_argument("task_id")
+    p_promote.add_argument(
+        "reason",
+        nargs="*",
+        help="Audit-trail reason (recorded on the task_events row)",
+    )
+    p_promote.add_argument(
+        "--ids",
+        nargs="+",
+        default=None,
+        help="Additional task ids to promote with the same reason (bulk mode)",
+    )
+    p_promote.add_argument(
+        "--force",
+        action="store_true",
+        help="Promote even if parent dependencies are not yet done/archived",
+    )
+    p_promote.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Validate the promotion without mutating state",
+    )
+    p_promote.add_argument(
+        "--json",
+        dest="json",
+        action="store_true",
+        help="Emit machine-readable JSON result",
+    )
+
    p_archive = sub.add_parser("archive", help="Archive one or more tasks")
    p_archive.add_argument("task_ids", nargs="*",
                           help="Task ids to archive (default mode)")
@ -899,6 +932,7 @@ def kanban_command(args: argparse.Namespace) -> int:
        "block":    _cmd_block,
        "schedule": _cmd_schedule,
        "unblock":  _cmd_unblock,
+        "promote":  _cmd_promote,
        "archive":  _cmd_archive,
        "tail":     _cmd_tail,
        "dispatch": _cmd_dispatch,
@ -1955,6 +1989,57 @@ def _cmd_unblock(args: argparse.Namespace) -> int:
    return 0 if not failed else 1


+def _cmd_promote(args: argparse.Namespace) -> int:
+    reason = " ".join(args.reason).strip() if args.reason else None
+    author = _profile_author()
+    as_json = getattr(args, "json", False)
+    extra_ids = list(getattr(args, "ids", None) or [])
+    # Dedupe while preserving order; positional task_id always first.
+    ids: list[str] = []
+    seen: set[str] = set()
+    for tid in [args.task_id, *extra_ids]:
+        if tid not in seen:
+            ids.append(tid)
+            seen.add(tid)
+
+    results: list[dict[str, object]] = []
+    with kb.connect() as conn:
+        for tid in ids:
+            ok, err = kb.promote_task(
+                conn,
+                tid,
+                actor=author,
+                reason=reason,
+                force=bool(args.force),
+                dry_run=bool(args.dry_run),
+            )
+            results.append({
+                "task_id": tid,
+                "promoted": ok,
+                "dry_run": bool(args.dry_run),
+                "forced": bool(args.force),
+                "reason": reason,
+                "error": err,
+            })
+
+    failed = [r for r in results if not r["promoted"]]
+    if as_json:
+        # Single-id stays a flat object for back-compat; bulk emits a list.
+        payload: object = results[0] if len(results) == 1 else results
+        print(json.dumps(payload, indent=2, ensure_ascii=False))
+        return 0 if not failed else 1
+
+    tag = " (dry)" if args.dry_run else ""
+    label = "Would promote" if args.dry_run else "Promoted"
+    for r in results:
+        if r["promoted"]:
+            suffix = f": {reason}" if reason else ""
+            print(f"{label} {r['task_id']} -> ready{tag}{suffix}")
+        else:
+            print(f"cannot promote {r['task_id']}: {r['error']}", file=sys.stderr)
+    return 0 if not failed else 1
+
+
 def _cmd_archive(args: argparse.Namespace) -> int:
    ids = list(args.task_ids or [])
    purge_ids = list(getattr(args, "purge_ids", None) or [])
--- a/hermes_cli/kanban_db.py
+++ b/hermes_cli/kanban_db.py
@ -75,6 +75,7 @@ import json
 import os
 import re
 import secrets
+import shutil
 import sqlite3
 import subprocess
 import sys
@ -82,6 +83,7 @@ import threading
 import logging
 import time
 from dataclasses import dataclass, field
+from datetime import datetime
 from pathlib import Path
 from typing import Any, Iterable, Optional

@ -1005,6 +1007,131 @@ def _validate_sqlite_header(path: Path) -> None:
    )


+class KanbanDbCorruptError(RuntimeError):
+    """Raised when an existing kanban DB file fails integrity checks.
+
+    Fail-closed guard against silent recreation of a corrupt board file,
+    which would otherwise destroy the user's tasks. Carries both the
+    original path and the timestamped backup we made before refusing.
+    """
+
+    def __init__(self, db_path: Path, backup_path: Optional[Path], reason: str):
+        self.db_path = db_path
+        self.backup_path = backup_path
+        self.reason = reason
+        backup_str = str(backup_path) if backup_path is not None else "<backup failed>"
+        super().__init__(
+            f"Refusing to open corrupt kanban DB at {db_path}: {reason}. "
+            f"Original preserved; backup at {backup_str}."
+        )
+
+
+def _backup_corrupt_db(path: Path) -> Optional[Path]:
+    """Copy a corrupt DB (and its WAL/SHM sidecars) to a timestamped backup.
+
+    Returns the backup path of the main DB file, or ``None`` if the copy
+    itself failed (the caller still raises loudly in that case).
+
+    Writes are confined to the original DB's parent directory. The
+    backup basename is derived purely from ``path.name``, never from
+    caller-supplied directory segments — no traversal is possible.
+    """
+    # Resolve once and pin the parent so subsequent path operations cannot
+    # escape it. ``Path.resolve()`` collapses any ``..`` segments and
+    # symlinks, and we only ever write inside ``parent``.
+    resolved = path.resolve()
+    parent = resolved.parent
+    base_name = resolved.name  # basename only
+    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    candidate = parent / f"{base_name}.corrupt.{stamp}.bak"
+    # Defensive: candidate must still be inside parent after construction.
+    # f-string interpolation of ``base_name`` cannot escape ``parent``
+    # because ``base_name`` is itself a resolved basename, but assert it
+    # anyway so static analyzers can see the containment guarantee.
+    if candidate.parent != parent:
+        return None
+    counter = 0
+    while candidate.exists():
+        counter += 1
+        candidate = parent / f"{base_name}.corrupt.{stamp}.{counter}.bak"
+        if candidate.parent != parent:
+            return None
+    try:
+        shutil.copy2(resolved, candidate)
+    except OSError:
+        return None
+    for suffix in ("-wal", "-shm"):
+        sidecar = parent / (base_name + suffix)
+        if sidecar.parent != parent or not sidecar.exists():
+            continue
+        try:
+            sidecar_backup = parent / (candidate.name + suffix)
+            if sidecar_backup.parent != parent:
+                continue
+            shutil.copy2(sidecar, sidecar_backup)
+        except OSError:
+            pass
+    return candidate
+
+
+def _guard_existing_db_is_healthy(path: Path) -> None:
+    """Run ``PRAGMA integrity_check`` on an existing non-empty DB file.
+
+    Opens the probe in read/write mode so SQLite can recover or
+    checkpoint a healthy WAL/hot-journal DB before we declare it
+    corrupt. If the file is malformed, copy it (and any WAL/SHM
+    sidecars) to a timestamped backup and raise
+    :class:`KanbanDbCorruptError` so callers cannot silently recreate
+    the schema on top of a damaged DB.
+
+    Transient lock/busy errors (``sqlite3.OperationalError``) are NOT
+    treated as corruption; they propagate raw so the caller sees a
+    normal lock failure and no spurious ``.corrupt`` backup is made.
+
+    No-op for missing files, zero-byte files (treated as fresh), and
+    paths already proven healthy this process (cache hit).
+
+    Path-trust note: ``path`` arrives via :func:`connect`, which itself
+    resolves it from an explicit ``db_path`` argument, the
+    :func:`kanban_db_path` env-var chain, or the kanban-home default —
+    all sources Hermes treats as user-controlled-but-trusted on the
+    user's own machine. We additionally resolve the path here and
+    confine all filesystem writes to its parent directory so any
+    accidental ``..`` segments are collapsed before any I/O happens.
+    """
+    # Resolve before any I/O. ``Path.resolve()`` normalizes ``..`` and
+    # symlinks, giving us a canonical path whose parent dir we can pin.
+    try:
+        resolved = path.resolve()
+    except OSError:
+        return
+    try:
+        if not resolved.exists() or resolved.stat().st_size == 0:
+            return
+    except OSError:
+        return
+    if str(resolved) in _INITIALIZED_PATHS:
+        return
+    reason: Optional[str] = None
+    try:
+        probe = sqlite3.connect(str(resolved), timeout=5, isolation_level=None)
+        try:
+            row = probe.execute("PRAGMA integrity_check").fetchone()
+        finally:
+            probe.close()
+        if not row or (row[0] or "").lower() != "ok":
+            reason = f"integrity_check returned {row[0] if row else '<no row>'!r}"
+    except sqlite3.OperationalError:
+        # Lock contention, busy, transient IO — not corruption. Let it propagate.
+        raise
+    except sqlite3.DatabaseError as exc:
+        reason = f"sqlite refused to open file: {exc}"
+    if reason is None:
+        return
+    backup = _backup_corrupt_db(resolved)
+    raise KanbanDbCorruptError(resolved, backup, reason)
+
+
 def connect(
    db_path: Optional[Path] = None,
    *,
@ -1033,7 +1160,13 @@ def connect(
    else:
        path = kanban_db_path(board=board)
    path.parent.mkdir(parents=True, exist_ok=True)
+    # Cheap byte-level check first — catches the #29507 TLS-overwrite shape
+    # and other invalid-header cases without opening a sqlite connection.
    _validate_sqlite_header(path)
+    # Full integrity probe — catches corruption past the header (malformed
+    # pages, broken internal metadata). Cached per-path after first success
+    # via _INITIALIZED_PATHS so it only runs once per process per path.
+    _guard_existing_db_is_healthy(path)
    resolved = str(path.resolve())
    conn = sqlite3.connect(str(path), isolation_level=None, timeout=30)
    try:
@ -1518,8 +1651,15 @@ def create_task(
    now = int(time.time())

    # Resolve workspace_path from board-level default_workdir when the
-    # caller did not specify one explicitly.
-    if workspace_path is None:
+    # caller did not specify one explicitly. Board defaults represent
+    # persistent project checkouts, so only persistent workspace kinds may
+    # inherit them. Scratch workspaces are auto-deleted on completion and
+    # must stay under the per-board scratch root created by
+    # ``resolve_workspace``; inheriting ``default_workdir`` for a scratch
+    # task would point cleanup at the user's source tree (#28818). The
+    # containment guard in ``_cleanup_workspace`` is the safety rail, but
+    # we also stop the bad state from being created in the first place.
+    if workspace_path is None and workspace_kind in {"dir", "worktree"}:
        board_slug = board if board else get_current_board()
        board_meta = read_board_metadata(board_slug)
        board_default = board_meta.get("default_workdir")
@ -2904,6 +3044,81 @@ def complete_task(
 # Workspace / tmux cleanup
 # ---------------------------------------------------------------------------

+def _is_managed_scratch_path(p: Path) -> bool:
+    """Return True iff *p* is a strict descendant of a kanban-managed scratch root.
+
+    A managed root is exclusively a ``workspaces/`` directory — never the
+    broader kanban home, a board root, or sibling subtrees like ``logs/`` or
+    ``boards/<slug>/`` itself. Allowed roots:
+
+    * ``HERMES_KANBAN_WORKSPACES_ROOT`` when set (worker-side override
+      injected by the dispatcher).
+    * ``<kanban_home>/kanban/workspaces`` — legacy default-board scratch root.
+    * ``<kanban_home>/kanban/boards/<slug>/workspaces`` for each board slug
+      that currently exists on disk.
+
+    The check requires strict descendancy: a path equal to one of these
+    roots is NOT managed (deleting the workspaces root would wipe every
+    task's scratch dir at once), and a path that resolves to ``<kanban_home>
+    /kanban`` itself, ``<kanban_home>/kanban/logs``, or
+    ``<kanban_home>/kanban/boards/<slug>`` is rejected because those
+    subtrees hold Hermes' own DB, metadata, and logs, not task workspaces.
+
+    Used by :func:`_cleanup_workspace` to refuse to ``shutil.rmtree`` paths
+    outside Hermes-managed storage. A board ``default_workdir`` pointing at a
+    real source tree can otherwise pair with ``workspace_kind='scratch'`` and
+    cause task completion to delete user data (#28818).
+    """
+    try:
+        p_abs = p.resolve(strict=False)
+    except OSError:
+        return False
+    roots: list[Path] = []
+    override = os.environ.get("HERMES_KANBAN_WORKSPACES_ROOT", "").strip()
+    if override:
+        try:
+            roots.append(Path(override).expanduser().resolve(strict=False))
+        except OSError:
+            pass
+    try:
+        home = kanban_home()
+    except OSError:
+        home = None
+    if home is not None:
+        try:
+            roots.append((home / "kanban" / "workspaces").resolve(strict=False))
+        except OSError:
+            pass
+        try:
+            boards_parent = (home / "kanban" / "boards").resolve(strict=False)
+        except OSError:
+            boards_parent = None
+        if boards_parent is not None:
+            try:
+                entries = list(boards_parent.iterdir())
+            except OSError:
+                entries = []
+            for entry in entries:
+                try:
+                    if not entry.is_dir():
+                        continue
+                except OSError:
+                    continue
+                try:
+                    roots.append((entry / "workspaces").resolve(strict=False))
+                except OSError:
+                    continue
+    for root in roots:
+        if p_abs == root:
+            continue
+        try:
+            if p_abs.is_relative_to(root):
+                return True
+        except ValueError:
+            continue
+    return False
+
+
 def _cleanup_workspace(conn: sqlite3.Connection, task_id: str) -> None:
    """Remove a task's scratch workspace dir and kill its stale tmux session.

@ -2926,8 +3141,21 @@ def _cleanup_workspace(conn: sqlite3.Connection, task_id: str) -> None:
        import shutil
        wp = Path(path)
        if wp.is_dir():
-            shutil.rmtree(wp, ignore_errors=True)
-            _log.debug("Removed scratch workspace: %s", wp)
+            # Containment guard (#28818): a board's ``default_workdir`` can
+            # pair ``workspace_kind='scratch'`` with a user-supplied path
+            # pointing at a real source tree. Without this check, task
+            # completion would unconditionally ``shutil.rmtree`` that path
+            # and silently delete the user's source data.
+            if _is_managed_scratch_path(wp):
+                shutil.rmtree(wp, ignore_errors=True)
+                _log.debug("Removed scratch workspace: %s", wp)
+            else:
+                _log.warning(
+                    "Refusing to remove out-of-scratch workspace for task %s: %s "
+                    "(workspace_kind='scratch' but path is outside any "
+                    "kanban-managed workspaces root)",
+                    task_id, wp,
+                )
        # Also kill the tmux session for the worker that owned this task,
        # if the tmux session is now dead (worker process exited).
        _cleanup_worker_tmux(conn, task_id)
@ -2961,6 +3189,93 @@ def _cleanup_worker_tmux(conn: sqlite3.Connection, task_id: str) -> None:
        pass  # best-effort — never block completion


+# ---------------------------------------------------------------------------
+# First-use tip for scratch workspaces
+# ---------------------------------------------------------------------------
+#
+# Scratch workspaces are intentionally ephemeral — ``_cleanup_workspace``
+# removes them as soon as ``complete_task`` runs.  New users often don't
+# realize that and lose worker output (community report, May 2026).  The
+# behavior is right; the lack of warning is the bug.
+#
+# On the FIRST scratch workspace materialization across the whole install
+# we:
+#   1. Log a warning line on the dispatcher logger.
+#   2. Append a ``tip_scratch_workspace`` event on the task so it's visible
+#      via ``hermes kanban show <id>`` and the dashboard.
+#   3. Touch a sentinel file under ``kanban_home() / '.scratch_tip_shown'``
+#      so we don't repeat the tip — once you know, you know.
+#
+# Scope is per-install, not per-board: a user creating a second board
+# already learned the lesson on board #1.
+
+_SCRATCH_TIP_SENTINEL_NAME = ".scratch_tip_shown"
+
+_SCRATCH_TIP_MESSAGE = (
+    "scratch workspaces are ephemeral — they're deleted when the task "
+    "completes. Use --workspace worktree: (git worktree) or "
+    "--workspace dir:/abs/path (existing dir) to preserve worker output."
+)
+
+
+def _scratch_tip_sentinel_path() -> Path:
+    """Path to the per-install scratch-workspace-tip sentinel file."""
+    return kanban_home() / _SCRATCH_TIP_SENTINEL_NAME
+
+
+def _scratch_tip_shown() -> bool:
+    """True iff the scratch-workspace tip has already been emitted on this
+    install. Best-effort — any error means we re-emit, which is the safer
+    failure mode for a help message."""
+    try:
+        return _scratch_tip_sentinel_path().exists()
+    except OSError:
+        return False
+
+
+def _mark_scratch_tip_shown() -> None:
+    """Touch the sentinel so future scratch workspaces stay silent.
+
+    Best-effort: a failure here just means the tip might appear once more,
+    which is preferable to crashing dispatch over a help message.
+    """
+    try:
+        path = _scratch_tip_sentinel_path()
+        path.parent.mkdir(parents=True, exist_ok=True)
+        path.touch(exist_ok=True)
+    except OSError:
+        pass
+
+
+def _maybe_emit_scratch_tip(
+    conn: sqlite3.Connection,
+    task_id: str,
+    workspace_kind: Optional[str],
+) -> None:
+    """Emit the first-use scratch-workspace tip exactly once per install.
+
+    Called from the dispatcher right after a scratch workspace is
+    materialized. No-op for ``worktree`` / ``dir`` workspaces (they're
+    preserved by design) and no-op after the sentinel exists.
+    """
+    if (workspace_kind or "scratch") != "scratch":
+        return
+    if _scratch_tip_shown():
+        return
+    try:
+        _log.warning("kanban: %s (task %s)", _SCRATCH_TIP_MESSAGE, task_id)
+        with write_txn(conn):
+            _append_event(
+                conn, task_id, "tip_scratch_workspace",
+                {"message": _SCRATCH_TIP_MESSAGE},
+            )
+    except Exception:
+        # Best-effort — never block the spawn loop over a help message.
+        pass
+    finally:
+        _mark_scratch_tip_shown()
+
+
 def edit_completed_task_result(
    conn: sqlite3.Connection,
    task_id: str,
@ -3083,6 +3398,77 @@ def block_task(
        return True


+
+def promote_task(
+    conn: sqlite3.Connection,
+    task_id: str,
+    *,
+    actor: str,
+    reason: Optional[str] = None,
+    force: bool = False,
+    dry_run: bool = False,
+) -> tuple[bool, Optional[str]]:
+    """Manually promote a `todo` or `blocked` task to `ready`.
+
+    Mirrors the automatic promotion done by ``recompute_ready`` but
+    drives it from a deliberate operator action with an audit-trail
+    entry. Refuses to promote if any parent dep is not in a terminal
+    state (`done`/`archived`) unless ``force=True``. Does NOT change
+    assignee or claim state. Returns ``(True, None)`` on success and
+    ``(False, reason)`` if refused. ``dry_run=True`` validates the
+    promotion would succeed without mutating state.
+    """
+    row = conn.execute(
+        "SELECT status FROM tasks WHERE id = ?", (task_id,)
+    ).fetchone()
+    if row is None:
+        return False, f"task {task_id} not found"
+
+    cur_status = row["status"]
+    if cur_status not in ("todo", "blocked"):
+        return False, (
+            f"task {task_id} is {cur_status!r}; promote only applies to "
+            f"'todo' or 'blocked'"
+        )
+
+    if not force:
+        parents = conn.execute(
+            "SELECT t.id, t.status FROM tasks t "
+            "JOIN task_links l ON l.parent_id = t.id "
+            "WHERE l.child_id = ?",
+            (task_id,),
+        ).fetchall()
+        unsatisfied = [
+            p["id"] for p in parents
+            if p["status"] not in ("done", "archived")
+        ]
+        if unsatisfied:
+            return False, (
+                f"unsatisfied parent dependencies: "
+                f"{', '.join(unsatisfied)} (use --force to override)"
+            )
+
+    if dry_run:
+        return True, None
+
+    with write_txn(conn):
+        upd = conn.execute(
+            "UPDATE tasks SET status = 'ready' "
+            "WHERE id = ? AND status IN ('todo', 'blocked')",
+            (task_id,),
+        )
+        if upd.rowcount != 1:
+            return False, f"task {task_id} status changed during promotion"
+        _append_event(
+            conn,
+            task_id,
+            "promoted_manual",
+            {"actor": actor, "reason": reason, "forced": force},
+        )
+
+    return True, None
+
+
 def unblock_task(conn: sqlite3.Connection, task_id: str) -> bool:
    """Transition ``blocked``/``scheduled`` -> ready or todo.

@ -4892,6 +5278,7 @@ def dispatch_once(
            continue
        # Persist the resolved workspace path so the worker can cd there.
        set_workspace_path(conn, claimed.id, str(workspace))
+        _maybe_emit_scratch_tip(conn, claimed.id, claimed.workspace_kind)
        _spawn = spawn_fn if spawn_fn is not None else _default_spawn
        try:
            # Back-compat: older spawn_fn signatures accept only
@ -4970,6 +5357,7 @@ def dispatch_once(
            continue
        # Persist the resolved workspace path so the worker can cd there.
        set_workspace_path(conn, claimed.id, str(workspace))
+        _maybe_emit_scratch_tip(conn, claimed.id, claimed.workspace_kind)
        # Force-load sdlc-review skill for review agents.  The
        # _default_spawn function already auto-loads kanban-worker, and
        # appends task.skills via --skills.  Setting task.skills here
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@ -61,12 +61,76 @@ try:
 except ModuleNotFoundError:
    pass

+import os
+import sys
+
+
+def _is_termux_startup_environment_fast() -> bool:
+    """Tiny Termux check for pre-import startup shortcuts."""
+    prefix = os.environ.get("PREFIX", "")
+    return bool(
+        os.environ.get("TERMUX_VERSION")
+        or "com.termux/files/usr" in prefix
+        or prefix.startswith("/data/data/com.termux/")
+    )
+
+
+def _is_termux_fast_version_argv(argv: list[str]) -> bool:
+    return argv in (["--version"], ["-V"], ["version"])
+
+
+def _read_openai_version_fast() -> str | None:
+    """Read OpenAI SDK version without importing ``importlib.metadata``."""
+    for base in sys.path:
+        if not base:
+            base = os.getcwd()
+        version_file = os.path.join(base, "openai", "_version.py")
+        try:
+            with open(version_file, encoding="utf-8") as handle:
+                for line in handle:
+                    stripped = line.strip()
+                    if not stripped.startswith("__version__"):
+                        continue
+                    _key, _sep, value = stripped.partition("=")
+                    value = value.split("#", 1)[0].strip().strip("\"'")
+                    return value or None
+        except OSError:
+            continue
+    return None
+
+
+def _print_fast_version_info() -> None:
+    from hermes_cli import __release_date__, __version__
+
+    project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
+    print(f"Hermes Agent v{__version__} ({__release_date__})")
+    print(f"Project: {project_root}")
+    print(f"Python: {sys.version.split()[0]}")
+
+    openai_version = _read_openai_version_fast()
+    print(f"OpenAI SDK: {openai_version}" if openai_version else "OpenAI SDK: Not installed")
+
+
+def _try_termux_ultrafast_version() -> bool:
+    """Handle ``hermes --version`` before config/logging imports on Termux."""
+    if os.environ.get("HERMES_TERMUX_DISABLE_FAST_CLI") == "1":
+        return False
+    if not _is_termux_startup_environment_fast():
+        return False
+    if not _is_termux_fast_version_argv(sys.argv[1:]):
+        return False
+
+    _print_fast_version_info()
+    return True
+
+
+if _try_termux_ultrafast_version():
+    raise SystemExit(0)
+
 import argparse
 import json
-import os
 import shutil
 import subprocess
-import sys
 from pathlib import Path
 from typing import Optional

@ -591,7 +655,7 @@ def _session_browse_picker(sessions: list) -> Optional[str]:
                curses.init_pair(1, curses.COLOR_GREEN, -1)  # selected
                curses.init_pair(2, curses.COLOR_YELLOW, -1)  # header
                curses.init_pair(3, curses.COLOR_CYAN, -1)  # search
-                curses.init_pair(4, 8, -1)  # dim
+                curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)  # dim

            cursor = 0
            scroll_offset = 0
@ -1390,7 +1454,7 @@ def _launch_tui(
    provider: Optional[str] = None,
    toolsets: object = None,
    skills: object = None,
-    verbose: bool = False,
+    verbose: Optional[bool] = None,
    quiet: bool = False,
    query: Optional[str] = None,
    image: Optional[str] = None,
@ -1699,7 +1763,7 @@ def cmd_chat(args):
            provider=getattr(args, "provider", None),
            toolsets=getattr(args, "toolsets", None),
            skills=getattr(args, "skills", None),
-            verbose=getattr(args, "verbose", False),
+            verbose=getattr(args, "verbose", None),
            quiet=getattr(args, "quiet", False),
            query=getattr(args, "query", None),
            image=getattr(args, "image", None),
@ -1719,7 +1783,7 @@ def cmd_chat(args):
        "provider": getattr(args, "provider", None),
        "toolsets": args.toolsets,
        "skills": getattr(args, "skills", None),
-        "verbose": args.verbose,
+        "verbose": getattr(args, "verbose", None),
        "quiet": getattr(args, "quiet", False),
        "query": args.query,
        "image": getattr(args, "image", None),
@ -1730,6 +1794,7 @@ def cmd_chat(args):
        "max_turns": getattr(args, "max_turns", None),
        "ignore_rules": getattr(args, "ignore_rules", False),
        "ignore_user_config": getattr(args, "ignore_user_config", False),
+        "compact": getattr(args, "compact", False),
    }
    # Filter out None values
    kwargs = {k: v for k, v in kwargs.items() if v is not None}
@ -2433,10 +2498,34 @@ _AUX_TASKS: list[tuple[str, str, str]] = [
    ("mcp", "MCP", "MCP tool reasoning"),
    ("title_generation", "Title generation", "session titles"),
    ("skills_hub", "Skills hub", "skills search/install"),
+    ("triage_specifier", "Triage specifier", "kanban spec fleshing"),
+    ("kanban_decomposer", "Kanban decomposer", "task decomposition"),
+    ("profile_describer", "Profile describer", "auto profile descriptions"),
    ("curator", "Curator", "skill-usage review pass"),
 ]


+def _all_aux_tasks() -> list[tuple[str, str, str]]:
+    """Return built-in + plugin-registered auxiliary tasks for picker/menu use.
+
+    Built-in tasks come first (preserving order), followed by plugin tasks
+    sorted by key. Used by ``_aux_config_menu``, ``_reset_aux_to_auto``, and
+    display-name lookups so plugin-registered tasks (registered via
+    :meth:`hermes_cli.plugins.PluginContext.register_auxiliary_task`) appear
+    in the same surfaces as built-in ones without core knowing about them.
+    """
+    tasks = list(_AUX_TASKS)
+    try:
+        from hermes_cli.plugins import get_plugin_auxiliary_tasks
+        for entry in get_plugin_auxiliary_tasks():
+            tasks.append((entry["key"], entry["display_name"], entry["description"]))
+    except Exception:
+        # Plugin discovery failure must not break the aux config UI.
+        # Built-in tasks remain available.
+        pass
+    return tasks
+
+
 def _format_aux_current(task_cfg: dict) -> str:
    """Render the current aux config for display in the task menu."""
    if not isinstance(task_cfg, dict):
@ -2487,7 +2576,11 @@ def _save_aux_choice(


 def _reset_aux_to_auto() -> int:
-    """Reset every known aux task back to auto/empty. Returns number reset."""
+    """Reset every known aux task back to auto/empty. Returns number reset.
+
+    Includes plugin-registered tasks (via ``_all_aux_tasks``) so a plugin
+    that contributed an auxiliary task gets reset alongside built-ins.
+    """
    from hermes_cli.config import load_config, save_config

    cfg = load_config()
@ -2496,7 +2589,7 @@ def _reset_aux_to_auto() -> int:
        aux = {}
        cfg["auxiliary"] = aux
    count = 0
-    for task, _name, _desc in _AUX_TASKS:
+    for task, _name, _desc in _all_aux_tasks():
        entry = aux.setdefault(task, {})
        if not isinstance(entry, dict):
            entry = {}
@ -2539,10 +2632,11 @@ def _aux_config_menu() -> None:
        print()

        # Build the task menu with current settings inline
-        name_col = max(len(name) for _, name, _ in _AUX_TASKS) + 2
-        desc_col = max(len(desc) for _, _, desc in _AUX_TASKS) + 4
+        all_tasks = _all_aux_tasks()
+        name_col = max(len(name) for _, name, _ in all_tasks) + 2
+        desc_col = max(len(desc) for _, _, desc in all_tasks) + 4
        entries: list[tuple[str, str]] = []
-        for task_key, name, desc in _AUX_TASKS:
+        for task_key, name, desc in all_tasks:
            task_cfg = (
                aux.get(task_key, {}) if isinstance(aux.get(task_key), dict) else {}
            )
@ -2593,7 +2687,7 @@ def _aux_select_for_task(task: str) -> None:
    current_model = str(task_cfg.get("model") or "").strip()
    current_base_url = str(task_cfg.get("base_url") or "").strip()

-    display_name = next((name for key, name, _ in _AUX_TASKS if key == task), task)
+    display_name = next((name for key, name, _ in _all_aux_tasks() if key == task), task)

    # Gather authenticated providers (has credentials + curated model list)
    try:
@ -2664,7 +2758,7 @@ def _aux_flow_provider_model(
    from hermes_cli.auth import _prompt_model_selection
    from hermes_cli.models import get_pricing_for_provider

-    display_name = next((name for key, name, _ in _AUX_TASKS if key == task), task)
+    display_name = next((name for key, name, _ in _all_aux_tasks() if key == task), task)

    # Fetch live pricing for this provider (non-blocking)
    pricing: dict = {}
@ -2710,7 +2804,7 @@ def _aux_flow_custom_endpoint(task: str, task_cfg: dict) -> None:
    """Prompt for a direct OpenAI-compatible base_url + optional api_key/model."""
    import getpass

-    display_name = next((name for key, name, _ in _AUX_TASKS if key == task), task)
+    display_name = next((name for key, name, _ in _all_aux_tasks() if key == task), task)
    current_base_url = str(task_cfg.get("base_url") or "").strip()
    current_model = str(task_cfg.get("model") or "").strip()

@ -4662,7 +4756,9 @@ def _model_flow_copilot(config, current_model=""):
        source = creds.get("source", "")
    else:
        if source in {"GITHUB_TOKEN", "GH_TOKEN"}:
-            print(f"  GitHub token: {api_key[:8]}... ✓ ({source})")
+            from hermes_cli.env_loader import format_secret_source_suffix
+            bw_suffix = format_secret_source_suffix(source)
+            print(f"  GitHub token: {api_key[:8]}... ✓ ({source}{bw_suffix})")
        elif source == "gh auth token":
            print("  GitHub token: ✓ (from `gh auth token`)")
        else:
@ -4919,7 +5015,10 @@ def _prompt_api_key(pconfig, existing_key: str, provider_id: str = "") -> tuple:
        return new_key, False

    # Already configured — offer K / R / C ────────────────────────────────
-    print(f"  {pconfig.name} API key: {existing_key[:8]}... ✓")
+    from hermes_cli.env_loader import format_secret_source_suffix
+
+    source_suffix = format_secret_source_suffix(key_env) if key_env else ""
+    print(f"  {pconfig.name} API key: {existing_key[:8]}... ✓{source_suffix}")
    if not key_env:
        # Nothing we can rewrite; just acknowledge and move on.
        print()
@ -5202,7 +5301,9 @@ def _model_flow_bedrock_api_key(config, region, current_model=""):
    # Prompt for API key
    existing_key = get_env_value("AWS_BEARER_TOKEN_BEDROCK") or ""
    if existing_key:
-        print(f"  Bedrock API Key: {existing_key[:12]}... ✓")
+        from hermes_cli.env_loader import format_secret_source_suffix
+        source_suffix = format_secret_source_suffix("AWS_BEARER_TOKEN_BEDROCK")
+        print(f"  Bedrock API Key: {existing_key[:12]}... ✓{source_suffix}")
    else:
        print(f"  Endpoint: {mantle_base_url}")
        print()
@ -5873,7 +5974,22 @@ def _model_flow_anthropic(config, current_model=""):
    if has_creds:
        # Show what we found
        if existing_key:
-            print(f"  Anthropic credentials: {existing_key[:12]}... ✓")
+            from hermes_cli.env_loader import format_secret_source_suffix
+            from hermes_cli.auth import PROVIDER_REGISTRY
+
+            # Surface which env var supplied the key so users with
+            # Bitwarden see "(from Bitwarden)" — without this, a detected
+            # BSM key looks identical to a key in .env and users assume
+            # nothing is wired up.
+            source_suffix = ""
+            for var in PROVIDER_REGISTRY["anthropic"].api_key_env_vars:
+                if os.getenv(var, "").strip() == existing_key:
+                    source_suffix = format_secret_source_suffix(var)
+                    if source_suffix:
+                        break
+            print(
+                f"  Anthropic credentials: {existing_key[:12]}... ✓{source_suffix}"
+            )
        elif cc_available:
            print("  Claude Code credentials: ✓ (auto-detected)")
        print()
@ -6007,6 +6123,13 @@ def cmd_webhook(args):
    webhook_command(args)


+def cmd_portal(args):
+    """Nous Portal status and Tool Gateway routing surface."""
+    from hermes_cli.portal_cli import portal_command
+
+    return portal_command(args)
+
+
 def cmd_slack(args):
    """Slack integration helpers.

@ -6059,6 +6182,19 @@ def cmd_doctor(args):
    run_doctor(args)


+def cmd_security(args):
+    """Dispatch `hermes security <subcmd>`."""
+    sub = getattr(args, "security_command", None)
+    if sub in ("audit", None):
+        from hermes_cli.security_audit import cmd_security_audit
+
+        # Default subcommand is `audit` when no subcmd is given.
+        code = cmd_security_audit(args)
+        sys.exit(int(code or 0))
+    print(f"unknown security subcommand: {sub}", file=sys.stderr)
+    sys.exit(2)
+
+
 def cmd_dump(args):
    """Dump setup summary for support/debugging."""
    from hermes_cli.dump import run_dump
@ -6835,8 +6971,8 @@ def _update_via_zip(args):
    )

    print("→ Downloading latest version...")
+    tmp_dir = tempfile.mkdtemp(prefix="hermes-update-")
    try:
-        tmp_dir = tempfile.mkdtemp(prefix="hermes-update-")
        zip_path = os.path.join(tmp_dir, f"hermes-agent-{branch}.zip")
        urlretrieve(zip_url, zip_path)

@ -6883,12 +7019,11 @@ def _update_via_zip(args):

        print(f"✓ Updated {update_count} items from ZIP")

-        # Cleanup
-        shutil.rmtree(tmp_dir, ignore_errors=True)
-
    except Exception as e:
        print(f"✗ ZIP update failed: {e}")
        sys.exit(1)
+    finally:
+        shutil.rmtree(tmp_dir, ignore_errors=True)

    # Clear stale bytecode after ZIP extraction
    removed = _clear_bytecode_cache(PROJECT_ROOT)
@ -9720,6 +9855,7 @@ def _coalesce_session_name_args(argv: list) -> list:
        "honcho",
        "claw",
        "plugins",
+        "security",
        "acp",
        "webhook",
        "memory",
@ -10557,10 +10693,10 @@ _BUILTIN_SUBCOMMANDS = frozenset(
        "config", "cron", "curator", "dashboard", "debug", "doctor",
        "dump", "fallback", "gateway", "hooks", "import", "insights",
        "kanban", "login", "logout", "logs", "lsp", "mcp", "memory", "migrate",
-        "model", "pairing", "plugins", "postinstall", "profile", "proxy",
+        "model", "pairing", "plugins", "portal", "postinstall", "profile", "proxy",
        "send", "sessions", "setup",
        "skills", "slack", "status", "tools", "uninstall", "update",
-        "version", "webhook", "whatsapp", "chat", "secrets",
+        "version", "webhook", "whatsapp", "chat", "secrets", "security",
        # Help-ish invocations — plugin commands not being listed in
        # top-level --help is an acceptable trade-off for skipping an
        # expensive eager import of every bundled plugin module.
@ -10717,10 +10853,6 @@ def _set_chat_arg_defaults(args) -> None:
            setattr(args, attr, default)


-def _is_termux_fast_version_argv(argv: list[str]) -> bool:
-    return argv in (["--version"], ["-V"], ["version"])
-
-
 def _try_termux_fast_cli_launch() -> bool:
    """Run obvious Termux non-TUI chat/oneshot/version paths on a light parser."""
    if not _is_termux_startup_environment():
@ -10774,7 +10906,17 @@ def _try_termux_fast_cli_launch() -> bool:

    if args.command in {None, "chat"}:
        _set_chat_arg_defaults(args)
-        _prepare_agent_startup(args)
+        interactive_prompt = not getattr(args, "query", None) and not getattr(args, "image", None)
+        if interactive_prompt:
+            # Bare Termux CLI should reach the prompt first and do agent-only
+            # discovery on the first submitted turn instead of before input.
+            setattr(args, "compact", True)
+            os.environ["HERMES_DEFER_AGENT_STARTUP"] = "1"
+            os.environ["HERMES_FAST_STARTUP_BANNER"] = "1"
+            if getattr(args, "accept_hooks", False):
+                os.environ["HERMES_ACCEPT_HOOKS"] = "1"
+        else:
+            _prepare_agent_startup(args)
        cmd_chat(args)
        return True

@ -11288,6 +11430,13 @@ def main():
        help="On existing installs: only prompt for items that are missing "
        "or unset, instead of running the full reconfigure wizard.",
    )
+    setup_parser.add_argument(
+        "--portal",
+        action="store_true",
+        help="One-shot Nous Portal setup: log in via OAuth, set Nous as the "
+        "inference provider, and opt into the Tool Gateway. Skips the "
+        "rest of the wizard.",
+    )
    setup_parser.set_defaults(func=cmd_setup)

    # =========================================================================
@ -11763,6 +11912,12 @@ def main():

    webhook_parser.set_defaults(func=cmd_webhook)

+    # =========================================================================
+    # portal command — Nous Portal status + Tool Gateway routing
+    # =========================================================================
+    from hermes_cli.portal_cli import add_parser as _add_portal_parser
+    _add_portal_parser(subparsers)
+
    # =========================================================================
    # kanban command — multi-profile collaboration board
    # =========================================================================
@ -11861,6 +12016,58 @@ def main():
    )
    doctor_parser.set_defaults(func=cmd_doctor)

+    # =========================================================================
+    # security command — on-demand supply-chain audit
+    # =========================================================================
+    security_parser = subparsers.add_parser(
+        "security",
+        help="Supply-chain audit (OSV.dev) for venv, plugins, and MCP servers",
+        description=(
+            "On-demand vulnerability scan against OSV.dev. Covers the Hermes "
+            "venv (installed PyPI dists), Python deps declared by plugins under "
+            "~/.hermes/plugins/, and pinned npx/uvx MCP servers in config.yaml. "
+            "Does NOT scan globally-installed packages or editor/browser extensions."
+        ),
+    )
+    security_subparsers = security_parser.add_subparsers(
+        dest="security_command",
+        metavar="<subcommand>",
+    )
+
+    audit_parser = security_subparsers.add_parser(
+        "audit",
+        help="Run a one-shot supply-chain audit",
+        description="Query OSV.dev for known vulnerabilities in installed components.",
+    )
+    audit_parser.add_argument(
+        "--json",
+        action="store_true",
+        help="Emit machine-readable JSON instead of human-readable text",
+    )
+    audit_parser.add_argument(
+        "--fail-on",
+        default="critical",
+        choices=["low", "moderate", "high", "critical"],
+        help="Exit non-zero when any finding meets this severity (default: critical)",
+    )
+    audit_parser.add_argument(
+        "--skip-venv",
+        action="store_true",
+        help="Skip scanning the Hermes Python venv",
+    )
+    audit_parser.add_argument(
+        "--skip-plugins",
+        action="store_true",
+        help="Skip scanning plugin requirements files",
+    )
+    audit_parser.add_argument(
+        "--skip-mcp",
+        action="store_true",
+        help="Skip scanning pinned MCP servers in config.yaml",
+    )
+    audit_parser.set_defaults(func=cmd_security)
+    security_parser.set_defaults(func=cmd_security)
+
    # =========================================================================
    # dump command
    # =========================================================================
@ -12186,6 +12393,11 @@ Examples:
    skills_audit.add_argument(
        "name", nargs="?", help="Specific skill to audit (default: all)"
    )
+    skills_audit.add_argument(
+        "--deep",
+        action="store_true",
+        help="Run AST-level analysis on Python files (opt-in diagnostic)",
+    )

    skills_uninstall = skills_subparsers.add_parser(
        "uninstall", help="Remove a hub-installed skill"
@ -13665,7 +13877,7 @@ Examples:
            ("model", None),
            ("provider", None),
            ("toolsets", None),
-            ("verbose", False),
+            ("verbose", None),
            ("worktree", False),
        ]:
            if not hasattr(args, attr):
@ -13680,7 +13892,7 @@ Examples:
            ("model", None),
            ("provider", None),
            ("toolsets", None),
-            ("verbose", False),
+            ("verbose", None),
            ("resume", None),
            ("continue_last", None),
            ("worktree", False),
--- a/hermes_cli/oneshot.py
+++ b/hermes_cli/oneshot.py
@ -17,7 +17,6 @@ Model / provider selection mirrors `hermes chat`:

 Env var fallbacks (used when the corresponding arg is not passed):
    - HERMES_INFERENCE_MODEL
-    - HERMES_INFERENCE_PROVIDER  (already read by resolve_runtime_provider)
 """

 from __future__ import annotations
@ -28,6 +27,8 @@ import sys
 from contextlib import redirect_stderr, redirect_stdout
 from typing import Optional

+from hermes_cli.fallback_config import get_fallback_chain
+

 def _normalize_toolsets(toolsets: object = None) -> list[str] | None:
    if not toolsets:
@ -133,9 +134,8 @@ def run_oneshot(
        prompt: The user message to send.
        model: Optional model override. Falls back to HERMES_INFERENCE_MODEL
            env var, then config.yaml's model.default / model.model.
-        provider: Optional provider override. Falls back to
-            HERMES_INFERENCE_PROVIDER env var, then config.yaml's model.provider,
-            then "auto".
+        provider: Optional provider override. Falls back to config.yaml's
+            model.provider, then "auto".
        toolsets: Optional comma-separated string or iterable of toolsets.

    Returns the exit code.  Caller should sys.exit() with the return.
@ -301,14 +301,9 @@ def _run_agent(
        toolsets_list = sorted(_get_platform_tools(cfg, "cli"))

    session_db = _create_session_db_for_oneshot()
-    # Read fallback chain from profile config — supports both the new list
-    # format (fallback_providers) and the legacy single-dict (fallback_model).
-    # Mirrors the same normalization in cli.py so oneshot workers (e.g. kanban
-    # workers spawned via `hermes -p <profile> chat -q ...`) honour the
-    # profile's fallback chain just like interactive sessions do.
-    _fb = cfg.get("fallback_providers") or cfg.get("fallback_model") or []
-    if isinstance(_fb, dict):
-        _fb = [_fb] if _fb.get("provider") and _fb.get("model") else []
+    # Read the effective fallback chain from profile config so oneshot workers
+    # honour the same merge semantics as interactive CLI and gateway sessions.
+    _fb = get_fallback_chain(cfg)

    agent = AIAgent(
        api_key=runtime.get("api_key"),
--- a/hermes_cli/plugins.py
+++ b/hermes_cli/plugins.py
@ -698,6 +698,119 @@ class PluginContext:

    # -- hook registration --------------------------------------------------

+    # -- auxiliary task registration ---------------------------------------
+
+    def register_auxiliary_task(
+        self,
+        key: str,
+        *,
+        display_name: str,
+        description: str,
+        defaults: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        """Register a plugin-defined auxiliary LLM task.
+
+        Auxiliary tasks are LLM-backed side jobs (vision analysis, web extraction,
+        compression, smart-approval, etc.) that route through ``auxiliary_client.py``.
+        Each task has its own ``auxiliary.<key>`` config block where users can
+        pin a provider/model independent of the main chat model.
+
+        Plugins use this to declare their own auxiliary tasks without touching
+        core files. After registration, the task:
+
+          - Appears in the ``hermes model → Configure auxiliary models`` picker
+          - Has its provider/model/base_url/api_key bridged from config.yaml to
+            ``AUXILIARY_<KEY_UPPER>_*`` env vars at gateway startup
+          - Gets default routing fields (provider="auto", model="", etc.) merged
+            into loaded configs so ``cfg.get("auxiliary", {}).get(key)`` works
+
+        Args:
+            key: stable task key (snake_case). Used in config ``auxiliary.<key>``
+                and env vars ``AUXILIARY_<KEY_UPPER>_*``. Must not shadow a
+                built-in task key (vision, compression, web_extract, approval,
+                mcp, title_generation, skills_hub, curator).
+            display_name: human-readable name shown in the picker.
+            description: short one-line description shown next to the name.
+            defaults: optional dict of default routing fields. Recognized keys:
+                ``provider`` (default "auto"), ``model`` (default ""),
+                ``base_url`` (default ""), ``api_key`` (default ""),
+                ``timeout`` (default 60), ``extra_body`` (default {}),
+                plus any task-specific extras (e.g. ``download_timeout``).
+                Unknown keys are preserved verbatim — the plugin owns the
+                schema for its own task.
+
+        Raises:
+            ValueError: if *key* is empty, contains invalid characters, or
+                shadows a built-in auxiliary task key.
+
+        Example:
+            ctx.register_auxiliary_task(
+                key="memory_retain_filter",
+                display_name="Memory retain filter",
+                description="hindsight pre-retain dedup/extract",
+                defaults={"provider": "auto", "timeout": 30},
+            )
+        """
+        # Validate key shape
+        if not key or not isinstance(key, str):
+            raise ValueError(
+                f"Plugin '{self.manifest.name}' tried to register auxiliary task "
+                f"with invalid key {key!r}"
+            )
+        if not all(c.isalnum() or c == "_" for c in key):
+            raise ValueError(
+                f"Plugin '{self.manifest.name}' auxiliary task key {key!r} "
+                f"must contain only alphanumeric characters and underscores"
+            )
+
+        # Lazy import to avoid circular: hermes_cli.main imports plugins indirectly
+        from hermes_cli.main import _AUX_TASKS as _BUILTIN_AUX_TASKS
+
+        builtin_keys = {k for k, _name, _desc in _BUILTIN_AUX_TASKS}
+        if key in builtin_keys:
+            raise ValueError(
+                f"Plugin '{self.manifest.name}' cannot register auxiliary task "
+                f"{key!r} — that key is reserved for a built-in task. "
+                f"Pick a plugin-namespaced key (e.g. '{self.manifest.name}_{key}')."
+            )
+
+        # Reject duplicate registrations across plugins
+        existing = self._manager._aux_tasks.get(key)
+        if existing is not None and existing.get("plugin") != self.manifest.name:
+            raise ValueError(
+                f"Plugin '{self.manifest.name}' cannot register auxiliary task "
+                f"{key!r} — already registered by plugin "
+                f"'{existing.get('plugin')}'"
+            )
+
+        # Normalize defaults — plugin owns the schema, but we ensure routing
+        # fields exist with sensible types so consumers don't crash.
+        merged_defaults: Dict[str, Any] = {
+            "provider": "auto",
+            "model": "",
+            "base_url": "",
+            "api_key": "",
+            "timeout": 60,
+            "extra_body": {},
+        }
+        if defaults:
+            for k, v in defaults.items():
+                merged_defaults[k] = v
+
+        self._manager._aux_tasks[key] = {
+            "key": key,
+            "display_name": display_name,
+            "description": description,
+            "defaults": merged_defaults,
+            "plugin": self.manifest.name,
+        }
+        logger.debug(
+            "Plugin %s registered auxiliary task: %s (%s)",
+            self.manifest.name,
+            key,
+            display_name,
+        )
+
    def register_hook(self, hook_name: str, callback: Callable) -> None:
        """Register a lifecycle hook callback.

@ -782,6 +895,9 @@ class PluginManager:
        self._cli_ref = None  # Set by CLI after plugin discovery
        # Plugin skill registry: qualified name → metadata dict.
        self._plugin_skills: Dict[str, Dict[str, Any]] = {}
+        # Plugin-registered auxiliary tasks: key → {key, display_name,
+        # description, defaults, plugin}. See PluginContext.register_auxiliary_task.
+        self._aux_tasks: Dict[str, Dict[str, Any]] = {}

    # -----------------------------------------------------------------------
    # Public
@ -803,6 +919,7 @@ class PluginManager:
            self._cli_commands.clear()
            self._plugin_commands.clear()
            self._plugin_skills.clear()
+            self._aux_tasks.clear()
            self._context_engine = None
        self._discovered = True

@ -1548,6 +1665,21 @@ def get_plugin_commands() -> Dict[str, dict]:
    return _ensure_plugins_discovered()._plugin_commands


+def get_plugin_auxiliary_tasks() -> List[Dict[str, Any]]:
+    """Return all plugin-registered auxiliary tasks as a stable-ordered list.
+
+    Each entry is the registration dict from
+    :meth:`PluginContext.register_auxiliary_task`:
+    ``{key, display_name, description, defaults, plugin}``.
+
+    Triggers idempotent plugin discovery so callers can read the registry
+    before any explicit ``discover_plugins()`` call. Sorted by ``key`` for
+    deterministic ordering in pickers and tests.
+    """
+    manager = _ensure_plugins_discovered()
+    return [manager._aux_tasks[k] for k in sorted(manager._aux_tasks)]
+
+
 def get_plugin_toolsets() -> List[tuple]:
    """Return plugin toolsets as ``(key, label, description)`` tuples.

--- a/hermes_cli/plugins_cmd.py
+++ b/hermes_cli/plugins_cmd.py
@ -76,22 +76,42 @@ def _plugins_dir() -> Path:
    return plugins


-def _sanitize_plugin_name(name: str, plugins_dir: Path) -> Path:
+def _sanitize_plugin_name(
+    name: str,
+    plugins_dir: Path,
+    *,
+    allow_subdir: bool = False,
+) -> Path:
    """Validate a plugin name and return the safe target path inside *plugins_dir*.

    Raises ``ValueError`` if the name contains path-traversal sequences or would
    resolve outside the plugins directory.
+
+    ``allow_subdir=True`` permits a single forward slash inside *name* so
+    category-namespaced plugin keys like ``observability/langfuse`` or
+    ``image_gen/openai`` (the registry keys emitted by ``_discover_all_plugins``)
+    can be looked up. ``..`` and backslash are still rejected, leading and
+    trailing slashes are stripped, and the resolved target must still live
+    inside *plugins_dir*. Install paths leave this at the default ``False``
+    because a freshly-cloned plugin always lands top-level under
+    ``~/.hermes/plugins/<name>/``.
    """
    if not name:
        raise ValueError("Plugin name must not be empty.")

+    if allow_subdir:
+        name = name.strip("/")
+        if not name:
+            raise ValueError("Plugin name must not be empty.")
+
    if name in {".", ".."}:
        raise ValueError(
            f"Invalid plugin name '{name}': must not reference the plugins directory itself."
        )

    # Reject obvious traversal characters
-    for bad in ("/", "\\", ".."):
+    bad_chars = ("\\", "..") if allow_subdir else ("/", "\\", "..")
+    for bad in bad_chars:
        if bad in name:
            raise ValueError(f"Invalid plugin name '{name}': must not contain '{bad}'.")

@ -326,7 +346,7 @@ def _display_removed(name: str, plugins_dir: Path) -> None:

 def _require_installed_plugin(name: str, plugins_dir: Path, console) -> Path:
    """Return the plugin path if it exists, or exit with an error listing installed plugins."""
-    target = _sanitize_plugin_name(name, plugins_dir)
+    target = _sanitize_plugin_name(name, plugins_dir, allow_subdir=True)
    if not target.exists():
        installed = ", ".join(d.name for d in plugins_dir.iterdir() if d.is_dir()) or "(none)"
        console.print(
@ -1051,7 +1071,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
            curses.init_pair(1, curses.COLOR_GREEN, -1)
            curses.init_pair(2, curses.COLOR_YELLOW, -1)
            curses.init_pair(3, curses.COLOR_CYAN, -1)
-            curses.init_pair(4, 8, -1)  # dim gray
+            curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)  # dim gray
        cursor = 0
        scroll_offset = 0

@ -1196,7 +1216,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
                            curses.init_pair(1, curses.COLOR_GREEN, -1)
                            curses.init_pair(2, curses.COLOR_YELLOW, -1)
                            curses.init_pair(3, curses.COLOR_CYAN, -1)
-                            curses.init_pair(4, 8, -1)
+                            curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)
                        curses.curs_set(0)
            elif key in {curses.KEY_ENTER, 10, 13}:
                if cursor < n_plugins:
@ -1228,7 +1248,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
                            curses.init_pair(1, curses.COLOR_GREEN, -1)
                            curses.init_pair(2, curses.COLOR_YELLOW, -1)
                            curses.init_pair(3, curses.COLOR_CYAN, -1)
-                            curses.init_pair(4, 8, -1)
+                            curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)
                        curses.curs_set(0)
            elif key in {27, ord("q")}:
                # Save plugin changes on exit
@ -1508,7 +1528,7 @@ def _user_installed_plugin_dir(name: str) -> Optional[Path]:
    """Resolved path under ``~/.hermes/plugins/<name>`` if it exists."""
    plugins_dir = _plugins_dir()
    try:
-        target = _sanitize_plugin_name(name, plugins_dir)
+        target = _sanitize_plugin_name(name, plugins_dir, allow_subdir=True)
    except ValueError:
        return None
    return target if target.is_dir() else None
--- a/hermes_cli/portal_cli.py
+++ b/hermes_cli/portal_cli.py
@ -0,0 +1,219 @@
+"""``hermes portal`` — small CLI surface for Nous Portal users.
+
+Subcommands:
+  status   Show Portal auth state + which Tool Gateway tools are routed.
+  open     Open the Portal subscription page in the user's default browser.
+  tools    List Tool Gateway tools and which are active in the current config.
+
+This command is intentionally minimal — it does not duplicate functionality
+already in ``hermes auth`` or ``hermes tools``. It's a discovery + status
+surface for the Portal subscription itself.
+"""
+from __future__ import annotations
+
+import sys
+import webbrowser
+from typing import Optional
+
+from hermes_cli.colors import Colors, color
+from hermes_cli.config import load_config
+
+DEFAULT_PORTAL_URL = "https://portal.nousresearch.com"
+SUBSCRIPTION_URL = "https://portal.nousresearch.com/manage-subscription"
+DOCS_URL = "https://hermes-agent.nousresearch.com/docs/user-guide/features/tool-gateway"
+
+
+def _nous_portal_base_url() -> str:
+    """Resolve the Portal base URL from auth state or default."""
+    try:
+        from hermes_cli.auth import get_nous_auth_status
+        status = get_nous_auth_status() or {}
+        url = status.get("portal_base_url")
+        if isinstance(url, str) and url.strip():
+            return url.rstrip("/")
+    except Exception:
+        pass
+    return DEFAULT_PORTAL_URL
+
+
+def _cmd_status(args) -> int:
+    """Show Portal auth + Tool Gateway routing summary."""
+    from hermes_cli.auth import get_nous_auth_status
+    from hermes_cli.nous_subscription import get_nous_subscription_features
+
+    config = load_config() or {}
+
+    try:
+        auth = get_nous_auth_status() or {}
+    except Exception:
+        auth = {}
+
+    logged_in = bool(auth.get("logged_in"))
+
+    print()
+    print(color("  Nous Portal", Colors.MAGENTA))
+    print(color("  ───────────", Colors.MAGENTA))
+    if logged_in:
+        portal = auth.get("portal_base_url") or DEFAULT_PORTAL_URL
+        print(f"  Auth:    {color('✓ logged in', Colors.GREEN)}")
+        print(f"  Portal:  {portal}")
+        inference = auth.get("inference_base_url")
+        if inference:
+            print(f"  API:     {inference}")
+    else:
+        print(f"  Auth:    {color('not logged in', Colors.YELLOW)}")
+        print(f"  Sign up: {SUBSCRIPTION_URL}")
+        print(f"  Login:   hermes auth add nous --type oauth")
+
+    # Provider selection (independent of auth)
+    model_cfg = config.get("model") if isinstance(config.get("model"), dict) else {}
+    provider = str(model_cfg.get("provider") or "").strip().lower()
+    if provider == "nous":
+        print(f"  Model:   {color('✓ using Nous as inference provider', Colors.GREEN)}")
+    elif provider:
+        print(f"  Model:   currently {provider} (switch with `hermes model`)")
+
+    # Tool Gateway routing
+    print()
+    print(color("  Tool Gateway", Colors.MAGENTA))
+    print(color("  ────────────", Colors.MAGENTA))
+    try:
+        features = get_nous_subscription_features(config)
+    except Exception:
+        features = None
+
+    if features is None:
+        print("  (could not resolve subscription state)")
+        return 0
+
+    rows = []
+    for feat in features.items():
+        if feat.managed_by_nous:
+            state = color("via Nous Portal", Colors.GREEN)
+        elif feat.active and feat.current_provider:
+            state = feat.current_provider
+        elif feat.active:
+            state = "active"
+        else:
+            state = color("not configured", Colors.DIM)
+        rows.append((feat.label, state))
+
+    width = max((len(r[0]) for r in rows), default=0)
+    for label, state in rows:
+        print(f"  {label:<{width}}   {state}")
+
+    if not logged_in:
+        print()
+        print(color(f"  Docs: {DOCS_URL}", Colors.DIM))
+    return 0
+
+
+def _cmd_open(args) -> int:
+    """Open the Portal subscription page in the default browser."""
+    target = SUBSCRIPTION_URL
+    print(f"Opening {target}")
+    try:
+        opened = webbrowser.open(target)
+    except Exception:
+        opened = False
+    if not opened:
+        print()
+        print("Could not launch a browser. Visit the URL above manually.")
+        return 1
+    return 0
+
+
+def _cmd_tools(args) -> int:
+    """List the Tool Gateway catalog + current routing."""
+    from hermes_cli.nous_subscription import get_nous_subscription_features
+
+    config = load_config() or {}
+    try:
+        features = get_nous_subscription_features(config)
+    except Exception:
+        print("Could not resolve Tool Gateway state.", file=sys.stderr)
+        return 1
+
+    # Static catalog — the partners Tool Gateway routes to today.
+    catalog = [
+        ("web",       "Web search & extract",  "Firecrawl"),
+        ("image_gen", "Image generation",      "FAL"),
+        ("tts",       "Text-to-speech",        "OpenAI TTS"),
+        ("browser",   "Browser automation",    "Browser Use"),
+        ("modal",     "Cloud terminal",        "Modal"),
+    ]
+
+    print()
+    print(color("  Tool Gateway catalog", Colors.MAGENTA))
+    print(color("  ────────────────────", Colors.MAGENTA))
+
+    if not features.nous_auth_present:
+        print(color("  Not logged into Nous Portal — sign in with `hermes auth add nous --type oauth`.", Colors.YELLOW))
+        print()
+
+    label_width = max(len(label) for _, label, _ in catalog)
+    for key, label, partner in catalog:
+        feat = features.features.get(key)
+        if feat is None:
+            state = color("unknown", Colors.DIM)
+        elif feat.managed_by_nous:
+            state = color("✓ via Nous Portal", Colors.GREEN)
+        elif feat.active and feat.current_provider:
+            state = feat.current_provider
+        elif feat.active:
+            state = "active"
+        else:
+            state = color("not configured", Colors.DIM)
+        print(f"  {label:<{label_width}}  partner: {partner:<14} {state}")
+
+    print()
+    print(color(f"  Manage your subscription: {SUBSCRIPTION_URL}", Colors.DIM))
+    print(color(f"  Docs: {DOCS_URL}", Colors.DIM))
+    return 0
+
+
+def portal_command(args) -> int:
+    """Top-level dispatch for `hermes portal <subcommand>`."""
+    sub = getattr(args, "portal_command", None)
+    if sub in {None, ""}:
+        # Default to status — matches gh / kubectl conventions where the
+        # subcommand-less form gives a useful overview.
+        return _cmd_status(args)
+    if sub == "status":
+        return _cmd_status(args)
+    if sub == "open":
+        return _cmd_open(args)
+    if sub == "tools":
+        return _cmd_tools(args)
+    print(f"Unknown portal subcommand: {sub}", file=sys.stderr)
+    print("Run `hermes portal -h` for usage.", file=sys.stderr)
+    return 1
+
+
+def add_parser(subparsers) -> None:
+    """Register `hermes portal` on the given argparse subparsers object."""
+    portal_parser = subparsers.add_parser(
+        "portal",
+        help="Nous Portal status, subscription, and Tool Gateway routing",
+        description=(
+            "Inspect Nous Portal auth, Tool Gateway routing, and open the "
+            "Portal subscription page. Subcommands: status (default), "
+            "open, tools."
+        ),
+    )
+    portal_sub = portal_parser.add_subparsers(dest="portal_command")
+
+    portal_sub.add_parser(
+        "status",
+        help="Show Portal auth + Tool Gateway routing summary (default)",
+    )
+    portal_sub.add_parser(
+        "open",
+        help="Open the Portal subscription page in your default browser",
+    )
+    portal_sub.add_parser(
+        "tools",
+        help="List Tool Gateway tools and which are routed via Nous",
+    )
+
+    portal_parser.set_defaults(func=portal_command)
--- a/hermes_cli/proxy/adapters/nous_portal.py
+++ b/hermes_cli/proxy/adapters/nous_portal.py
@ -27,6 +27,7 @@ from hermes_cli.auth import (
    _quarantine_nous_oauth_state,
    _quarantine_nous_pool_entries,
    _save_auth_store,
+    _validate_nous_inference_url_from_network,
    _write_shared_nous_state,
    resolve_nous_runtime_credentials,
 )
@ -137,7 +138,10 @@ class NousPortalAdapter(UpstreamAdapter):
                    "Try `hermes login nous` to re-authenticate."
                )

-            base_url = refreshed.get("base_url") or DEFAULT_NOUS_INFERENCE_URL
+            base_url = (
+                _validate_nous_inference_url_from_network(refreshed.get("base_url"))
+                or DEFAULT_NOUS_INFERENCE_URL
+            )
            base_url = base_url.rstrip("/")

            return UpstreamCredential(
--- a/hermes_cli/secrets_cli.py
+++ b/hermes_cli/secrets_cli.py
@ -57,6 +57,15 @@ def register_cli(parent_parser: argparse.ArgumentParser) -> None:
        "--access-token",
        help="Provide the access token non-interactively (will be stored in .env)",
    )
+    setup.add_argument(
+        "--server-url",
+        help=(
+            "Bitwarden region / self-hosted endpoint. Examples: "
+            "https://vault.bitwarden.com (US, default), "
+            "https://vault.bitwarden.eu (EU), or your self-hosted URL. "
+            "Skips the interactive region prompt."
+        ),
+    )
    setup.set_defaults(func=cmd_setup)

    status = sub.add_parser("status", help="Show config + binary + last fetch")
@ -145,14 +154,28 @@ def cmd_setup(args: argparse.Namespace) -> int:
    os.environ[token_env] = token  # so the test fetch below sees it
    console.print(f"  [green]✓[/green] stored in {get_env_path()} as {token_env}")

+    # ------------------------------------------------------------------ region
+    console.print()
+    console.print("[bold]Step 3[/bold]  Pick a Bitwarden region")
+    server_url = _resolve_server_url(args, secrets_cfg, console)
+    if server_url is None:
+        return 1
+    if server_url:
+        console.print(f"  [green]✓[/green] using {server_url}")
+    else:
+        console.print(
+            "  [green]✓[/green] using bws default "
+            "(US Cloud, https://vault.bitwarden.com)"
+        )
+
    # ------------------------------------------------------------------- project
    if args.project_id and args.project_id.strip():
        project_id = args.project_id.strip()
    else:
        console.print()
-        console.print("[bold]Step 3[/bold]  Pick a project")
+        console.print("[bold]Step 4[/bold]  Pick a project")
        project_id = ""
-        projects = _list_projects(binary, token, console)
+        projects = _list_projects(binary, token, console, server_url=server_url)
        if projects is None:
            return 1
        if not projects:
@ -187,7 +210,7 @@ def cmd_setup(args: argparse.Namespace) -> int:

    # ------------------------------------------------------------------- test
    console.print()
-    step_num = 4 if not (args.project_id and args.project_id.strip()) else 3
+    step_num = 5 if not (args.project_id and args.project_id.strip()) else 4
    console.print(f"[bold]Step {step_num}[/bold]  Test fetch")
    try:
        secrets, warnings = bw.fetch_bitwarden_secrets(
@ -195,6 +218,7 @@ def cmd_setup(args: argparse.Namespace) -> int:
            project_id=project_id,
            binary=binary,
            use_cache=False,
+            server_url=server_url,
        )
    except Exception as exc:  # noqa: BLE001
        console.print(f"  [red]✗ Fetch failed: {exc}[/red]")
@ -221,6 +245,7 @@ def cmd_setup(args: argparse.Namespace) -> int:
    # ------------------------------------------------------------------- save
    secrets_cfg["enabled"] = True
    secrets_cfg["project_id"] = project_id
+    secrets_cfg["server_url"] = server_url
    secrets_cfg.setdefault("access_token_env", token_env)
    secrets_cfg.setdefault("cache_ttl_seconds", 300)
    secrets_cfg.setdefault("override_existing", True)
@ -248,6 +273,7 @@ def cmd_status(args: argparse.Namespace) -> int:
    enabled = bool(bw_cfg.get("enabled"))
    token_env = bw_cfg.get("access_token_env", "BWS_ACCESS_TOKEN")
    project_id = bw_cfg.get("project_id", "")
+    server_url = str(bw_cfg.get("server_url", "") or "").strip()
    token_set = bool(os.environ.get(token_env))

    table = Table(show_header=False, box=None, padding=(0, 2))
@ -257,6 +283,10 @@ def cmd_status(args: argparse.Namespace) -> int:
    table.add_row("Token env var",   token_env)
    table.add_row("Token in env",    _yn(token_set))
    table.add_row("Project ID",      project_id or "[dim](unset)[/dim]")
+    table.add_row(
+        "Server URL",
+        server_url or "[dim]default (US Cloud, https://vault.bitwarden.com)[/dim]",
+    )
    table.add_row("Override existing", _yn(bool(bw_cfg.get("override_existing", False))))
    table.add_row("Cache TTL (s)",   str(bw_cfg.get("cache_ttl_seconds", 300)))
    table.add_row("Auto-install",    _yn(bool(bw_cfg.get("auto_install", True))))
@ -306,11 +336,14 @@ def cmd_sync(args: argparse.Namespace) -> int:
        console.print("[red]No project_id configured.[/red]")
        return 1

+    server_url = str(bw_cfg.get("server_url", "") or "").strip()
+
    try:
        secrets, warnings = bw.fetch_bitwarden_secrets(
            access_token=token,
            project_id=project_id,
            use_cache=False,
+            server_url=server_url,
        )
    except Exception as exc:  # noqa: BLE001
        console.print(f"[red]Fetch failed: {exc}[/red]")
@ -407,12 +440,14 @@ def _bws_version(binary: Path) -> str:


 def _list_projects(
-    binary: Path, token: str, console: Console
+    binary: Path, token: str, console: Console, *, server_url: str = ""
 ) -> Optional[List[dict]]:
    """Call ``bws project list`` and return the parsed list, or None on failure."""
    env = os.environ.copy()
    env["BWS_ACCESS_TOKEN"] = token
    env.setdefault("NO_COLOR", "1")
+    if server_url:
+        env["BWS_SERVER_URL"] = server_url
    try:
        res = subprocess.run(
            [str(binary), "project", "list", "--output", "json"],
@ -428,7 +463,16 @@ def _list_projects(
    if res.returncode != 0:
        err = (res.stderr or res.stdout).strip()[:300]
        console.print(f"  [red]bws project list failed: {err}[/red]")
-        if "authorization" in err.lower() or "invalid" in err.lower():
+        lowered = err.lower()
+        if "invalid_client" in lowered or "400 bad request" in lowered:
+            console.print(
+                "  [yellow]'invalid_client' from the US identity endpoint usually "
+                "means the token is for a different Bitwarden region.  Re-run "
+                "[cyan]hermes secrets bitwarden setup[/cyan] and pick EU or "
+                "self-hosted at the region prompt, or set [cyan]secrets.bitwarden."
+                "server_url[/cyan] in config.yaml.[/yellow]"
+            )
+        elif "authorization" in lowered or "invalid" in lowered:
            console.print(
                "  [yellow]This usually means the access token is wrong or revoked. "
                "Double-check it in the Bitwarden web app.[/yellow]"
@ -443,3 +487,91 @@ def _list_projects(
    if not isinstance(data, list):
        return []
    return [p for p in data if isinstance(p, dict) and p.get("id")]
+
+
+# Canonical Bitwarden region endpoints.  Keep in sync with what Bitwarden
+# publishes — these are stable but if a third region appears, add it here
+# and to the prompt below.
+_REGION_PRESETS = [
+    ("US Cloud  (https://vault.bitwarden.com — bws default)", ""),
+    ("EU Cloud  (https://vault.bitwarden.eu)", "https://vault.bitwarden.eu"),
+]
+
+
+def _resolve_server_url(
+    args: argparse.Namespace,
+    secrets_cfg: dict,
+    console: Console,
+) -> Optional[str]:
+    """Pick a Bitwarden server URL for setup.
+
+    Resolution order:
+      1. ``--server-url`` CLI flag (non-interactive)
+      2. ``BWS_SERVER_URL`` env var (so users running with that already set
+         in their shell don't have to re-enter it)
+      3. Existing ``secrets.bitwarden.server_url`` value (for re-runs)
+      4. Interactive menu: US / EU / self-hosted
+
+    Returns the chosen URL as a string (empty string = bws default,
+    i.e. US Cloud).  Returns None if the user aborted with an empty
+    custom URL.
+    """
+    if args.server_url and args.server_url.strip():
+        return args.server_url.strip()
+
+    env_url = os.environ.get("BWS_SERVER_URL", "").strip()
+    if env_url:
+        console.print(
+            f"  Detected [cyan]BWS_SERVER_URL[/cyan]={env_url} in your shell — using it."
+        )
+        return env_url
+
+    existing = str(secrets_cfg.get("server_url", "") or "").strip()
+    if existing:
+        console.print(
+            f"  Existing config: [cyan]{existing}[/cyan]. "
+            "Press Enter to keep, or pick a different option below."
+        )
+
+    table = Table(show_header=True, header_style="bold", box=None, padding=(0, 2))
+    table.add_column("#", style="cyan", width=4)
+    table.add_column("Region / endpoint")
+    for i, (label, _url) in enumerate(_REGION_PRESETS, 1):
+        table.add_row(str(i), label)
+    table.add_row(str(len(_REGION_PRESETS) + 1), "Self-hosted / custom URL")
+    console.print(table)
+
+    custom_idx = len(_REGION_PRESETS) + 1
+    while True:
+        prompt = f"  Select region [1-{custom_idx}]"
+        if existing:
+            prompt += " (Enter to keep current)"
+        prompt += ": "
+        choice = console.input(prompt).strip()
+        if not choice:
+            if existing:
+                return existing
+            console.print("  [red]Enter a number.[/red]")
+            continue
+        try:
+            idx = int(choice)
+        except ValueError:
+            console.print("  [red]Enter a number.[/red]")
+            continue
+        if 1 <= idx <= len(_REGION_PRESETS):
+            return _REGION_PRESETS[idx - 1][1]
+        if idx == custom_idx:
+            custom = console.input(
+                "  Enter your Bitwarden server URL "
+                "(e.g. https://vault.example.com): "
+            ).strip()
+            if not custom:
+                console.print("  [red]Empty URL, aborting.[/red]")
+                return None
+            if not custom.startswith(("http://", "https://")):
+                console.print(
+                    "  [yellow]Warning: URL doesn't start with http:// or "
+                    "https:// — bws may reject it.[/yellow]"
+                )
+            return custom
+        console.print(f"  [red]Out of range — pick 1-{custom_idx}.[/red]")
--- a/hermes_cli/security_audit.py
+++ b/hermes_cli/security_audit.py
@ -0,0 +1,576 @@
+"""On-demand supply-chain audit for Hermes Agent installs.
+
+Scans three surfaces a Hermes user actually controls and we can map to
+upstream advisories without auth or extra binaries:
+
+1. The Hermes venv (every PyPI dist via ``importlib.metadata``).
+2. Python deps declared by user-installed plugins under ``~/.hermes/plugins``
+   (``requirements.txt`` + ``pyproject.toml`` best-effort pin extraction).
+3. MCP servers wired in ``config.yaml`` whose ``command/args`` look like
+   ``npx -y <pkg>@<ver>`` or ``uvx <pkg>==<ver>``.
+
+Vulnerabilities are looked up against OSV.dev (``api.osv.dev/v1/querybatch``
+ ``/v1/vulns/{id}``). Single-shot, on-demand, never daily — see the design
+notes in ``references/security-disclosure-triage.md``.
+
+Out of scope on purpose: global pip/npm, editor/browser extensions,
+daily background scans, auto-blocking installs.
+"""
+
+from __future__ import annotations
+
+import argparse
+import concurrent.futures
+import json
+import re
+import sys
+import urllib.error
+import urllib.request
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Iterable, Optional
+
+from hermes_constants import get_hermes_home
+
+OSV_BATCH_URL = "https://api.osv.dev/v1/querybatch"
+OSV_VULN_URL = "https://api.osv.dev/v1/vulns/{vid}"
+OSV_BATCH_MAX = 1000  # OSV documented hard cap per request
+HTTP_TIMEOUT = 20
+DETAIL_PARALLELISM = 8
+
+# Severity ordering for --fail-on gating. UNKNOWN sits below LOW so it
+# never blocks unless --fail-on is passed something even lower (we don't
+# expose that).
+SEVERITY_ORDER = {
+    "UNKNOWN": 0,
+    "LOW": 1,
+    "MODERATE": 2,
+    "MEDIUM": 2,
+    "HIGH": 3,
+    "CRITICAL": 4,
+}
+
+
+# ─── Data shapes ──────────────────────────────────────────────────────────────
+
+
+@dataclass(frozen=True)
+class Component:
+    """A single (name, version, ecosystem) tuple discovered on disk."""
+
+    name: str
+    version: str
+    ecosystem: str  # "PyPI" | "npm" — exactly as OSV expects
+    source: str    # human-readable origin, e.g. "venv", "plugin:foo", "mcp:bar"
+
+
+@dataclass
+class Vulnerability:
+    osv_id: str
+    severity: str = "UNKNOWN"
+    summary: str = ""
+    fixed_versions: list[str] = field(default_factory=list)
+
+
+@dataclass
+class Finding:
+    component: Component
+    vuln: Vulnerability
+
+
+# ─── Component discovery ──────────────────────────────────────────────────────
+
+
+def _discover_venv() -> list[Component]:
+    """Every dist installed in the running Python's import path."""
+    from importlib.metadata import distributions
+
+    out: list[Component] = []
+    seen: set[tuple[str, str]] = set()
+    for dist in distributions():
+        try:
+            name = (dist.metadata["Name"] or "").strip()
+        except Exception:
+            continue
+        version = (dist.version or "").strip()
+        if not name or not version:
+            continue
+        key = (name.lower(), version)
+        if key in seen:
+            continue
+        seen.add(key)
+        out.append(Component(name=name, version=version, ecosystem="PyPI", source="venv"))
+    return out
+
+
+# requirements.txt line: drop comments, environment markers, options, extras
+_REQ_LINE = re.compile(
+    r"""^\s*
+        (?P<name>[A-Za-z0-9][A-Za-z0-9._-]*)
+        (?:\[[^\]]+\])?              # extras
+        \s*==\s*
+        (?P<version>[A-Za-z0-9._+!-]+)
+        \s*(?:;.*)?$
+    """,
+    re.VERBOSE,
+)
+
+
+def _parse_requirements(text: str) -> list[tuple[str, str]]:
+    """Extract ``name==version`` pins. Everything else (>=, ~=, no pin) is skipped.
+
+    A loose pin can't be mapped to a single OSV query, and getting it wrong
+    is worse than missing a finding for an audit tool — false positives
+    train users to ignore output.
+    """
+    pins: list[tuple[str, str]] = []
+    for raw in text.splitlines():
+        line = raw.strip()
+        if not line or line.startswith("#") or line.startswith("-"):
+            continue
+        m = _REQ_LINE.match(line)
+        if m:
+            pins.append((m.group("name"), m.group("version")))
+    return pins
+
+
+def _parse_pyproject_pins(text: str) -> list[tuple[str, str]]:
+    """Pull ``name==version`` pins from a ``pyproject.toml`` ``dependencies`` list.
+
+    Uses stdlib ``tomllib`` (3.11+). Same exact-pin policy as requirements.
+    """
+    try:
+        import tomllib
+    except ImportError:  # pragma: no cover - 3.10 only
+        return []
+    try:
+        data = tomllib.loads(text)
+    except Exception:
+        return []
+    deps: list[str] = []
+    project = data.get("project") or {}
+    if isinstance(project.get("dependencies"), list):
+        deps.extend(str(x) for x in project["dependencies"])
+    optional = project.get("optional-dependencies") or {}
+    if isinstance(optional, dict):
+        for group in optional.values():
+            if isinstance(group, list):
+                deps.extend(str(x) for x in group)
+    pins: list[tuple[str, str]] = []
+    for dep in deps:
+        m = _REQ_LINE.match(dep)
+        if m:
+            pins.append((m.group("name"), m.group("version")))
+    return pins
+
+
+def _discover_plugins(hermes_home: Path) -> list[Component]:
+    """Python deps declared by plugins under ``~/.hermes/plugins``.
+
+    Plugins typically don't install into the venv (they're directory-based
+    with relative imports), so their stated requirements are useful audit
+    surface even when the venv scan misses them.
+    """
+    plugins_dir = hermes_home / "plugins"
+    if not plugins_dir.is_dir():
+        return []
+
+    out: list[Component] = []
+    for plugin_dir in sorted(plugins_dir.iterdir()):
+        if not plugin_dir.is_dir() or plugin_dir.name.startswith("."):
+            continue
+        source = f"plugin:{plugin_dir.name}"
+        for req_file in ("requirements.txt", "requirements-dev.txt"):
+            path = plugin_dir / req_file
+            if path.is_file():
+                try:
+                    pins = _parse_requirements(path.read_text(encoding="utf-8", errors="replace"))
+                except OSError:
+                    continue
+                for name, version in pins:
+                    out.append(Component(name=name, version=version, ecosystem="PyPI", source=source))
+        pyproject = plugin_dir / "pyproject.toml"
+        if pyproject.is_file():
+            try:
+                pins = _parse_pyproject_pins(pyproject.read_text(encoding="utf-8", errors="replace"))
+            except OSError:
+                continue
+            for name, version in pins:
+                out.append(Component(name=name, version=version, ecosystem="PyPI", source=source))
+    return out
+
+
+# npx forms we recognise:
+#   npx -y @scope/pkg@1.2.3
+#   npx --yes pkg@1.2.3
+#   npx pkg@1.2.3 [...args]
+# We deliberately don't try to resolve unversioned names — that maps to
+# "latest" at runtime and isn't a stable audit subject.
+_NPX_PKG = re.compile(r"^(@[A-Za-z0-9._-]+/[A-Za-z0-9._-]+|[A-Za-z0-9._-]+)@([A-Za-z0-9._+-]+)$")
+# uvx forms:
+#   uvx pkg==1.2.3
+#   uvx --with pkg==1.2.3 entrypoint
+_UVX_PKG = re.compile(r"^([A-Za-z0-9][A-Za-z0-9._-]*)==([A-Za-z0-9._+!-]+)$")
+
+
+def _extract_mcp_component(server_name: str, command: str, args: list[str]) -> Optional[Component]:
+    """Best-effort: parse `command/args` into a (name, version, ecosystem).
+
+    Returns None when the entry doesn't pin a version we can audit (local
+    paths, Docker images, unversioned npx, etc.). Audit output stays silent
+    rather than guess.
+    """
+    cmd = (command or "").strip().lower()
+    if not args:
+        return None
+    # npx (any prefix path)
+    if cmd.endswith("npx") or cmd == "npx":
+        # Skip flag tokens until we see the first thing that looks like a pkg ref
+        for token in args:
+            if token.startswith("-"):
+                continue
+            m = _NPX_PKG.match(token)
+            if m:
+                return Component(
+                    name=m.group(1),
+                    version=m.group(2),
+                    ecosystem="npm",
+                    source=f"mcp:{server_name}",
+                )
+            return None  # First non-flag token isn't a pinned ref
+    # uvx (any prefix path)
+    if cmd.endswith("uvx") or cmd == "uvx":
+        for token in args:
+            if token.startswith("-"):
+                continue
+            m = _UVX_PKG.match(token)
+            if m:
+                return Component(
+                    name=m.group(1),
+                    version=m.group(2),
+                    ecosystem="PyPI",
+                    source=f"mcp:{server_name}",
+                )
+            return None
+    return None
+
+
+def _discover_mcp() -> list[Component]:
+    """Pinned MCP server packages from ``config.yaml``."""
+    try:
+        from hermes_cli.mcp_config import _get_mcp_servers
+    except Exception:
+        return []
+
+    out: list[Component] = []
+    servers = _get_mcp_servers()
+    if not isinstance(servers, dict):
+        return []
+    for name, cfg in servers.items():
+        if not isinstance(cfg, dict):
+            continue
+        command = cfg.get("command", "") or ""
+        args = cfg.get("args") or []
+        if not isinstance(args, list):
+            continue
+        comp = _extract_mcp_component(name, command, [str(a) for a in args])
+        if comp is not None:
+            out.append(comp)
+    return out
+
+
+# ─── OSV client ───────────────────────────────────────────────────────────────
+
+
+def _http_post_json(url: str, payload: dict) -> dict:
+    data = json.dumps(payload).encode("utf-8")
+    req = urllib.request.Request(
+        url, data=data, headers={"Content-Type": "application/json"}, method="POST"
+    )
+    with urllib.request.urlopen(req, timeout=HTTP_TIMEOUT) as resp:
+        return json.loads(resp.read().decode("utf-8"))
+
+
+def _http_get_json(url: str) -> dict:
+    req = urllib.request.Request(url, method="GET")
+    with urllib.request.urlopen(req, timeout=HTTP_TIMEOUT) as resp:
+        return json.loads(resp.read().decode("utf-8"))
+
+
+def _osv_query_batch(components: list[Component]) -> dict[Component, list[str]]:
+    """Return {component -> [osv_id, ...]} for components with any vulns.
+
+    Components without findings are omitted from the result dict.
+    """
+    if not components:
+        return {}
+    findings: dict[Component, list[str]] = {}
+    for chunk_start in range(0, len(components), OSV_BATCH_MAX):
+        chunk = components[chunk_start:chunk_start + OSV_BATCH_MAX]
+        payload = {
+            "queries": [
+                {
+                    "package": {"name": c.name, "ecosystem": c.ecosystem},
+                    "version": c.version,
+                }
+                for c in chunk
+            ]
+        }
+        try:
+            resp = _http_post_json(OSV_BATCH_URL, payload)
+        except (urllib.error.URLError, TimeoutError, ConnectionError) as exc:
+            raise RuntimeError(f"OSV batch query failed: {exc}") from exc
+        results = resp.get("results") or []
+        for comp, result in zip(chunk, results):
+            vulns = (result or {}).get("vulns") or []
+            ids = [v.get("id") for v in vulns if v.get("id")]
+            if ids:
+                findings[comp] = ids
+    return findings
+
+
+def _osv_severity_from_record(record: dict) -> str:
+    """Extract CVSS-derived severity tier from an OSV vuln record."""
+    # OSV puts CVSS in `severity` (top-level or per-affected) and a
+    # human-readable bucket in `database_specific.severity` for GHSAs.
+    db_specific = record.get("database_specific") or {}
+    raw = db_specific.get("severity")
+    if isinstance(raw, str) and raw.strip():
+        upper = raw.strip().upper()
+        if upper in SEVERITY_ORDER:
+            return upper
+    # Fall back to CVSS score → tier
+    score: Optional[float] = None
+    for sev_entry in record.get("severity") or []:
+        s = sev_entry.get("score")
+        if isinstance(s, str):
+            # CVSS vector strings look like "CVSS:3.1/AV:N/..." — we can't
+            # parse without a lib. Look for an explicit numeric in
+            # affected[].ecosystem_specific later if present.
+            continue
+    affected = record.get("affected") or []
+    for entry in affected:
+        eco_spec = entry.get("ecosystem_specific") or {}
+        sev = eco_spec.get("severity")
+        if isinstance(sev, str) and sev.strip().upper() in SEVERITY_ORDER:
+            return sev.strip().upper()
+    if score is not None:
+        if score >= 9.0:
+            return "CRITICAL"
+        if score >= 7.0:
+            return "HIGH"
+        if score >= 4.0:
+            return "MODERATE"
+        if score > 0:
+            return "LOW"
+    return "UNKNOWN"
+
+
+def _osv_fixed_versions(record: dict) -> list[str]:
+    fixes: list[str] = []
+    for entry in record.get("affected") or []:
+        for rng in entry.get("ranges") or []:
+            for event in rng.get("events") or []:
+                if "fixed" in event:
+                    fixes.append(str(event["fixed"]))
+    # Dedupe, preserve order
+    seen: set[str] = set()
+    out: list[str] = []
+    for f in fixes:
+        if f not in seen:
+            seen.add(f)
+            out.append(f)
+    return out
+
+
+def _osv_fetch_details(vuln_ids: Iterable[str]) -> dict[str, Vulnerability]:
+    """Fetch summary/severity for each unique vuln id, in parallel."""
+    unique = sorted({vid for vid in vuln_ids if vid})
+    if not unique:
+        return {}
+    out: dict[str, Vulnerability] = {}
+
+    def _fetch_one(vid: str) -> Vulnerability:
+        try:
+            rec = _http_get_json(OSV_VULN_URL.format(vid=vid))
+        except (urllib.error.URLError, TimeoutError, ConnectionError):
+            return Vulnerability(osv_id=vid)
+        return Vulnerability(
+            osv_id=vid,
+            severity=_osv_severity_from_record(rec),
+            summary=(rec.get("summary") or "").strip(),
+            fixed_versions=_osv_fixed_versions(rec),
+        )
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=DETAIL_PARALLELISM) as pool:
+        for vuln in pool.map(_fetch_one, unique):
+            out[vuln.osv_id] = vuln
+    return out
+
+
+# ─── Orchestration ────────────────────────────────────────────────────────────
+
+
+def run_audit(
+    *,
+    skip_venv: bool = False,
+    skip_plugins: bool = False,
+    skip_mcp: bool = False,
+    hermes_home: Optional[Path] = None,
+) -> list[Finding]:
+    """Discover components, query OSV, return findings sorted by severity desc."""
+    home = hermes_home or Path(get_hermes_home())
+    components: list[Component] = []
+    if not skip_venv:
+        components.extend(_discover_venv())
+    if not skip_plugins:
+        components.extend(_discover_plugins(home))
+    if not skip_mcp:
+        components.extend(_discover_mcp())
+
+    if not components:
+        return []
+
+    raw = _osv_query_batch(components)
+    if not raw:
+        return []
+
+    all_ids: list[str] = []
+    for ids in raw.values():
+        all_ids.extend(ids)
+    details = _osv_fetch_details(all_ids)
+
+    findings: list[Finding] = []
+    for comp, ids in raw.items():
+        for vid in ids:
+            vuln = details.get(vid) or Vulnerability(osv_id=vid)
+            findings.append(Finding(component=comp, vuln=vuln))
+
+    findings.sort(
+        key=lambda f: (
+            -SEVERITY_ORDER.get(f.vuln.severity, 0),
+            f.component.source,
+            f.component.name.lower(),
+            f.vuln.osv_id,
+        )
+    )
+    return findings
+
+
+# ─── Rendering ────────────────────────────────────────────────────────────────
+
+
+def _render_human(findings: list[Finding], total_components: int) -> str:
+    if not findings:
+        return f"No known vulnerabilities found across {total_components} component(s)."
+
+    lines: list[str] = []
+    lines.append(
+        f"Found {len(findings)} known vulnerability finding(s) "
+        f"across {total_components} component(s):"
+    )
+    lines.append("")
+    last_source = None
+    for f in findings:
+        if f.component.source != last_source:
+            lines.append(f"[{f.component.source}]")
+            last_source = f.component.source
+        sev = f.vuln.severity.ljust(8)
+        head = f"  {sev}  {f.component.name}=={f.component.version}  {f.vuln.osv_id}"
+        lines.append(head)
+        if f.vuln.summary:
+            summary = f.vuln.summary
+            if len(summary) > 100:
+                summary = summary[:97] + "..."
+            lines.append(f"           {summary}")
+        if f.vuln.fixed_versions:
+            lines.append(f"           fixed in: {', '.join(f.vuln.fixed_versions[:3])}")
+    return "\n".join(lines)
+
+
+def _render_json(findings: list[Finding], total_components: int) -> str:
+    payload = {
+        "total_components_scanned": total_components,
+        "finding_count": len(findings),
+        "findings": [
+            {
+                "package": f.component.name,
+                "version": f.component.version,
+                "ecosystem": f.component.ecosystem,
+                "source": f.component.source,
+                "vuln_id": f.vuln.osv_id,
+                "severity": f.vuln.severity,
+                "summary": f.vuln.summary,
+                "fixed_versions": f.vuln.fixed_versions,
+            }
+            for f in findings
+        ],
+    }
+    return json.dumps(payload, indent=2)
+
+
+def _count_components(
+    *, skip_venv: bool, skip_plugins: bool, skip_mcp: bool, hermes_home: Path
+) -> int:
+    total = 0
+    if not skip_venv:
+        total += len(_discover_venv())
+    if not skip_plugins:
+        total += len(_discover_plugins(hermes_home))
+    if not skip_mcp:
+        total += len(_discover_mcp())
+    return total
+
+
+# ─── CLI entrypoint ───────────────────────────────────────────────────────────
+
+
+def cmd_security_audit(args: argparse.Namespace) -> int:
+    """Implementation of `hermes security audit`."""
+    home = Path(get_hermes_home())
+    skip_venv = bool(getattr(args, "skip_venv", False))
+    skip_plugins = bool(getattr(args, "skip_plugins", False))
+    skip_mcp = bool(getattr(args, "skip_mcp", False))
+    output_json = bool(getattr(args, "json", False))
+    fail_on = (getattr(args, "fail_on", None) or "critical").upper()
+    if fail_on not in SEVERITY_ORDER:
+        print(
+            f"unknown --fail-on value: {fail_on.lower()} "
+            f"(choose from: low, moderate, high, critical)",
+            file=sys.stderr,
+        )
+        return 2
+
+    total = _count_components(
+        skip_venv=skip_venv, skip_plugins=skip_plugins, skip_mcp=skip_mcp, hermes_home=home
+    )
+    if total == 0:
+        msg = "No components discovered (everything skipped, or empty environment)."
+        if output_json:
+            print(json.dumps({"total_components_scanned": 0, "finding_count": 0, "findings": []}))
+        else:
+            print(msg)
+        return 0
+
+    try:
+        findings = run_audit(
+            skip_venv=skip_venv,
+            skip_plugins=skip_plugins,
+            skip_mcp=skip_mcp,
+            hermes_home=home,
+        )
+    except RuntimeError as exc:
+        print(f"audit failed: {exc}", file=sys.stderr)
+        return 2
+
+    if output_json:
+        print(_render_json(findings, total))
+    else:
+        print(_render_human(findings, total))
+
+    # Exit code: 1 iff any finding meets or exceeds the --fail-on threshold.
+    threshold = SEVERITY_ORDER[fail_on]
+    for f in findings:
+        if SEVERITY_ORDER.get(f.vuln.severity, 0) >= threshold:
+            return 1
+    return 0
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@ -2034,74 +2034,6 @@ def _setup_telegram():
            save_env_value("TELEGRAM_HOME_CHANNEL", home_channel)


-def _setup_discord():
-    """Configure Discord bot credentials and allowlist."""
-    print_header("Discord")
-    existing = get_env_value("DISCORD_BOT_TOKEN")
-    if existing:
-        print_info("Discord: already configured")
-        if not prompt_yes_no("Reconfigure Discord?", False):
-            if not get_env_value("DISCORD_ALLOWED_USERS"):
-                print_info("⚠️  Discord has no user allowlist - anyone can use your bot!")
-                if prompt_yes_no("Add allowed users now?", True):
-                    print_info("   To find Discord ID: Enable Developer Mode, right-click name → Copy ID")
-                    allowed_users = prompt("Allowed user IDs (comma-separated)")
-                    if allowed_users:
-                        cleaned_ids = _clean_discord_user_ids(allowed_users)
-                        save_env_value("DISCORD_ALLOWED_USERS", ",".join(cleaned_ids))
-                        print_success("Discord allowlist configured")
-            return
-
-    print_info("Create a bot at https://discord.com/developers/applications")
-    token = prompt("Discord bot token", password=True)
-    if not token:
-        return
-    save_env_value("DISCORD_BOT_TOKEN", token)
-    print_success("Discord token saved")
-
-    print()
-    print_info("🔒 Security: Restrict who can use your bot")
-    print_info("   To find your Discord user ID:")
-    print_info("   1. Enable Developer Mode in Discord settings")
-    print_info("   2. Right-click your name → Copy ID")
-    print()
-    print_info("   You can also use Discord usernames (resolved on gateway start).")
-    print()
-    allowed_users = prompt(
-        "Allowed user IDs or usernames (comma-separated, leave empty for open access)"
-    )
-    if allowed_users:
-        cleaned_ids = _clean_discord_user_ids(allowed_users)
-        save_env_value("DISCORD_ALLOWED_USERS", ",".join(cleaned_ids))
-        print_success("Discord allowlist configured")
-    else:
-        print_info("⚠️  No allowlist set - anyone in servers with your bot can use it!")
-
-    print()
-    print_info("📬 Home Channel: where Hermes delivers cron job results,")
-    print_info("   cross-platform messages, and notifications.")
-    print_info("   To get a channel ID: right-click a channel → Copy Channel ID")
-    print_info("   (requires Developer Mode in Discord settings)")
-    print_info("   You can also set this later by typing /set-home in a Discord channel.")
-    home_channel = prompt("Home channel ID (leave empty to set later with /set-home)")
-    if home_channel:
-        save_env_value("DISCORD_HOME_CHANNEL", home_channel)
-
-
-def _clean_discord_user_ids(raw: str) -> list:
-    """Strip common Discord mention prefixes from a comma-separated ID string."""
-    cleaned = []
-    for uid in raw.replace(" ", "").split(","):
-        uid = uid.strip()
-        if uid.startswith("<@") and uid.endswith(">"):
-            uid = uid.lstrip("<@!").rstrip(">")
-        if uid.lower().startswith("user:"):
-            uid = uid[5:]
-        if uid:
-            cleaned.append(uid)
-    return cleaned
-
-
 def _setup_slack():
    """Configure Slack bot credentials."""
    print_header("Slack")
@ -2256,28 +2188,58 @@ def _setup_matrix():
            print_success("E2EE enabled")

        matrix_pkg = "mautrix[encryption]" if want_e2ee else "mautrix"
+        # Use the central lazy-deps feature group so we install ALL of
+        # platform.matrix's dependencies (mautrix, Markdown, aiosqlite,
+        # asyncpg, aiohttp-socks) — not just mautrix itself.  The previous
+        # hand-rolled ``pip install mautrix[encryption]`` left asyncpg /
+        # aiosqlite uninstalled and broke E2EE connect with
+        # ``No module named 'asyncpg'`` on every fresh install (#31116).
        try:
-            __import__("mautrix")
+            from tools.lazy_deps import ensure as _lazy_ensure, feature_missing
+            _missing_before = feature_missing("platform.matrix")
+            if _missing_before:
+                print_info(
+                    f"Installing {matrix_pkg} (+ {len(_missing_before)} runtime deps)..."
+                )
+                try:
+                    _lazy_ensure("platform.matrix", prompt=False)
+                    print_success(f"{matrix_pkg} installed")
+                except Exception as exc:
+                    print_warning(
+                        f"Install failed — run manually: pip install "
+                        f"'mautrix[encryption]' asyncpg aiosqlite Markdown "
+                        f"aiohttp-socks"
+                    )
+                    print_info(f"  Error: {exc}")
        except ImportError:
-            print_info(f"Installing {matrix_pkg}...")
-            import subprocess
-            uv_bin = shutil.which("uv")
-            if uv_bin:
-                result = subprocess.run(
-                    [uv_bin, "pip", "install", "--python", sys.executable, matrix_pkg],
-                    capture_output=True, text=True,
-                )
-            else:
-                result = subprocess.run(
-                    [sys.executable, "-m", "pip", "install", matrix_pkg],
-                    capture_output=True, text=True,
-                )
-            if result.returncode == 0:
-                print_success(f"{matrix_pkg} installed")
-            else:
-                print_warning(f"Install failed — run manually: pip install '{matrix_pkg}'")
-                if result.stderr:
-                    print_info(f"  Error: {result.stderr.strip().splitlines()[-1]}")
+            # tools.lazy_deps unavailable (extreme edge case — partial
+            # install).  Fall back to the legacy single-package install
+            # path so the wizard still does *something*.
+            try:
+                __import__("mautrix")
+            except ImportError:
+                print_info(f"Installing {matrix_pkg}...")
+                import subprocess
+                uv_bin = shutil.which("uv")
+                if uv_bin:
+                    result = subprocess.run(
+                        [uv_bin, "pip", "install", "--python", sys.executable, matrix_pkg],
+                        capture_output=True, text=True,
+                    )
+                else:
+                    result = subprocess.run(
+                        [sys.executable, "-m", "pip", "install", matrix_pkg],
+                        capture_output=True, text=True,
+                    )
+                if result.returncode == 0:
+                    print_success(f"{matrix_pkg} installed")
+                else:
+                    print_warning(
+                        f"Install failed — run manually: pip install "
+                        f"'{matrix_pkg}' asyncpg aiosqlite Markdown aiohttp-socks"
+                    )
+                    if result.stderr:
+                        print_info(f"  Error: {result.stderr.strip().splitlines()[-1]}")

        print()
        print_info("🔒 Security: Restrict who can use your bot")
@ -3128,6 +3090,119 @@ SETUP_SECTIONS = [
 ]


+def _run_portal_one_shot(config: dict) -> None:
+    """One-shot Nous Portal setup — OAuth + provider switch + Tool Gateway.
+
+    Wired into ``hermes setup --portal``. Does NOT prompt for anything
+    besides what the underlying OAuth + Tool Gateway prompts already need.
+    Designed to be shareable as a single command (``hermes setup --portal``)
+    that gets a brand-new user from zero to a fully working Hermes session
+    with web/image/tts/browser tools all routed via their Portal sub.
+    """
+    from types import SimpleNamespace
+
+    from hermes_cli.auth_commands import auth_add_command
+    from hermes_cli.config import save_config
+    from hermes_cli.auth import get_nous_auth_status
+    from hermes_cli.nous_subscription import prompt_enable_tool_gateway
+
+    print()
+    print(
+        color(
+            "┌─────────────────────────────────────────────────────────┐",
+            Colors.MAGENTA,
+        )
+    )
+    print(color("│     ⚕ Hermes Setup — Nous Portal (one-shot)             │", Colors.MAGENTA))
+    print(
+        color(
+            "└─────────────────────────────────────────────────────────┘",
+            Colors.MAGENTA,
+        )
+    )
+    print()
+    print_info("  One subscription, 300+ models, plus the Tool Gateway:")
+    print_info("    web search, image generation, TTS, browser automation")
+    print_info("    — all routed through your Nous Portal sub.")
+    print()
+    print_info("  Sign up: https://portal.nousresearch.com/manage-subscription")
+    print()
+
+    # Skip OAuth if already logged in (don't re-prompt every time the user
+    # runs `hermes setup --portal` after a successful first run).
+    already_logged_in = False
+    try:
+        already_logged_in = bool((get_nous_auth_status() or {}).get("logged_in"))
+    except Exception:
+        already_logged_in = False
+
+    if already_logged_in:
+        print_success("  Already logged into Nous Portal.")
+    else:
+        # Hand off to the shared auth wiring so the device-code flow is
+        # identical to `hermes auth add nous --type oauth`. SimpleNamespace
+        # mirrors the argparse Namespace contract that auth_add_command expects.
+        ns = SimpleNamespace(
+            provider="nous",
+            auth_type="oauth",
+            label=None,
+            api_key=None,
+            portal_url=None,
+            inference_url=None,
+            client_id=None,
+            scope=None,
+            no_browser=False,
+            timeout=None,
+            insecure=False,
+            ca_bundle=None,
+            min_key_ttl_seconds=5 * 60,
+        )
+        try:
+            auth_add_command(ns)
+        except SystemExit as e:
+            print()
+            print_error(f"  Nous Portal login failed (exit {e.code}).")
+            print_info("  You can retry later with `hermes auth add nous --type oauth`.")
+            return
+        except (KeyboardInterrupt, EOFError):
+            print()
+            print_info("  Setup cancelled.")
+            return
+        except Exception as exc:
+            print()
+            print_error(f"  Nous Portal login failed: {exc}")
+            print_info("  You can retry later with `hermes auth add nous --type oauth`.")
+            return
+
+    # Set provider → nous so the model picker, status surfaces, and
+    # managed-tool gating all light up. Leave model.model empty so the
+    # runtime picks Nous's default model; the user can change it later
+    # with `hermes model`.
+    model_cfg = config.get("model")
+    if not isinstance(model_cfg, dict):
+        model_cfg = {}
+        config["model"] = model_cfg
+    model_cfg["provider"] = "nous"
+    save_config(config)
+    print()
+    print_success("  Nous set as your inference provider.")
+
+    # Offer the Tool Gateway opt-in (single Y/n) — same flow that fires
+    # from `hermes model` after picking Nous.
+    print()
+    try:
+        prompt_enable_tool_gateway(config)
+    except (KeyboardInterrupt, EOFError):
+        pass
+    except Exception as exc:
+        print_warning(f"  Tool Gateway prompt skipped: {exc}")
+
+    print()
+    print_success("Portal setup complete.")
+    print_info("  Run `hermes portal status` to inspect routing.")
+    print_info("  Run `hermes` to start chatting.")
+
+
 def run_setup_wizard(args):
    """Run the interactive setup wizard.

@ -3183,6 +3258,11 @@ def run_setup_wizard(args):
        )
        return

+    # --portal: one-shot Nous Portal setup. Skips the rest of the wizard.
+    if bool(getattr(args, "portal", False)):
+        _run_portal_one_shot(config)
+        return
+
    # Check if a specific section was requested
    section = getattr(args, "section", None)
    if section:
--- a/hermes_cli/skills_hub.py
+++ b/hermes_cli/skills_hub.py
@ -906,8 +906,14 @@ def do_update(name: Optional[str] = None, console: Optional[Console] = None) ->
    c.print(f"[bold green]Updated {len(updates)} skill(s).[/]\n")


-def do_audit(name: Optional[str] = None, console: Optional[Console] = None) -> None:
-    """Re-run security scan on installed hub skills."""
+def do_audit(name: Optional[str] = None, console: Optional[Console] = None,
+             deep: bool = False) -> None:
+    """Re-run security scan on installed hub skills.
+
+    When ``deep=True``, also runs an opt-in AST-level diagnostic on Python
+    files (review aid only — not a security gate; skills_guard.py verdicts
+    are unchanged).
+    """
    from tools.skills_hub import HubLockFile, SKILLS_DIR
    from tools.skills_guard import scan_skill, format_scan_report

@ -928,6 +934,9 @@ def do_audit(name: Optional[str] = None, console: Optional[Console] = None) -> N

    c.print(f"\n[bold]Auditing {len(targets)} skill(s)...[/]\n")

+    if deep:
+        from tools.skills_ast_audit import ast_scan_path, format_ast_report
+
    for entry in targets:
        skill_path = SKILLS_DIR / entry["install_path"]
        if not skill_path.exists():
@ -936,6 +945,10 @@ def do_audit(name: Optional[str] = None, console: Optional[Console] = None) -> N

        result = scan_skill(skill_path, source=entry.get("identifier", entry["source"]))
        c.print(format_scan_report(result))
+
+        if deep:
+            c.print(format_ast_report(ast_scan_path(skill_path), skill_name=entry["name"]))
+
        c.print()


@ -1343,7 +1356,8 @@ def skills_command(args) -> None:
    elif action == "update":
        do_update(name=getattr(args, "name", None))
    elif action == "audit":
-        do_audit(name=getattr(args, "name", None))
+        do_audit(name=getattr(args, "name", None),
+                 deep=getattr(args, "deep", False))
    elif action == "uninstall":
        do_uninstall(args.name)
    elif action == "reset":
@ -1395,6 +1409,8 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
        /skills update
        /skills audit
        /skills audit my-skill
+        /skills audit --deep
+        /skills audit my-skill --deep
        /skills uninstall my-skill
        /skills tap list
        /skills tap add owner/repo
@ -1509,8 +1525,9 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
        do_update(name=name, console=c)

    elif action == "audit":
-        name = args[0] if args else None
-        do_audit(name=name, console=c)
+        name = args[0] if args and not args[0].startswith("--") else None
+        deep = "--deep" in args
+        do_audit(name=name, console=c, deep=deep)

    elif action == "uninstall":
        if not args:
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@ -311,6 +311,16 @@ TOOL_CATEGORIES = {
    "image_gen": {
        "name": "Image Generation",
        "icon": "🎨",
+        # Per-provider rows for FAL.ai (`plugins/image_gen/fal`), OpenAI,
+        # OpenAI Codex, and xAI are injected at runtime from each
+        # ``plugins.image_gen.<vendor>`` package via
+        # ``_plugin_image_gen_providers()`` in ``_visible_providers``.
+        # Only non-provider UX setup-flow rows remain here:
+        #   - "Nous Subscription" — managed FAL billed via the Nous
+        #     subscription (requires_nous_auth + override_env_vars).
+        #     Uses the fal plugin as the underlying backend but has a
+        #     distinct setup UX.
+        # Mirrors the shape browser/video_gen ship today.
        "providers": [
            {
                "name": "Nous Subscription",
@ -322,15 +332,6 @@ TOOL_CATEGORIES = {
                "override_env_vars": ["FAL_KEY"],
                "imagegen_backend": "fal",
            },
-            {
-                "name": "FAL.ai",
-                "badge": "paid",
-                "tag": "Pick from flux-2-klein, flux-2-pro, gpt-image, nano-banana, etc.",
-                "env_vars": [
-                    {"key": "FAL_KEY", "prompt": "FAL API key", "url": "https://fal.ai/dashboard/keys"},
-                ],
-                "imagegen_backend": "fal",
-            },
        ],
    },
    "video_gen": {
@ -482,6 +483,11 @@ TOOLSET_ENV_REQUIREMENTS = {
 # ─── Post-Setup Hooks ─────────────────────────────────────────────────────────


+def _cua_driver_cmd() -> str:
+    """Return the cua-driver executable name/path, honoring non-empty overrides."""
+    return os.environ.get("HERMES_CUA_DRIVER_CMD", "").strip() or "cua-driver"
+
+
 def _pip_install(
    args: List[str],
    *,
@ -550,6 +556,55 @@ def _pip_install(
    )


+
+def _check_cua_driver_asset_for_arch() -> bool:
+    """Check whether the latest CUA release ships an asset for this architecture.
+
+    Returns True if the asset likely exists (or if we cannot determine it).
+    Returns False and prints a warning when the asset is confirmed missing,
+    so callers can skip the install attempt and avoid a raw 404.
+    """
+    import platform as _plat
+    import urllib.request
+
+    machine = _plat.machine()  # "x86_64" or "arm64"
+    if machine == "arm64":
+        # arm64 (Apple Silicon) assets are always published.
+        return True
+
+    # x86_64 / Intel — probe the latest release for an architecture-specific
+    # asset before falling through to the upstream installer.
+    api_url = (
+        "https://api.github.com/repos/trycua/cua/releases/latest"
+    )
+    try:
+        req = urllib.request.Request(api_url, headers={"Accept": "application/vnd.github+json"})
+        with urllib.request.urlopen(req, timeout=10) as resp:
+            release = _json.loads(resp.read().decode())
+        tag = release.get("tag_name", "")
+        assets = release.get("assets", [])
+        arch_names = {"x86_64", "amd64"}
+        has_asset = any(
+            any(a in a_info.get("name", "").lower() for a in arch_names)
+            for a_info in assets
+        )
+        if not has_asset:
+            _print_warning(
+                f"    Latest CUA release ({tag}) has no Intel (x86_64) asset."
+            )
+            _print_info(
+                "    CUA Driver currently only ships Apple Silicon builds."
+            )
+            _print_info(
+                "    See: https://github.com/trycua/cua/issues/1493"
+            )
+            return False
+    except Exception:
+        # Network / API failure — proceed and let the installer handle it.
+        pass
+    return True
+
+
 def install_cua_driver(upgrade: bool = False) -> bool:
    """Install or refresh the cua-driver binary used by Computer Use.

@ -579,7 +634,8 @@ def install_cua_driver(upgrade: bool = False) -> bool:
        _print_warning("    Computer Use (cua-driver) is macOS-only; skipping.")
        return False

-    binary = shutil.which("cua-driver")
+    driver_cmd = _cua_driver_cmd()
+    binary = shutil.which(driver_cmd)

    # Not installed → fresh install path (only when caller asked for it).
    if not binary and not upgrade:
@ -587,18 +643,20 @@ def install_cua_driver(upgrade: bool = False) -> bool:
            _print_warning("    curl not found — install manually:")
            _print_info("      https://github.com/trycua/cua/blob/main/libs/cua-driver/README.md")
            return False
+        if not _check_cua_driver_asset_for_arch():
+            return False
        return _run_cua_driver_installer(label="Installing")

    # Already installed and caller didn't ask to upgrade → just confirm.
    if binary and not upgrade:
        try:
            version = subprocess.run(
-                ["cua-driver", "--version"],
+                [driver_cmd, "--version"],
                capture_output=True, text=True, timeout=5,
            ).stdout.strip()
-            _print_success(f"    cua-driver already installed: {version or 'unknown version'}")
+            _print_success(f"    {driver_cmd} already installed: {version or 'unknown version'}")
        except Exception:
-            _print_success("    cua-driver already installed.")
+            _print_success(f"    {driver_cmd} already installed.")
        _print_info("    Grant macOS permissions if not done yet:")
        _print_info("      System Settings > Privacy & Security > Accessibility")
        _print_info("      System Settings > Privacy & Security > Screen Recording")
@ -609,11 +667,14 @@ def install_cua_driver(upgrade: bool = False) -> bool:
        _print_warning("    curl not found — cannot refresh cua-driver.")
        return bool(binary)

+    if not _check_cua_driver_asset_for_arch():
+        return bool(binary)
+
    if binary:
        # Show before/after version when we have a baseline. Best-effort.
        try:
            before = subprocess.run(
-                ["cua-driver", "--version"],
+                [driver_cmd, "--version"],
                capture_output=True, text=True, timeout=5,
            ).stdout.strip()
        except Exception:
@ -625,13 +686,13 @@ def install_cua_driver(upgrade: bool = False) -> bool:
    if ok and before:
        try:
            after = subprocess.run(
-                ["cua-driver", "--version"],
+                [driver_cmd, "--version"],
                capture_output=True, text=True, timeout=5,
            ).stdout.strip()
            if after and after != before:
-                _print_success(f"    cua-driver upgraded: {before} → {after}")
+                _print_success(f"    {driver_cmd} upgraded: {before} → {after}")
            elif after:
-                _print_info(f"    cua-driver up to date: {after}")
+                _print_info(f"    {driver_cmd} up to date: {after}")
        except Exception:
            pass
    return ok
@ -655,11 +716,12 @@ def _run_cua_driver_installer(label: str = "Installing", verbose: bool = True) -
        _print_info(f"    {label} cua-driver (macOS background computer-use)...")
    else:
        _print_info(f"    {label} cua-driver...")
+    driver_cmd = _cua_driver_cmd()
    try:
        result = subprocess.run(install_cmd, shell=True, timeout=300)
-        if result.returncode == 0 and shutil.which("cua-driver"):
+        if result.returncode == 0 and shutil.which(driver_cmd):
            if verbose:
-                _print_success("    cua-driver installed.")
+                _print_success(f"    {driver_cmd} installed.")
                _print_info("    IMPORTANT — grant macOS permissions now:")
                _print_info("      System Settings > Privacy & Security > Accessibility")
                _print_info("      System Settings > Privacy & Security > Screen Recording")
@ -1506,12 +1568,9 @@ def _plugin_image_gen_providers() -> list[dict]:
    Each returned dict looks like a regular ``TOOL_CATEGORIES`` provider
    row but carries an ``image_gen_plugin_name`` marker so downstream
    code (config writing, model picker) knows to route through the
-    plugin registry instead of the in-tree FAL backend.
-
-    FAL is skipped — it's already exposed by the hardcoded
-    ``TOOL_CATEGORIES["image_gen"]`` entries. When FAL gets ported to
-    a plugin in a follow-up PR, the hardcoded entries go away and this
-    function surfaces it alongside OpenAI automatically.
+    plugin registry. Every image-gen backend is a plugin now — there
+    are no hardcoded rows left in ``TOOL_CATEGORIES["image_gen"]`` for
+    this function to dedupe against (see issue #26241).
    """
    try:
        from agent.image_gen_registry import list_providers
@ -1524,9 +1583,6 @@ def _plugin_image_gen_providers() -> list[dict]:

    rows: list[dict] = []
    for provider in providers:
-        if getattr(provider, "name", None) == "fal":
-            # FAL has its own hardcoded rows today.
-            continue
        try:
            schema = provider.get_setup_schema()
        except Exception:
@ -1751,7 +1807,7 @@ _POST_SETUP_INSTALLED: dict = {
    # entry when (a) the post_setup is the ONLY install side-effect for
    # a no-key provider, and (b) an installed-state check is cheap and
    # doesn't trigger a heavy import.
-    "cua_driver": lambda: bool(shutil.which("cua-driver")),
+    "cua_driver": lambda: bool(shutil.which(_cua_driver_cmd())),
 }


@ -1869,6 +1925,16 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):
        print()

        # Plain text labels only (no ANSI codes in menu items)
+        # When the user is logged into Nous, surface a marker on providers
+        # whose access is included in their subscription so it's visually
+        # obvious which options cost extra vs. cost nothing on top of Nous.
+        try:
+            _nous_logged_in = bool(
+                get_nous_subscription_features(config).nous_auth_present
+            )
+        except Exception:
+            _nous_logged_in = False
+
        provider_choices = []
        for p in providers:
            badge = f" [{p['badge']}]" if p.get("badge") else ""
@ -1882,7 +1948,15 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):
                    configured = ""
                else:
                    configured = " [configured]"
-            provider_choices.append(f"{p['name']}{badge}{tag}{configured}")
+            # Highlight Nous-managed entries when the user has Portal auth.
+            # curses_radiolist can't render ANSI inside item strings, so we
+            # use a plain unicode star + parenthetical phrase. Suppressed
+            # when no Portal auth is present so non-subscribers see the
+            # picker unchanged.
+            sub_marker = ""
+            if _nous_logged_in and p.get("managed_nous_feature"):
+                sub_marker = "  ★ Included with your Nous subscription"
+            provider_choices.append(f"{p['name']}{badge}{tag}{configured}{sub_marker}")

        # Add skip option
        provider_choices.append("Skip — keep defaults / configure later")
@ -2349,6 +2423,30 @@ def _configure_provider(provider: dict, config: dict):

    # Prompt for each required env var
    all_configured = True
+    # If this BYOK provider lives in a category that ALSO has a
+    # Nous-managed sibling, show a single dim hint so users know
+    # they can avoid the key entirely via a Portal subscription.
+    # Suppressed when the user is already authed to Nous.
+    _show_portal_hint = False
+    if env_vars and not managed_feature and not provider.get("requires_nous_auth"):
+        try:
+            _has_managed_sibling = False
+            for _cat_key, _cat in TOOL_CATEGORIES.items():
+                _providers = _cat.get("providers", [])
+                if provider in _providers and any(
+                    sib.get("managed_nous_feature") for sib in _providers
+                ):
+                    _has_managed_sibling = True
+                    break
+            if _has_managed_sibling:
+                _features = get_nous_subscription_features(config)
+                _show_portal_hint = not _features.nous_auth_present
+        except Exception:
+            _show_portal_hint = False
+
+    if _show_portal_hint:
+        _print_info("  Available through Nous Portal subscription.")
+
    for var in env_vars:
        existing = get_env_value(var["key"])
        if existing:
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@ -48,6 +48,7 @@ from hermes_cli.config import (
    redact_key,
 )
 from gateway.status import get_running_pid, read_runtime_status
+from utils import env_var_enabled

 try:
    from fastapi import FastAPI, HTTPException, Request, WebSocket, WebSocketDisconnect
@ -118,7 +119,6 @@ _PUBLIC_API_PATHS: frozenset = frozenset({
    "/api/model/info",
    "/api/dashboard/themes",
    "/api/dashboard/plugins",
-    "/api/dashboard/plugins/rescan",
 })


@ -975,11 +975,13 @@ _AUX_TASK_SLOTS: Tuple[str, ...] = (
    "vision",
    "web_extract",
    "compression",
-    "session_search",
    "skills_hub",
    "approval",
    "mcp",
    "title_generation",
+    "triage_specifier",
+    "kanban_decomposer",
+    "profile_describer",
    "curator",
 )

@ -3293,24 +3295,49 @@ _VALID_CHANNEL_RE = re.compile(r"^[A-Za-z0-9._-]{1,128}$")
 _LOOPBACK_HOSTS = frozenset({"127.0.0.1", "::1", "localhost", "testclient"})


-def _is_public_bind() -> bool:
-    """True when bound to all-interfaces (operator used --insecure)."""
-    return getattr(app.state, "bound_host", "") in {"0.0.0.0", "::"}
-
-
 def _ws_client_is_allowed(ws: "WebSocket") -> bool:
    """Check if the WebSocket client IP is acceptable.

-    Allows loopback always; allows any IP when bound to all-interfaces
-    (--insecure mode, guarded by session token auth).
+    Allows loopback clients only.
    """
-    if _is_public_bind():
-        return True
    client_host = ws.client.host if ws.client else ""
    if not client_host:
        return True
    return client_host in _LOOPBACK_HOSTS

+
+def _ws_host_origin_is_allowed(ws: "WebSocket") -> bool:
+    """Apply the dashboard Host/Origin guard to WebSocket upgrades.
+
+    FastAPI HTTP middleware does not run for WebSocket routes, so the
+    DNS-rebinding Host check used for normal dashboard HTTP requests must be
+    repeated here before accepting the upgrade.  Browsers also send an Origin
+    header on WebSocket handshakes; when present, require it to target the
+    same bound dashboard host.
+    """
+    bound_host = getattr(app.state, "bound_host", None)
+    if not bound_host:
+        return True
+
+    host_header = ws.headers.get("host", "")
+    if not _is_accepted_host(host_header, bound_host):
+        return False
+
+    origin = ws.headers.get("origin", "")
+    if not origin:
+        return True
+
+    parsed = urllib.parse.urlparse(origin)
+    if parsed.scheme not in {"http", "https"} or not parsed.netloc:
+        return False
+
+    return _is_accepted_host(parsed.netloc, bound_host)
+
+
+def _ws_request_is_allowed(ws: "WebSocket") -> bool:
+    """Return True when the WebSocket upgrade matches dashboard boundaries."""
+    return _ws_host_origin_is_allowed(ws) and _ws_client_is_allowed(ws)
+
 # Per-channel subscriber registry used by /api/pub (PTY-side gateway → dashboard)
 # and /api/events (dashboard → browser sidebar).  Keyed by an opaque channel id
 # the chat tab generates on mount; entries auto-evict when the last subscriber
@ -3389,7 +3416,7 @@ async def _broadcast_event(channel: str, payload: str) -> None:
        except Exception:
            # Subscriber went away mid-send; the /api/events finally clause
            # will remove it from the registry on its next iteration.
-            pass
+            _log.warning("broadcast send failed for subscriber on %s", channel, exc_info=True)


 def _channel_or_close_code(ws: WebSocket) -> Optional[str]:
@ -3412,7 +3439,7 @@ async def pty_ws(ws: WebSocket) -> None:
        await ws.close(code=4401)
        return

-    if not _ws_client_is_allowed(ws):
+    if not _ws_request_is_allowed(ws):
        await ws.close(code=4403)
        return

@ -3531,7 +3558,7 @@ async def gateway_ws(ws: WebSocket) -> None:
        await ws.close(code=4401)
        return

-    if not _ws_client_is_allowed(ws):
+    if not _ws_request_is_allowed(ws):
        await ws.close(code=4403)
        return

@ -3563,7 +3590,7 @@ async def pub_ws(ws: WebSocket) -> None:
        await ws.close(code=4401)
        return

-    if not _ws_client_is_allowed(ws):
+    if not _ws_request_is_allowed(ws):
        await ws.close(code=4403)
        return

@ -3592,7 +3619,7 @@ async def events_ws(ws: WebSocket) -> None:
        await ws.close(code=4401)
        return

-    if not _ws_client_is_allowed(ws):
+    if not _ws_request_is_allowed(ws):
        await ws.close(code=4403)
        return

@ -4044,6 +4071,43 @@ async def set_dashboard_theme(body: ThemeSetBody):
 # Dashboard plugin system
 # ---------------------------------------------------------------------------

+def _safe_plugin_api_relpath(api_field: Any, *, dashboard_dir: Path) -> Optional[str]:
+    """Validate the manifest's ``api`` field for the plugin loader.
+
+    The web server later imports this file as a Python module via
+    ``importlib.util.spec_from_file_location`` (arbitrary code
+    execution by design — that's how plugins extend the backend).
+    Pre-#29156 the field was used as-is, which meant:
+
+    * An absolute path swallowed the plugin's dashboard directory
+      entirely — ``Path('safe/dashboard') / '/tmp/evil.py'`` resolves
+      to ``/tmp/evil.py``, so any attacker-controlled manifest could
+      point the import at any Python file on disk (GHSA-5qr3-c538-wm9j).
+    * A ``../..`` traversal could climb out of the plugin into
+      neighbouring directories on the search path.
+
+    Return the original string when the resolved path stays under
+    ``dashboard_dir``; return ``None`` (with a warning logged at the
+    call site) otherwise so the plugin still loads its static JS/CSS
+    but its backend ``api`` is rejected.
+    """
+    if not isinstance(api_field, str) or not api_field.strip():
+        return None
+    candidate = Path(api_field)
+    if candidate.is_absolute():
+        return None
+    try:
+        resolved = (dashboard_dir / candidate).resolve()
+        base = dashboard_dir.resolve()
+    except (OSError, RuntimeError):
+        return None
+    try:
+        resolved.relative_to(base)
+    except ValueError:
+        return None
+    return api_field
+
+
 def _discover_dashboard_plugins() -> list:
    """Scan plugins/*/dashboard/manifest.json for dashboard extensions.

@ -4062,7 +4126,16 @@ def _discover_dashboard_plugins() -> list:
        (bundled_root / "memory", "bundled"),
        (bundled_root, "bundled"),
    ]
-    if os.environ.get("HERMES_ENABLE_PROJECT_PLUGINS"):
+    # GHSA-5qr3-c538-wm9j (#29156): the previous ``os.environ.get(...)``
+    # check treated *any* non-empty string as truthy, so ``=0``, ``=false``,
+    # and ``=no`` — all of which the agent loader and operators correctly
+    # read as "disabled" — silently *enabled* the untrusted project source
+    # in the web server.  Combined with the absolute-path RCE primitive on
+    # the manifest's ``api`` field (now patched below), this turned the
+    # opt-in into a sticky always-on switch.  Use the shared truthy
+    # semantics (``1`` / ``true`` / ``yes`` / ``on``) so the gate matches
+    # ``hermes_cli/plugins.py`` and the documented user contract.
+    if env_var_enabled("HERMES_ENABLE_PROJECT_PLUGINS"):
        search_dirs.append((Path.cwd() / ".hermes" / "plugins", "project"))

    for plugins_root, source in search_dirs:
@ -4101,6 +4174,23 @@ def _discover_dashboard_plugins() -> list:
                slots: List[str] = []
                if isinstance(slots_src, list):
                    slots = [s for s in slots_src if isinstance(s, str) and s]
+                # Validate ``api`` at discovery time so the value cached
+                # on the plugin entry is already safe to feed into the
+                # importer.  An attacker-controlled manifest can name
+                # any absolute path or ``..`` traversal here — the
+                # web server then imports that file as a Python module
+                # (RCE, GHSA-5qr3-c538-wm9j).
+                raw_api = data.get("api")
+                dashboard_dir = child / "dashboard"
+                safe_api = _safe_plugin_api_relpath(raw_api, dashboard_dir=dashboard_dir)
+                if raw_api and safe_api is None:
+                    _log.warning(
+                        "Plugin %s: refusing unsafe api path %r (must be a "
+                        "relative file inside the plugin's dashboard/ "
+                        "directory); backend routes from this plugin will "
+                        "not be mounted",
+                        name, raw_api,
+                    )
                plugins.append({
                    "name": name,
                    "label": data.get("label", name),
@ -4111,10 +4201,10 @@ def _discover_dashboard_plugins() -> list:
                    "slots": slots,
                    "entry": data.get("entry", "dist/index.js"),
                    "css": data.get("css"),
-                    "has_api": bool(data.get("api")),
+                    "has_api": bool(safe_api),
                    "source": source,
-                    "_dir": str(child / "dashboard"),
-                    "_api_file": data.get("api"),
+                    "_dir": str(dashboard_dir),
+                    "_api_file": safe_api,
                })
            except Exception as exc:
                _log.warning("Bad dashboard plugin manifest %s: %s", manifest_file, exc)
@ -4317,12 +4407,13 @@ async def post_agent_plugin_install(request: Request, body: _AgentPluginInstallB

 def _validate_plugin_name(name: str) -> str:
    """Reject path-traversal attempts in plugin name URL parameters."""
-    if not name or "/" in name or "\\" in name or ".." in name:
+    name = name.strip("/")
+    if not name or ".." in name or "\\" in name:
        raise HTTPException(status_code=400, detail="Invalid plugin name.")
    return name


-@app.post("/api/dashboard/agent-plugins/{name}/enable")
+@app.post("/api/dashboard/agent-plugins/{name:path}/enable")
 async def post_agent_plugin_enable(request: Request, name: str):
    _require_token(request)
    name = _validate_plugin_name(name)
@ -4334,7 +4425,7 @@ async def post_agent_plugin_enable(request: Request, name: str):
    return result


-@app.post("/api/dashboard/agent-plugins/{name}/disable")
+@app.post("/api/dashboard/agent-plugins/{name:path}/disable")
 async def post_agent_plugin_disable(request: Request, name: str):
    _require_token(request)
    name = _validate_plugin_name(name)
@ -4346,7 +4437,7 @@ async def post_agent_plugin_disable(request: Request, name: str):
    return result


-@app.post("/api/dashboard/agent-plugins/{name}/update")
+@app.post("/api/dashboard/agent-plugins/{name:path}/update")
 async def post_agent_plugin_update(request: Request, name: str):
    _require_token(request)
    name = _validate_plugin_name(name)
@ -4359,7 +4450,7 @@ async def post_agent_plugin_update(request: Request, name: str):
    return result


-@app.delete("/api/dashboard/agent-plugins/{name}")
+@app.delete("/api/dashboard/agent-plugins/{name:path}")
 async def delete_agent_plugin(request: Request, name: str):
    _require_token(request)
    name = _validate_plugin_name(name)
@ -4397,7 +4488,7 @@ class _PluginVisibilityBody(BaseModel):
    hidden: bool


-@app.post("/api/dashboard/plugins/{name}/visibility")
+@app.post("/api/dashboard/plugins/{name:path}/visibility")
 async def post_plugin_visibility(request: Request, name: str, body: _PluginVisibilityBody):
    """Toggle a plugin's sidebar visibility (persists to config.yaml dashboard.hidden_plugins)."""
    _require_token(request)
@ -4468,12 +4559,42 @@ def _mount_plugin_api_routes():
    Each plugin's ``api`` field points to a Python file that must expose
    a ``router`` (FastAPI APIRouter).  Routes are mounted under
    ``/api/plugins/<name>/``.
+
+    Backend import is restricted to ``bundled`` and ``user`` sources.
+    Project plugins (``./.hermes/plugins/``) ship with the CWD and are
+    therefore attacker-controlled in any threat model where the user
+    opens a malicious repo; they can extend the dashboard UI via
+    static JS/CSS but their Python ``api`` file is never auto-imported
+    by the web server.  See GHSA-5qr3-c538-wm9j (#29156).
    """
    for plugin in _get_dashboard_plugins():
        api_file_name = plugin.get("_api_file")
        if not api_file_name:
            continue
-        api_path = Path(plugin["_dir"]) / api_file_name
+        if plugin.get("source") == "project":
+            _log.warning(
+                "Plugin %s: ignoring backend api=%s (project plugins may "
+                "not auto-import Python code; move the plugin to "
+                "~/.hermes/plugins/ if you trust it)",
+                plugin["name"], api_file_name,
+            )
+            continue
+        dashboard_dir = Path(plugin["_dir"])
+        api_path = dashboard_dir / api_file_name
+        try:
+            resolved_api = api_path.resolve()
+            resolved_base = dashboard_dir.resolve()
+            resolved_api.relative_to(resolved_base)
+        except (OSError, RuntimeError, ValueError):
+            # Discovery already filters this, but re-check here in case
+            # ``_dir`` was tampered with after caching or a future caller
+            # bypasses the validator.  Defence in depth keeps the import
+            # primitive contained even if the upstream check regresses.
+            _log.warning(
+                "Plugin %s: refusing to import api file outside its "
+                "dashboard directory (%s)", plugin["name"], api_path,
+            )
+            continue
        if not api_path.exists():
            _log.warning("Plugin %s declares api=%s but file not found", plugin["name"], api_file_name)
            continue
--- a/hermes_cli/webhook.py
+++ b/hermes_cli/webhook.py
@ -11,8 +11,10 @@ hot-reloaded by the webhook adapter without a gateway restart.
 """

 import json
+import os
 import re
 import secrets
+import tempfile
 import time
 from pathlib import Path
 from typing import Dict
@ -23,6 +25,7 @@ from hermes_cli.config import cfg_get


 _SUBSCRIPTIONS_FILENAME = "webhook_subscriptions.json"
+_SUBSCRIPTIONS_FILE_MODE = 0o600


 def _hermes_home() -> Path:
@ -48,12 +51,33 @@ def _load_subscriptions() -> Dict[str, dict]:
 def _save_subscriptions(subs: Dict[str, dict]) -> None:
    path = _subscriptions_path()
    path.parent.mkdir(parents=True, exist_ok=True)
-    tmp_path = path.with_suffix(".tmp")
-    tmp_path.write_text(
-        json.dumps(subs, indent=2, ensure_ascii=False),
-        encoding="utf-8",
+    # webhook_subscriptions.json contains per-route HMAC secrets — write
+    # via tempfile + chmod 0o600 before the atomic rename so a permissive
+    # umask cannot leave the secrets readable to other local users in the
+    # window between create and rename.
+    fd, tmp_name = tempfile.mkstemp(
+        prefix=f".{path.name}.",
+        suffix=".tmp",
+        dir=path.parent,
+        text=True,
    )
-    atomic_replace(tmp_path, path)
+    tmp_path = Path(tmp_name)
+    try:
+        with os.fdopen(fd, "w", encoding="utf-8") as fh:
+            json.dump(subs, fh, indent=2, ensure_ascii=False)
+            fh.flush()
+            os.fsync(fh.fileno())
+        os.chmod(tmp_path, _SUBSCRIPTIONS_FILE_MODE)
+        atomic_replace(tmp_path, path)
+        # Re-assert after rename in case the destination existed with a
+        # broader mode and atomic_replace preserved it.
+        os.chmod(path, _SUBSCRIPTIONS_FILE_MODE)
+    except Exception:
+        try:
+            tmp_path.unlink(missing_ok=True)
+        except OSError:
+            pass
+        raise


 def _get_webhook_config() -> dict:
--- a/hermes_state.py
+++ b/hermes_state.py
@ -33,7 +33,7 @@ T = TypeVar("T")

 DEFAULT_DB_PATH = get_hermes_home() / "state.db"

-SCHEMA_VERSION = 12
+SCHEMA_VERSION = 13

 # ---------------------------------------------------------------------------
 # WAL-compatibility fallback
@ -237,7 +237,8 @@ CREATE TABLE IF NOT EXISTS messages (
    reasoning_details TEXT,
    codex_reasoning_items TEXT,
    codex_message_items TEXT,
-    platform_message_id TEXT
+    platform_message_id TEXT,
+    observed INTEGER DEFAULT 0
 );

 CREATE TABLE IF NOT EXISTS state_meta (
@ -1460,6 +1461,7 @@ class SessionDB:
        codex_reasoning_items: Any = None,
        codex_message_items: Any = None,
        platform_message_id: str = None,
+        observed: bool = False,
    ) -> int:
        """
        Append a message to a session. Returns the message row ID.
@ -1501,8 +1503,8 @@ class SessionDB:
                """INSERT INTO messages (session_id, role, content, tool_call_id,
                   tool_calls, tool_name, timestamp, token_count, finish_reason,
                   reasoning, reasoning_content, reasoning_details, codex_reasoning_items,
-                   codex_message_items, platform_message_id)
-                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+                   codex_message_items, platform_message_id, observed)
+                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                (
                    session_id,
                    role,
@ -1519,6 +1521,7 @@ class SessionDB:
                    codex_items_json,
                    codex_message_items_json,
                    platform_message_id,
+                    1 if observed else 0,
                ),
            )
            msg_id = cursor.lastrowid
@ -1590,8 +1593,8 @@ class SessionDB:
                    """INSERT INTO messages (session_id, role, content, tool_call_id,
                       tool_calls, tool_name, timestamp, token_count, finish_reason,
                       reasoning, reasoning_content, reasoning_details, codex_reasoning_items,
-                       codex_message_items, platform_message_id)
-                       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+                       codex_message_items, platform_message_id, observed)
+                       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                    (
                        session_id,
                        role,
@ -1608,6 +1611,7 @@ class SessionDB:
                        codex_items_json,
                        codex_message_items_json,
                        platform_msg_id,
+                        1 if msg.get("observed") else 0,
                    ),
                )
                total_messages += 1
@ -1925,7 +1929,7 @@ class SessionDB:
            rows = self._conn.execute(
                "SELECT role, content, tool_call_id, tool_calls, tool_name, "
                "finish_reason, reasoning, reasoning_content, reasoning_details, "
-                "codex_reasoning_items, codex_message_items, platform_message_id "
+                "codex_reasoning_items, codex_message_items, platform_message_id, observed "
                f"FROM messages WHERE session_id IN ({placeholders}) ORDER BY id",
                tuple(session_ids),
            ).fetchall()
@ -1953,6 +1957,8 @@ class SessionDB:
            # for backward compatibility with the JSONL transcript shape.
            if row["platform_message_id"]:
                msg["message_id"] = row["platform_message_id"]
+            if row["observed"]:
+                msg["observed"] = True
            # Restore reasoning fields on assistant messages so providers
            # that replay reasoning (OpenRouter, OpenAI, Nous) receive
            # coherent multi-turn reasoning context.
--- a/infographic/bitwarden-secrets-manager/infographic.png
+++ b/infographic/bitwarden-secrets-manager/infographic.png
--- a/infographic/bitwarden-secrets-manager/prompts/infographic.md
+++ b/infographic/bitwarden-secrets-manager/prompts/infographic.md
@ -1,121 +0,0 @@
-Create a professional infographic following these specifications:
-
-## Image Specifications
-
- **Type**: Infographic
- **Layout**: bento-grid
- **Style**: retro-pop-grid
- **Aspect Ratio**: 1:1 (square)
- **Language**: en
-
-## Core Principles
-
- Follow the layout structure precisely for information architecture
- Apply style aesthetics consistently throughout
- Keep information concise, highlight keywords and core concepts
- Use ample whitespace for visual clarity
- Maintain clear visual hierarchy
-
-## Text Requirements
-
- All text must match the specified style treatment
- Main titles should be prominent and readable
- Key concepts should be visually emphasized
- Labels should be clear and appropriately sized
- Use English for all text content
-
-## Layout Guidelines (bento-grid)
-
- Grid of rectangular cells with varied sizes (1x1, 2x1, 1x2, 2x2)
- Hero cell ("ONE TOKEN, EVERY KEY") takes the largest position (top-center or upper-left, 2x2)
- Supporting cells around the hero, mixed cell sizes for rhythm
- Each cell self-contained with its own title + icon + brief content
- Title strip at the top: "BITWARDEN SECRETS MANAGER — HERMES-AGENT PR #30035"
- Footer strip at the bottom with commit SHA + repo
-
-## Style Guidelines (retro-pop-grid)
-
- 1970s retro pop art with strict Swiss international grid
- Background: warm vintage cream/beige (#F5F0E6)
- Accents: salmon pink, sky blue, mustard yellow, mint green — all muted retro tones
- Pure solid black (#000000) and solid white (#FFFFFF) for extreme-contrast cells
- Uniform thick black outlines on ALL illustrations, text boxes, grid dividers
- Pure 2D flat vector aesthetic with subtle screen-print texture
- One cell inverted to black-background-with-white-text for the "NEVER BLOCKS STARTUP" warning section
- Geometric fill patterns in empty cells: checkerboards, diagonal lines, dot grids
- Flat abstract symbols: shields (security), wrenches (install), arrows (rotation), keyholes (auth), checkmarks (tests)
- Vintage comic-style smiley face for "26/26 PASSING" cell
- Bold brutalist or thick retro display fonts for headers; clean sans-serif body
- Decorative stylistic labels acceptable: "WARNING", "NEW DEFAULT", "PINNED", "VERIFIED", "ROTATE"
-
-## Avoid
-
- 3D rendering, gradients, soft shadows, sketch-like lines
- Free-floating elements — everything anchored in grid cells
- Pure white background — must use warm cream/beige
-
---
-
-Generate the infographic based on the content below:
-
-### Title (top strip)
-BITWARDEN SECRETS MANAGER → HERMES-AGENT
-PR #30035
-
-### HERO CELL (largest, top-center, salmon pink background with thick black border)
-ONE TOKEN, EVERY KEY
-Rotate once in the Bitwarden web app.
-Every Hermes process picks it up on next start.
-NEW DEFAULT: override_existing = true
-
-### Cell — LAZY INSTALL (sky blue background)
-~/.hermes/bin/bws
-bws v2.0.0 PINNED
-SHA-256 VERIFIED
-No apt · no brew · no sudo
-Icon: wrench + downward arrow
-
-### Cell — CLI SURFACE (mustard yellow background, checkerboard accents)
-$ hermes secrets bitwarden
-  setup    wizard
-  status   diagnose
-  sync     fetch
-  install  binary
-  disable  off
-Icon: terminal prompt symbol
-
-### Cell — SOURCE OF TRUTH (mint green background)
-BITWARDEN WINS
-Overwrites stale .env on every start
-Bootstrap token never overwritten (exception)
-Icon: keyhole + arrow
-
-### Cell — INVERTED BLACK CELL with WHITE TEXT — NEVER BLOCKS STARTUP (extreme contrast)
-WARNING-FREE STARTUP
-Missing binary → warn + continue
-Bad token → warn + continue
-Network down → warn + continue
-Checksum mismatch → refuse + warn
-30s timeout ceiling
-Icon: white triangle warning sign
-
-### Cell — TESTS (cream with thick black outline, vintage comic smiley face)
-26 / 26
-HERMETIC
-subprocess + urllib mocked
-linux · macos · windows
-x86_64 · arm64
-Icon: comic-style smiley face with checkmark
-
-### Cell — CONFIG YAML (white background with black grid)
-secrets:
-  bitwarden:
-    enabled: true
-    project_id: ...
-    override_existing: true
-    cache_ttl_seconds: 300
-    auto_install: true
-
-### Footer strip (bottom, black-on-cream)
-PR #30035 · commit 7f9b05668 · NousResearch/hermes-agent
-10 files · +1743 / -1 · agent/secret_sources/ · hermes_cli/secrets_cli.py
--- a/infographic/bitwarden-secrets-manager/structured-content.md
+++ b/infographic/bitwarden-secrets-manager/structured-content.md
@ -1,57 +0,0 @@
-# Hermes-Agent PR #30035 — Bitwarden Secrets Manager Integration
-
-## Hero
-**ONE TOKEN, EVERY KEY**
-Rotate once. Every Hermes process picks it up on next start.
-`secrets.bitwarden.override_existing: true` (default)
-
-## Cells
-
-### Lazy Install
- `bws v2.0.0` pinned
- Downloaded into `~/.hermes/bin/bws`
- SHA-256 verified vs GitHub Releases checksum file
- No apt, no brew, no sudo
- Cross-platform: linux gnu+musl, macos universal, windows x86_64+arm64
-
-### CLI Surface
- `hermes secrets bitwarden setup`     wizard
- `hermes secrets bitwarden status`    diagnose
- `hermes secrets bitwarden sync`      dry-run / --apply
- `hermes secrets bitwarden install`   binary only
- `hermes secrets bitwarden disable`   off switch
-
-### Source of Truth
- Bitwarden WINS on every Hermes start
- BSM values overwrite stale `.env` lines
- Rotate a key once → all your machines reload it
- Bootstrap token `BWS_ACCESS_TOKEN` is the lone exception (never overwritten)
-
-### Never Blocks Startup
- Missing binary → warn + continue
- Bad token → warn + continue
- Checksum mismatch → refuse install + warn
- No network → warn + continue
- Timeout → 30s ceiling, warn + continue
-
-### Tests
- 26/26 passing, hermetic
- subprocess + urllib mocked
- Platform matrix tested (linux, macos, windows × x86_64, arm64)
- Cache hit/miss, auth fail, non-JSON, timeout, override behavior
-
-### Config
-```yaml
-secrets:
-  bitwarden:
-    enabled: true
-    project_id: <uuid>
-    override_existing: true   # NEW DEFAULT
-    cache_ttl_seconds: 300
-    auto_install: true
-```
-
-## Footer
-PR #30035 · commit 7f9b05668 · NousResearch/hermes-agent
-
-10 files changed · +1743 / -1 · agent/secret_sources/ · hermes_cli/secrets_cli.py · tests · docs
--- a/infographic/kanban-db-corruption-defense/infographic.png
+++ b/infographic/kanban-db-corruption-defense/infographic.png
--- a/infographic/skill-scanner-no-ghost-skills/infographic.png
+++ b/infographic/skill-scanner-no-ghost-skills/infographic.png
--- a/locales/af.yaml
+++ b/locales/af.yaml
@ -222,9 +222,12 @@ gateway:
    no_named_sessions:     "Geen benoemde sessies gevind nie.\nGebruik `/title My Sessie` om jou huidige sessie 'n naam te gee, en dan `/resume My Sessie` om later daarheen terug te keer."
    list_header:           "📋 **Benoemde Sessies**\n"
    list_item:             "• **{title}**{preview_part}"
+    list_item_numbered:    "{index}. **{title}**{preview_part}"
    list_preview_suffix:   " — _{preview}_"
    list_footer:           "\nGebruik: `/resume <session name>`"
+    list_footer_numbered:  "\nGebruik: `/resume <sessienaam>` of `/resume <nommer>` (bv. `/resume 1` vir die mees onlangse)"
    list_failed:           "Kon nie sessies lys nie: {error}"
+    out_of_range:          "Hervat-indeks {index} is buite bereik.\nGebruik `/resume` sonder argumente om beskikbare sessies te sien."
    not_found:             "Geen sessie gevind wat by '**{name}**' pas nie.\nGebruik `/resume` sonder argumente om beskikbare sessies te sien."
    already_on:            "📌 Reeds op sessie **{name}**."
    switch_failed:         "Kon nie sessie verander nie."
--- a/locales/de.yaml
+++ b/locales/de.yaml
@ -222,9 +222,12 @@ gateway:
    no_named_sessions:     "Keine benannten Sitzungen gefunden.\nVerwenden Sie `/title Meine Sitzung`, um die aktuelle Sitzung zu benennen, dann `/resume Meine Sitzung`, um später dorthin zurückzukehren."
    list_header:           "📋 **Benannte Sitzungen**\n"
    list_item:             "• **{title}**{preview_part}"
+    list_item_numbered:    "{index}. **{title}**{preview_part}"
    list_preview_suffix:   " — _{preview}_"
    list_footer:           "\nVerwendung: `/resume <Sitzungsname>`"
+    list_footer_numbered:  "\nVerwendung: `/resume <Sitzungsname>` oder `/resume <Nummer>` (z. B. `/resume 1` für die zuletzt verwendete)"
    list_failed:           "Sitzungen konnten nicht aufgelistet werden: {error}"
+    out_of_range:          "Wiederaufnahme-Index {index} liegt außerhalb des gültigen Bereichs.\nVerwenden Sie `/resume` ohne Argumente, um verfügbare Sitzungen anzuzeigen."
    not_found:             "Keine Sitzung passend zu '**{name}**' gefunden.\nVerwenden Sie `/resume` ohne Argumente, um verfügbare Sitzungen zu sehen."
    already_on:            "📌 Bereits in Sitzung **{name}**."
    switch_failed:         "Sitzungswechsel fehlgeschlagen."
--- a/locales/en.yaml
+++ b/locales/en.yaml
@ -237,9 +237,12 @@ gateway:
    no_named_sessions:     "No named sessions found.\nUse `/title My Session` to name your current session, then `/resume My Session` to return to it later."
    list_header:           "📋 **Named Sessions**\n"
    list_item:             "• **{title}**{preview_part}"
+    list_item_numbered:    "{index}. **{title}**{preview_part}"
    list_preview_suffix:   " — _{preview}_"
    list_footer:           "\nUsage: `/resume <session name>`"
+    list_footer_numbered:  "\nUsage: `/resume <session name>` or `/resume <number>` (e.g. `/resume 1` for the most recent)"
    list_failed:           "Could not list sessions: {error}"
+    out_of_range:          "Resume index {index} is out of range.\nUse `/resume` with no arguments to see available sessions."
    not_found:             "No session found matching '**{name}**'.\nUse `/resume` with no arguments to see available sessions."
    already_on:            "📌 Already on session **{name}**."
    switch_failed:         "Failed to switch session."
--- a/locales/es.yaml
+++ b/locales/es.yaml
@ -222,9 +222,12 @@ gateway:
    no_named_sessions:     "No se encontraron sesiones con nombre.\nUsa `/title Mi sesión` para nombrar la sesión actual y luego `/resume Mi sesión` para volver a ella."
    list_header:           "📋 **Sesiones con nombre**\n"
    list_item:             "• **{title}**{preview_part}"
+    list_item_numbered:    "{index}. **{title}**{preview_part}"
    list_preview_suffix:   " — _{preview}_"
    list_footer:           "\nUso: `/resume <nombre de sesión>`"
+    list_footer_numbered:  "\nUso: `/resume <nombre de sesión>` o `/resume <número>` (p. ej. `/resume 1` para la más reciente)"
    list_failed:           "No se pudieron listar las sesiones: {error}"
+    out_of_range:          "El índice de reanudación {index} está fuera de rango.\nUsa `/resume` sin argumentos para ver las sesiones disponibles."
    not_found:             "No se encontró ninguna sesión que coincida con '**{name}**'.\nUsa `/resume` sin argumentos para ver las sesiones disponibles."
    already_on:            "📌 Ya estás en la sesión **{name}**."
    switch_failed:         "No se pudo cambiar de sesión."
--- a/locales/fr.yaml
+++ b/locales/fr.yaml
@ -222,9 +222,12 @@ gateway:
    no_named_sessions:     "Aucune session nommée trouvée.\nUtilisez `/title Ma session` pour nommer la session actuelle, puis `/resume Ma session` pour y revenir plus tard."
    list_header:           "📋 **Sessions nommées**\n"
    list_item:             "• **{title}**{preview_part}"
+    list_item_numbered:    "{index}. **{title}**{preview_part}"
    list_preview_suffix:   " — _{preview}_"
    list_footer:           "\nUsage : `/resume <nom de session>`"
+    list_footer_numbered:  "\nUtilisation : `/resume <nom de session>` ou `/resume <numéro>` (par exemple `/resume 1` pour la plus récente)"
    list_failed:           "Impossible de lister les sessions : {error}"
+    out_of_range:          "L'index de reprise {index} est hors limites.\nUtilisez `/resume` sans arguments pour voir les sessions disponibles."
    not_found:             "Aucune session correspondant à '**{name}**' trouvée.\nUtilisez `/resume` sans argument pour voir les sessions disponibles."
    already_on:            "📌 Déjà sur la session **{name}**."
    switch_failed:         "Échec du changement de session."
--- a/locales/ga.yaml
+++ b/locales/ga.yaml
@ -226,9 +226,12 @@ gateway:
    no_named_sessions:     "Níor aimsíodh aon seisiún ainmnithe.\nÚsáid `/title M'Ainm Seisiúin` chun do sheisiún reatha a ainmniú, ansin `/resume M'Ainm Seisiúin` chun filleadh air níos déanaí."
    list_header:           "📋 **Seisiúin Ainmnithe**\n"
    list_item:             "• **{title}**{preview_part}"
+    list_item_numbered:    "{index}. **{title}**{preview_part}"
    list_preview_suffix:   " — _{preview}_"
    list_footer:           "\nÚsáid: `/resume <session name>`"
+    list_footer_numbered:  "\nÚsáid: `/resume <ainm seisiúin>` nó `/resume <uimhir>` (m.sh. `/resume 1` don cheann is déanaí)"
    list_failed:           "Níorbh fhéidir seisiúin a liostáil: {error}"
+    out_of_range:          "Tá an t-innéacs atosaithe {index} as raon.\nÚsáid `/resume` gan argóintí chun na seisiúin atá ar fáil a fheiceáil."
    not_found:             "Níor aimsíodh aon seisiún ag teacht le '**{name}**'.\nÚsáid `/resume` gan argóintí chun seisiúin atá ar fáil a fheiceáil."
    already_on:            "📌 Cheana ar an seisiún **{name}**."
    switch_failed:         "Theip ar athrú seisiúin."
--- a/locales/hu.yaml
+++ b/locales/hu.yaml
@ -222,9 +222,12 @@ gateway:
    no_named_sessions:     "Nem található elnevezett munkamenet.\nHasználd a `/title Saját munkamenet` parancsot a jelenlegi munkamenet elnevezéséhez, majd a `/resume Saját munkamenet` paranccsal térhetsz vissza hozzá."
    list_header:           "📋 **Elnevezett munkamenetek**\n"
    list_item:             "• **{title}**{preview_part}"
+    list_item_numbered:    "{index}. **{title}**{preview_part}"
    list_preview_suffix:   " — _{preview}_"
    list_footer:           "\nHasználat: `/resume <munkamenet neve>`"
+    list_footer_numbered:  "\nHasználat: `/resume <munkamenet neve>` vagy `/resume <szám>` (pl. `/resume 1` a legutóbbihoz)"
    list_failed:           "Nem sikerült listázni a munkameneteket: {error}"
+    out_of_range:          "A folytatási index ({index}) tartományon kívül esik.\nA `/resume` argumentumok nélküli használata megjeleníti az elérhető munkameneteket."
    not_found:             "Nem található '**{name}**' nevű munkamenet.\nArgumentumok nélkül használd a `/resume` parancsot az elérhető munkamenetek megtekintéséhez."
    already_on:            "📌 Már a **{name}** munkamenetben vagy."
    switch_failed:         "Nem sikerült munkamenetet váltani."
--- a/locales/it.yaml
+++ b/locales/it.yaml
@ -222,9 +222,12 @@ gateway:
    no_named_sessions:     "Nessuna sessione con nome trovata.\nUsa `/title My Session` per dare un nome alla sessione attuale, poi `/resume My Session` per tornare a essa in seguito."
    list_header:           "📋 **Sessioni con nome**\n"
    list_item:             "• **{title}**{preview_part}"
+    list_item_numbered:    "{index}. **{title}**{preview_part}"
    list_preview_suffix:   " — _{preview}_"
    list_footer:           "\nUso: `/resume <session name>`"
+    list_footer_numbered:  "\nUso: `/resume <nome sessione>` o `/resume <numero>` (es. `/resume 1` per la più recente)"
    list_failed:           "Impossibile elencare le sessioni: {error}"
+    out_of_range:          "L'indice di ripresa {index} è fuori intervallo.\nUsa `/resume` senza argomenti per vedere le sessioni disponibili."
    not_found:             "Nessuna sessione trovata corrispondente a '**{name}**'.\nUsa `/resume` senza argomenti per vedere le sessioni disponibili."
    already_on:            "📌 Già nella sessione **{name}**."
    switch_failed:         "Cambio di sessione non riuscito."
--- a/locales/ja.yaml
+++ b/locales/ja.yaml
@ -222,9 +222,12 @@ gateway:
    no_named_sessions:     "名前付きセッションが見つかりません。\n`/title セッション名` で現在のセッションに名前を付けると、後で `/resume セッション名` で戻れます。"
    list_header:           "📋 **名前付きセッション**\n"
    list_item:             "• **{title}**{preview_part}"
+    list_item_numbered:    "{index}. **{title}**{preview_part}"
    list_preview_suffix:   " — _{preview}_"
    list_footer:           "\n使い方: `/resume <セッション名>`"
+    list_footer_numbered:  "\n使い方: `/resume <セッション名>` または `/resume <番号>`（例: 最新のセッションには `/resume 1`）"
    list_failed:           "セッションを一覧表示できませんでした: {error}"
+    out_of_range:          "再開インデックス {index} は範囲外です。\n引数なしで `/resume` を実行すると、利用可能なセッションが表示されます。"
    not_found:             "'**{name}**' に一致するセッションが見つかりません。\n引数なしで `/resume` を実行すると利用可能なセッションを表示します。"
    already_on:            "📌 既にセッション **{name}** にいます。"
    switch_failed:         "セッションの切り替えに失敗しました。"
--- a/locales/ko.yaml
+++ b/locales/ko.yaml
@ -222,9 +222,12 @@ gateway:
    no_named_sessions:     "이름이 지정된 세션이 없습니다.\n현재 세션에 이름을 지정하려면 `/title 내 세션`을 사용하고, 나중에 `/resume 내 세션`으로 돌아오세요."
    list_header:           "📋 **이름이 지정된 세션**\n"
    list_item:             "• **{title}**{preview_part}"
+    list_item_numbered:    "{index}. **{title}**{preview_part}"
    list_preview_suffix:   " — _{preview}_"
    list_footer:           "\n사용법: `/resume <session name>`"
+    list_footer_numbered:  "\n사용법: `/resume <세션 이름>` 또는 `/resume <번호>` (예: 가장 최근 세션은 `/resume 1`)"
    list_failed:           "세션 목록을 가져올 수 없습니다: {error}"
+    out_of_range:          "재개 인덱스 {index}이(가) 범위를 벗어났습니다.\n인자 없이 `/resume`을 실행하면 사용 가능한 세션이 표시됩니다."
    not_found:             "'**{name}**'와 일치하는 세션이 없습니다.\n사용 가능한 세션을 보려면 인수 없이 `/resume`을 사용하세요."
    already_on:            "📌 이미 **{name}** 세션에 있습니다."
    switch_failed:         "세션 전환에 실패했습니다."
--- a/locales/pt.yaml
+++ b/locales/pt.yaml
@ -222,9 +222,12 @@ gateway:
    no_named_sessions:     "Não foram encontradas sessões com nome.\nUsa `/title A minha sessão` para nomear a sessão atual e depois `/resume A minha sessão` para voltar a ela."
    list_header:           "📋 **Sessões com nome**\n"
    list_item:             "• **{title}**{preview_part}"
+    list_item_numbered:    "{index}. **{title}**{preview_part}"
    list_preview_suffix:   " — _{preview}_"
    list_footer:           "\nUso: `/resume <nome da sessão>`"
+    list_footer_numbered:  "\nUso: `/resume <nome da sessão>` ou `/resume <número>` (ex.: `/resume 1` para a mais recente)"
    list_failed:           "Não foi possível listar as sessões: {error}"
+    out_of_range:          "O índice de retomada {index} está fora do intervalo.\nUse `/resume` sem argumentos para ver as sessões disponíveis."
    not_found:             "Não foi encontrada nenhuma sessão correspondente a '**{name}**'.\nUsa `/resume` sem argumentos para ver as sessões disponíveis."
    already_on:            "📌 Já estás na sessão **{name}**."
    switch_failed:         "Falha ao mudar de sessão."
--- a/locales/ru.yaml
+++ b/locales/ru.yaml
@ -222,9 +222,12 @@ gateway:
    no_named_sessions:     "Именованных сеансов не найдено.\nИспользуйте `/title Мой сеанс`, чтобы назвать текущий сеанс, затем `/resume Мой сеанс`, чтобы вернуться к нему позже."
    list_header:           "📋 **Именованные сеансы**\n"
    list_item:             "• **{title}**{preview_part}"
+    list_item_numbered:    "{index}. **{title}**{preview_part}"
    list_preview_suffix:   " — _{preview}_"
    list_footer:           "\nИспользование: `/resume <название сеанса>`"
+    list_footer_numbered:  "\nИспользование: `/resume <имя сеанса>` или `/resume <номер>` (например, `/resume 1` для самого недавнего)"
    list_failed:           "Не удалось получить список сеансов: {error}"
+    out_of_range:          "Индекс возобновления {index} вне диапазона.\nИспользуйте `/resume` без аргументов, чтобы увидеть доступные сеансы."
    not_found:             "Сеанс, соответствующий '**{name}**', не найден.\nИспользуйте `/resume` без аргументов, чтобы увидеть доступные сеансы."
    already_on:            "📌 Уже в сеансе **{name}**."
    switch_failed:         "Не удалось переключить сеанс."
--- a/locales/tr.yaml
+++ b/locales/tr.yaml
@ -222,9 +222,12 @@ gateway:
    no_named_sessions:     "Adlandırılmış oturum bulunamadı.\nMevcut oturumu adlandırmak için `/title Oturumum`, daha sonra geri dönmek için `/resume Oturumum` kullanın."
    list_header:           "📋 **Adlandırılmış Oturumlar**\n"
    list_item:             "• **{title}**{preview_part}"
+    list_item_numbered:    "{index}. **{title}**{preview_part}"
    list_preview_suffix:   " — _{preview}_"
    list_footer:           "\nKullanım: `/resume <oturum adı>`"
+    list_footer_numbered:  "\nKullanım: `/resume <oturum adı>` veya `/resume <numara>` (örn. en yenisi için `/resume 1`)"
    list_failed:           "Oturumlar listelenemedi: {error}"
+    out_of_range:          "Devam endeksi {index} aralık dışında.\nKullanılabilir oturumları görmek için `/resume` komutunu argümansız çalıştırın."
    not_found:             "'**{name}**' ile eşleşen oturum bulunamadı.\nKullanılabilir oturumları görmek için argümansız `/resume` kullanın."
    already_on:            "📌 Zaten **{name}** oturumundasınız."
    switch_failed:         "Oturum değiştirilemedi."
--- a/locales/uk.yaml
+++ b/locales/uk.yaml
@ -222,9 +222,12 @@ gateway:
    no_named_sessions:     "Іменованих сеансів не знайдено.\nВикористайте `/title Мій сеанс`, щоб назвати поточний сеанс, потім `/resume Мій сеанс`, щоб повернутися до нього."
    list_header:           "📋 **Іменовані сеанси**\n"
    list_item:             "• **{title}**{preview_part}"
+    list_item_numbered:    "{index}. **{title}**{preview_part}"
    list_preview_suffix:   " — _{preview}_"
    list_footer:           "\nВикористання: `/resume <назва сеансу>`"
+    list_footer_numbered:  "\nВикористання: `/resume <назва сесії>` або `/resume <номер>` (наприклад, `/resume 1` для найновішої)"
    list_failed:           "Не вдалося отримати список сеансів: {error}"
+    out_of_range:          "Індекс відновлення {index} поза межами діапазону.\nВикористовуйте `/resume` без аргументів, щоб переглянути доступні сесії."
    not_found:             "Сеанс, що відповідає '**{name}**', не знайдено.\nВикористайте `/resume` без аргументів, щоб побачити доступні сеанси."
    already_on:            "📌 Уже в сеансі **{name}**."
    switch_failed:         "Не вдалося переключити сеанс."
--- a/locales/zh-hant.yaml
+++ b/locales/zh-hant.yaml
@ -222,9 +222,12 @@ gateway:
    no_named_sessions:     "找不到已命名的工作階段。\n使用 `/title 我的工作階段` 為目前工作階段命名，然後使用 `/resume 我的工作階段` 返回。"
    list_header:           "📋 **已命名工作階段**\n"
    list_item:             "• **{title}**{preview_part}"
+    list_item_numbered:    "{index}. **{title}**{preview_part}"
    list_preview_suffix:   " — _{preview}_"
    list_footer:           "\n用法：`/resume <工作階段名稱>`"
+    list_footer_numbered:  "\n用法：`/resume <會話名稱>` 或 `/resume <編號>`（例如，`/resume 1` 表示最近的會話）"
    list_failed:           "無法列出工作階段：{error}"
+    out_of_range:          "恢復索引 {index} 超出範圍。\n請使用不帶參數的 `/resume` 查看可用會話。"
    not_found:             "找不到符合 '**{name}**' 的工作階段。\n使用不帶參數的 `/resume` 檢視可用的工作階段。"
    already_on:            "📌 已在工作階段 **{name}** 上。"
    switch_failed:         "切換工作階段失敗。"
--- a/locales/zh.yaml
+++ b/locales/zh.yaml
@ -222,9 +222,12 @@ gateway:
    no_named_sessions:     "未找到已命名的会话。\n使用 `/title 我的会话` 为当前会话命名，然后用 `/resume 我的会话` 返回。"
    list_header:           "📋 **已命名会话**\n"
    list_item:             "• **{title}**{preview_part}"
+    list_item_numbered:    "{index}. **{title}**{preview_part}"
    list_preview_suffix:   " — _{preview}_"
    list_footer:           "\n用法：`/resume <会话名称>`"
+    list_footer_numbered:  "\n用法：`/resume <会话名称>` 或 `/resume <编号>`（例如，`/resume 1` 表示最近的会话）"
    list_failed:           "无法列出会话：{error}"
+    out_of_range:          "恢复索引 {index} 超出范围。\n请使用不带参数的 `/resume` 查看可用会话。"
    not_found:             "未找到匹配 '**{name}**' 的会话。\n使用不带参数的 `/resume` 查看可用会话。"
    already_on:            "📌 已在会话 **{name}** 上。"
    switch_failed:         "切换会话失败。"
--- a/nix/web.nix
+++ b/nix/web.nix
@ -4,7 +4,7 @@ let
  src = ../web;
  npmDeps = pkgs.fetchNpmDeps {
    inherit src;
-    hash = "sha256-xSsyluzU2lNhwGqB6XMCGMv3QFHZizE6hgUyc1jvyOw=";
+    hash = "sha256-6qhGuifHVtCeep1SiQdCUxBMr7UGhYpdMTvXhrQu/zA=";
  };

  npm = hermesNpmLib.mkNpmPassthru { folder = "web"; attr = "web"; pname = "hermes-web"; };
--- a/Show more
+++ b/Show more