diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index c5fd9a20aee..4c88772327f 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -1680,26 +1680,48 @@ def _read_main_provider() -> str:
 # per turn — no lock needed. Cleared by ``clear_runtime_main()``.
 _RUNTIME_MAIN_PROVIDER: str = ""
 _RUNTIME_MAIN_MODEL: str = ""
+_RUNTIME_MAIN_BASE_URL: str = ""
+_RUNTIME_MAIN_API_KEY: str = ""
+_RUNTIME_MAIN_API_MODE: str = ""
 
 
-def set_runtime_main(provider: str, model: str) -> None:
-    """Record the live runtime provider/model for the current AIAgent.
+def set_runtime_main(
+    provider: str,
+    model: str,
+    *,
+    base_url: str = "",
+    api_key: str = "",
+    api_mode: str = "",
+) -> None:
+    """Record the live runtime provider/model/credentials for the current AIAgent.
 
     Called by ``run_agent.AIAgent._sync_runtime_main_for_aux_routing`` (or
     equivalent setter) at the top of each turn so that
     ``_read_main_provider`` / ``_read_main_model`` reflect CLI/gateway
     overrides instead of the stale config.yaml default.
+
+    For ``custom:`` providers, ``base_url`` and ``api_key`` must also be
+    recorded so that ``_resolve_auto`` can construct a valid client in
+    Step 1 instead of falling through to the aggregator chain.
     """
     global _RUNTIME_MAIN_PROVIDER, _RUNTIME_MAIN_MODEL
+    global _RUNTIME_MAIN_BASE_URL, _RUNTIME_MAIN_API_KEY, _RUNTIME_MAIN_API_MODE
     _RUNTIME_MAIN_PROVIDER = (provider or "").strip().lower()
     _RUNTIME_MAIN_MODEL = (model or "").strip()
+    _RUNTIME_MAIN_BASE_URL = (base_url or "").strip()
+    _RUNTIME_MAIN_API_KEY = api_key.strip() if isinstance(api_key, str) else ""
+    _RUNTIME_MAIN_API_MODE = (api_mode or "").strip()
 
 
 def clear_runtime_main() -> None:
     """Clear the runtime override (e.g. on session end)."""
     global _RUNTIME_MAIN_PROVIDER, _RUNTIME_MAIN_MODEL
+    global _RUNTIME_MAIN_BASE_URL, _RUNTIME_MAIN_API_KEY, _RUNTIME_MAIN_API_MODE
     _RUNTIME_MAIN_PROVIDER = ""
     _RUNTIME_MAIN_MODEL = ""
+    _RUNTIME_MAIN_BASE_URL = ""
+    _RUNTIME_MAIN_API_KEY = ""
+    _RUNTIME_MAIN_API_MODE = ""
 
 
 def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str], Optional[str]]:
@@ -2980,6 +3002,18 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
     runtime_api_key = runtime.get("api_key", "")
     runtime_api_mode = str(runtime.get("api_mode") or "")
 
+    # Fall back to process-local globals when main_runtime dict was not
+    # provided or was incomplete.  ``set_runtime_main()`` now records
+    # base_url/api_key/api_mode alongside provider/model, so custom:
+    # providers get the full credential surface in Step 1 of the
+    # auto-detect chain.
+    if not runtime_base_url and _RUNTIME_MAIN_BASE_URL:
+        runtime_base_url = _RUNTIME_MAIN_BASE_URL
+    if not runtime_api_key and _RUNTIME_MAIN_API_KEY:
+        runtime_api_key = _RUNTIME_MAIN_API_KEY
+    if not runtime_api_mode and _RUNTIME_MAIN_API_MODE:
+        runtime_api_mode = _RUNTIME_MAIN_API_MODE
+
     # ── Warn once if OPENAI_BASE_URL is set but config.yaml uses a named
     #    provider (not 'custom').  This catches the common "env poisoning"
     #    scenario where a user switches providers via `hermes model` but the
diff --git a/agent/chat_completion_helpers.py b/agent/chat_completion_helpers.py
index 0785347d2c9..cc7427950b2 100644
--- a/agent/chat_completion_helpers.py
+++ b/agent/chat_completion_helpers.py
@@ -1283,6 +1283,18 @@ def handle_max_iterations(agent, messages: list, api_call_count: int) -> str:
             agent._copy_reasoning_content_for_api(msg, api_msg)
             for internal_field in ("reasoning", "finish_reason", "_thinking_prefill"):
                 api_msg.pop(internal_field, None)
+            # Strict OpenAI-compatible gateways (Fireworks-backed OpenCode Go,
+            # Mistral, Moonshot/Kimi) reject any message key outside the Chat
+            # Completions schema. The main loop drops these via
+            # ChatCompletionsTransport.convert_messages(), but the summary path
+            # hand-builds messages and calls chat.completions.create() directly,
+            # bypassing the transport — so mirror that sanitization here:
+            # tool_name (SQLite FTS bookkeeping), the codex_* reasoning carriers,
+            # and every Hermes-internal underscore-prefixed scaffolding key.
+            for schema_foreign in ("tool_name", "codex_reasoning_items", "codex_message_items"):
+                api_msg.pop(schema_foreign, None)
+            for internal_key in [k for k in api_msg if isinstance(k, str) and k.startswith("_")]:
+                api_msg.pop(internal_key, None)
             if _needs_sanitize:
                 agent._sanitize_tool_calls_for_strict_api(api_msg)
             api_messages.append(api_msg)
diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index cf9c534decd..079c4b0b560 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -40,17 +40,47 @@ SUMMARY_PREFIX = (
     "window — treat it as background reference, NOT as active instructions. "
     "Do NOT answer questions or fulfill requests mentioned in this summary; "
     "they were already addressed. "
-    "Your current task is identified in the '## Active Task' section of the "
-    "summary — resume exactly from there. "
+    "Respond ONLY to the latest user message that appears AFTER this "
+    "summary — that message is the single source of truth for what to do "
+    "right now. "
+    "If the latest user message is consistent with the '## Active Task' "
+    "section, you may use the summary as background. If the latest user "
+    "message contradicts, supersedes, changes topic from, or in any way "
+    "diverges from '## Active Task' / '## In Progress' / '## Pending User "
+    "Asks' / '## Remaining Work', the latest message WINS — discard those "
+    "stale items entirely and do not 'wrap up the old task first'. "
+    "Reverse signals in the latest message (e.g. 'stop', 'undo', 'roll "
+    "back', 'just verify', 'don't do that anymore', 'never mind', a new "
+    "topic) must immediately end any in-flight work described in the "
+    "summary; do not re-surface it in later turns. "
     "IMPORTANT: Your persistent memory (MEMORY.md, USER.md) in the system "
     "prompt is ALWAYS authoritative and active — never ignore or deprioritize "
     "memory content due to this compaction note. "
-    "Respond ONLY to the latest user message "
-    "that appears AFTER this summary. The current session state (files, "
-    "config, etc.) may reflect work described here — avoid repeating it:"
+    "The current session state (files, config, etc.) may reflect work "
+    "described here — avoid repeating it:"
 )
 LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:"
 
+# Handoff prefixes that shipped in earlier releases. A summary persisted under
+# one of these can be inherited into a resumed lineage (#35344); when it is
+# re-normalized on re-compaction we must strip the OLD prefix too, otherwise the
+# stale directive it carried (e.g. "resume exactly from Active Task") survives
+# embedded in the body and keeps hijacking replies. Keep newest-first; entries
+# are matched literally. Add a frozen copy here whenever SUMMARY_PREFIX changes.
+_HISTORICAL_SUMMARY_PREFIXES = (
+    # Pre-#35344: contained the self-contradicting "resume exactly" directive.
+    "[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted "
+    "into the summary below. This is a handoff from a previous context "
+    "window — treat it as background reference, NOT as active instructions. "
+    "Do NOT answer questions or fulfill requests mentioned in this summary; "
+    "they were already addressed. "
+    "Your current task is identified in the '## Active Task' section of the "
+    "summary — resume exactly from there. "
+    "Respond ONLY to the latest user message "
+    "that appears AFTER this summary. The current session state (files, "
+    "config, etc.) may reflect work described here — avoid repeating it:",
+)
+
 # Minimum tokens for the summary output
 _MIN_SUMMARY_TOKENS = 2000
 # Proportion of compressed content to allocate for summary
@@ -1236,11 +1266,27 @@ Summary generation was unavailable, so this is a best-effort deterministic fallb
 
         # Shared structured template (used by both paths).
         _template_sections = f"""## Active Task
-[THE SINGLE MOST IMPORTANT FIELD. Copy the user's most recent request or
-task assignment verbatim — the exact words they used. If multiple tasks
-were requested and only some are done, list only the ones NOT yet completed.
-Continuation should pick up exactly here. Example:
+[THE SINGLE MOST IMPORTANT FIELD. Capture the user's most recent unfulfilled
+input verbatim — the exact words they used. This includes:
+- Explicit task assignments ("refactor the auth module")
+- Questions awaiting an answer ("waarom staat X op Y?", "wat zijn de volgende stappen?")
+- Decisions awaiting input ("optie A of B?")
+- Ongoing discussions where the assistant owes the next substantive reply
+A conversation where the user just asked a question IS an active task — the
+task is "answer that question with full context". Do NOT write "None" merely
+because the user did not issue an imperative command; reserve "None" for the
+rare case where the last exchange was fully resolved and the user said
+something like "thanks, that's all".
+If multiple items are outstanding, list only the ones NOT yet completed.
+Continuation should pick up exactly here. Examples:
 "User asked: 'Now refactor the auth module to use JWT instead of sessions'"
+"User asked: 'Waarom stond provider ineens op openrouter?' — needs investigation + answer"
+"User chose option A; awaiting implementation of step 2"
+If the user's most recent message was a reverse signal (stop, undo, roll
+back, never mind, just verify, change of topic) that supersedes earlier
+work, write the reverse signal verbatim and DO NOT carry forward the
+cancelled task. Example: "User asked: 'Stop the i18n refactor and just
+verify the current diff' — earlier i18n in-flight work is cancelled."
 If no outstanding task exists, write "None."]
 
 ## Goal
@@ -1306,7 +1352,7 @@ PREVIOUS SUMMARY:
 NEW TURNS TO INCORPORATE:
 {content_to_summarize}
 
-Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new completed actions to the numbered list (continue numbering). Move items from "In Progress" to "Completed Actions" when done. Move answered questions to "Resolved Questions". Update "Active State" to reflect current state. Remove information only if it is clearly obsolete. CRITICAL: Update "## Active Task" to reflect the user's most recent unfulfilled request — this is the most important field for task continuity.
+Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new completed actions to the numbered list (continue numbering). Move items from "In Progress" to "Completed Actions" when done. Move answered questions to "Resolved Questions". Update "Active State" to reflect current state. Remove information only if it is clearly obsolete. CRITICAL: Update "## Active Task" to reflect the user's most recent unfulfilled input — this includes any question, decision request, or discussion turn that the assistant has not yet answered. Only write "None" if the last exchange was fully resolved.
 
 {_template_sections}"""
         else:
@@ -1470,9 +1516,16 @@ The user has requested that this compaction PRIORITISE preserving all informatio
 
     @staticmethod
     def _strip_summary_prefix(summary: str) -> str:
-        """Return summary body without the current or legacy handoff prefix."""
+        """Return summary body without the current, legacy, or any historical
+        handoff prefix.
+
+        Historical prefixes must be stripped too: a handoff persisted under an
+        older prefix can be inherited into a resumed lineage (#35344), and if we
+        only re-prepend the current prefix without removing the old one, the
+        stale directive it carried stays embedded in the body.
+        """
         text = (summary or "").strip()
-        for prefix in (SUMMARY_PREFIX, LEGACY_SUMMARY_PREFIX):
+        for prefix in (SUMMARY_PREFIX, LEGACY_SUMMARY_PREFIX, *_HISTORICAL_SUMMARY_PREFIXES):
             if text.startswith(prefix):
                 return text[len(prefix):].lstrip()
         return text
@@ -1486,7 +1539,9 @@ The user has requested that this compaction PRIORITISE preserving all informatio
     @staticmethod
     def _is_context_summary_content(content: Any) -> bool:
         text = _content_text_for_contains(content).lstrip()
-        return text.startswith(SUMMARY_PREFIX) or text.startswith(LEGACY_SUMMARY_PREFIX)
+        if text.startswith(SUMMARY_PREFIX) or text.startswith(LEGACY_SUMMARY_PREFIX):
+            return True
+        return any(text.startswith(p) for p in _HISTORICAL_SUMMARY_PREFIXES)
 
     @classmethod
     def _find_latest_context_summary(
diff --git a/agent/context_engine.py b/agent/context_engine.py
index bb426fc189d..79c31fb48e6 100644
--- a/agent/context_engine.py
+++ b/agent/context_engine.py
@@ -115,6 +115,15 @@ class ContextEngine(ABC):
         """
         return False
 
+    def should_defer_preflight_to_real_usage(self, rough_tokens: int) -> bool:
+        """Return True when preflight should trust recent real usage instead.
+
+        Built-in compression uses this to avoid re-compacting from known-noisy
+        rough estimates after a compressed request has already fit. Third-party
+        engines can ignore it safely.
+        """
+        return False
+
     # -- Optional: manual /compress preflight ------------------------------
 
     def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
diff --git a/agent/conversation_compression.py b/agent/conversation_compression.py
index 9a93ba4a496..ba8678cc723 100644
--- a/agent/conversation_compression.py
+++ b/agent/conversation_compression.py
@@ -575,19 +575,18 @@ def compress_context(
             force=True,
         )
 
-    # Update token estimate after compaction so pressure calculations
-    # use the post-compression count, not the stale pre-compression one.
-    # Use estimate_request_tokens_rough() so tool schemas are included —
-    # with 50+ tools enabled, schemas alone can add 20-30K tokens, and
-    # omitting them delays the next compression cycle far past the
-    # configured threshold (issue #14695).
+    # Keep the post-compression rough estimate for diagnostics, but do not
+    # treat it as provider-reported prompt usage. Schema-heavy rough estimates
+    # can remain above threshold even after the next real API request fits.
     _compressed_est = estimate_request_tokens_rough(
         compressed,
         system_prompt=new_system_prompt or "",
         tools=agent.tools or None,
     )
-    agent.context_compressor.last_prompt_tokens = _compressed_est
+    agent.context_compressor.last_compression_rough_tokens = _compressed_est
+    agent.context_compressor.last_prompt_tokens = -1
     agent.context_compressor.last_completion_tokens = 0
+    agent.context_compressor.awaiting_real_usage_after_compression = True
 
     # Clear the file-read dedup cache.  After compression the original
     # read content is summarised away — if the model re-reads the same
@@ -599,7 +598,7 @@ def compress_context(
         pass
 
     logger.info(
-        "context compression done: session=%s messages=%d->%d tokens=~%s",
+        "context compression done: session=%s messages=%d->%d rough_tokens=~%s awaiting_real_usage=true",
         agent.session_id or "none", _pre_msg_count, len(compressed),
         f"{_compressed_est:,}",
     )
diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py
index a6c975be391..bb6c6229cdb 100644
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@@ -392,6 +392,9 @@ def run_conversation(
         set_runtime_main(
             getattr(agent, "provider", "") or "",
             getattr(agent, "model", "") or "",
+            base_url=getattr(agent, "base_url", "") or "",
+            api_key=getattr(agent, "api_key", "") or "",
+            api_mode=getattr(agent, "api_mode", "") or "",
         )
     except Exception:
         pass
@@ -600,18 +603,50 @@ def run_conversation(
             system_prompt=active_system_prompt or "",
             tools=agent.tools or None,
         )
+        _compressor = agent.context_compressor
+        _defer_preflight = getattr(
+            _compressor,
+            "should_defer_preflight_to_real_usage",
+            lambda _tokens: False,
+        )
+        _preflight_deferred = _defer_preflight(_preflight_tokens)
 
-        if agent.context_compressor.should_compress(_preflight_tokens):
+        if not _preflight_deferred:
+            # Keep the CLI/ACP context display in sync with what preflight
+            # actually measured.  The status bar reads
+            # ``compressor.last_prompt_tokens``, which otherwise only updates
+            # from a *successful* API response.  When the conversation has grown
+            # since the last successful call — or when compression then fails
+            # (e.g. the auxiliary summary model times out) and no fresh usage
+            # arrives — the bar stays stuck at the old, smaller value while
+            # preflight reports a much larger number, looking out of sync.
+            # Seed it with the fresh estimate (only ever revising upward; a real
+            # ``update_from_response`` will correct it after the next API call).
+            # Skipped when deferring — a deferred estimate is known to over-count
+            # vs the last real provider prompt, so trusting it for the display
+            # would re-introduce the very desync we're avoiding.
+            if _preflight_tokens > (_compressor.last_prompt_tokens or 0):
+                _compressor.last_prompt_tokens = _preflight_tokens
+
+        if _preflight_deferred:
+            logger.info(
+                "Skipping preflight compression: rough estimate ~%s >= %s, "
+                "but last real provider prompt was %s after compression",
+                f"{_preflight_tokens:,}",
+                f"{_compressor.threshold_tokens:,}",
+                f"{_compressor.last_real_prompt_tokens:,}",
+            )
+        elif _compressor.should_compress(_preflight_tokens):
             logger.info(
                 "Preflight compression: ~%s tokens >= %s threshold (model %s, ctx %s)",
                 f"{_preflight_tokens:,}",
-                f"{agent.context_compressor.threshold_tokens:,}",
+                f"{_compressor.threshold_tokens:,}",
                 agent.model,
-                f"{agent.context_compressor.context_length:,}",
+                f"{_compressor.context_length:,}",
             )
             agent._emit_status(
                 f"📦 Preflight compression: ~{_preflight_tokens:,} tokens "
-                f">= {agent.context_compressor.threshold_tokens:,} threshold. "
+                f">= {_compressor.threshold_tokens:,} threshold. "
                 "This may take a moment."
             )
             # May need multiple passes for very large sessions with small
@@ -646,8 +681,8 @@ def run_conversation(
                     system_prompt=active_system_prompt or "",
                     tools=agent.tools or None,
                 )
-                if _preflight_tokens < agent.context_compressor.threshold_tokens:
-                    break  # Under threshold
+                if not _compressor.should_compress(_preflight_tokens):
+                    break  # Under threshold or anti-thrash guard stopped it
 
     # Plugin hook: pre_llm_call
     # Fired once per turn before the tool-calling loop.  Plugins can
@@ -1457,7 +1492,8 @@ def run_conversation(
                     
                     if retry_count >= max_retries:
                         # Try fallback before giving up
-                        agent._buffer_status(f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback...")
+                        if agent._has_pending_fallback():
+                            agent._buffer_status(f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback...")
                         if agent._try_activate_fallback():
                             retry_count = 0
                             compression_attempts = 0
@@ -3059,12 +3095,17 @@ def run_conversation(
                 ) and not is_context_length_error
 
                 if is_client_error:
-                    # Try fallback before aborting — a different provider
-                    # may not have the same issue (rate limit, auth, etc.)
-                    if classified.reason == FailoverReason.content_policy_blocked:
-                        agent._buffer_status("⚠️ Provider safety filter blocked this request — trying fallback...")
-                    else:
-                        agent._buffer_status(f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback...")
+                    # Try fallback before aborting — a different provider may
+                    # not have the same issue (rate limit, auth, etc.). Only
+                    # announce the attempt when a fallback chain actually
+                    # exists; otherwise "trying fallback..." is a lie and the
+                    # session looks like it's recovering when it's about to
+                    # abort silently (#35314, #17446).
+                    if agent._has_pending_fallback():
+                        if classified.reason == FailoverReason.content_policy_blocked:
+                            agent._buffer_status("⚠️ Provider safety filter blocked this request — trying fallback...")
+                        else:
+                            agent._buffer_status(f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback...")
                     if agent._try_activate_fallback():
                         retry_count = 0
                         compression_attempts = 0
@@ -3207,7 +3248,8 @@ def run_conversation(
                         retry_count = 0
                         continue
                     # Try fallback before giving up entirely
-                    agent._buffer_status(f"⚠️ Max retries ({max_retries}) exhausted — trying fallback...")
+                    if agent._has_pending_fallback():
+                        agent._buffer_status(f"⚠️ Max retries ({max_retries}) exhausted — trying fallback...")
                     if agent._try_activate_fallback():
                         retry_count = 0
                         compression_attempts = 0
@@ -3862,6 +3904,11 @@ def run_conversation(
                     # inflate completion_tokens with reasoning,
                     # causing premature compression.  (#12026)
                     _real_tokens = _compressor.last_prompt_tokens
+                elif _compressor.last_prompt_tokens == -1:
+                    # Compression just ran and no API-reported prompt count
+                    # has arrived yet. Avoid treating a schema-heavy rough
+                    # post-compression estimate as real context pressure.
+                    _real_tokens = 0
                 else:
                     # Include tool schemas — with 50+ tools enabled
                     # these add 20-30K tokens the messages-only
@@ -4443,6 +4490,55 @@ def run_conversation(
         except Exception as _ver_err:
             logger.debug("file-mutation verifier footer failed: %s", _ver_err)
 
+    # Turn-completion explainer.
+    # When a turn ends abnormally after substantive work — empty content
+    # after retries, a partial/truncated stream, a still-pending tool
+    # result, or an iteration/budget limit — the user otherwise gets a
+    # blank or fragmentary response box with no consolidated reason why
+    # the agent stopped (#34452).  Surface a single user-visible
+    # explanation derived from ``_turn_exit_reason``, mirroring the
+    # file-mutation verifier footer pattern above.
+    #
+    # Gate carefully so healthy turns stay quiet:
+    #   - ``text_response(...)`` exits never produce an explanation
+    #     (handled inside the formatter), so a terse ``Done.`` is silent.
+    #   - We only ACT when there is no genuinely usable reply this turn:
+    #     an empty response, the "(empty)" terminal sentinel, or a
+    #     suspiciously short partial fragment with no terminating
+    #     punctuation (e.g. "The").  A real short answer keeps its text.
+    if not interrupted:
+        try:
+            if agent._turn_completion_explainer_enabled():
+                _stripped = (final_response or "").strip()
+                _is_empty_terminal = _stripped == "" or _stripped == "(empty)"
+                # A short fragment that is not a normal text_response exit
+                # and lacks sentence-ending punctuation is treated as a
+                # truncated partial (the "The" case from #34452).
+                _is_partial_fragment = (
+                    not _is_empty_terminal
+                    and not str(_turn_exit_reason).startswith("text_response")
+                    and len(_stripped) <= 24
+                    and _stripped[-1:] not in {".", "!", "?", "。", "！", "？", "`", ")"}
+                )
+                if _is_empty_terminal or _is_partial_fragment:
+                    _explanation = agent._format_turn_completion_explanation(
+                        _turn_exit_reason
+                    )
+                    if _explanation:
+                        if _is_empty_terminal:
+                            # Replace the bare "(empty)"/blank sentinel with
+                            # the actionable explanation.
+                            final_response = _explanation
+                        else:
+                            # Keep the partial fragment, append the reason so
+                            # the user sees both what arrived and why it
+                            # stopped.
+                            final_response = (
+                                _stripped + "\n\n" + _explanation
+                            )
+        except Exception as _exp_err:
+            logger.debug("turn-completion explainer failed: %s", _exp_err)
+
     _response_transformed = False
 
     # Plugin hook: transform_llm_output
diff --git a/agent/lsp/cli.py b/agent/lsp/cli.py
index 121cfa5f92c..139baa213f7 100644
--- a/agent/lsp/cli.py
+++ b/agent/lsp/cli.py
@@ -247,18 +247,13 @@ def _cmd_restart() -> int:
 
 
 def _cmd_which(server_id: str) -> int:
-    from agent.lsp.install import INSTALL_RECIPES, hermes_lsp_bin_dir
-    import shutil as _shutil
+    from agent.lsp.install import INSTALL_RECIPES, _existing_binary
 
     recipe = INSTALL_RECIPES.get(server_id)
     bin_name = (recipe or {}).get("bin", server_id)
-    staged = hermes_lsp_bin_dir() / bin_name
-    if staged.exists():
-        sys.stdout.write(str(staged) + "\n")
-        return 0
-    on_path = _shutil.which(bin_name)
-    if on_path:
-        sys.stdout.write(on_path + "\n")
+    resolved = _existing_binary(bin_name)
+    if resolved:
+        sys.stdout.write(resolved + "\n")
         return 0
     sys.stderr.write(f"{server_id}: not installed\n")
     return 1
@@ -292,11 +287,9 @@ def _backend_warnings() -> list:
     suggestion across common platforms.
     """
     import shutil as _shutil
-    from agent.lsp.install import hermes_lsp_bin_dir
+    from agent.lsp.install import _existing_binary
     notes: list = []
-    bash_installed = _shutil.which("bash-language-server") is not None or (
-        (hermes_lsp_bin_dir() / "bash-language-server").exists()
-    )
+    bash_installed = _existing_binary("bash-language-server") is not None
     if bash_installed and _shutil.which("shellcheck") is None:
         notes.append(
             "bash-language-server is installed but shellcheck is missing — "
diff --git a/agent/lsp/client.py b/agent/lsp/client.py
index 06a92ae351b..c135e554c5d 100644
--- a/agent/lsp/client.py
+++ b/agent/lsp/client.py
@@ -44,6 +44,7 @@ from __future__ import annotations
 import asyncio
 import logging
 import os
+import sys
 from pathlib import Path
 from typing import Any, Awaitable, Callable, Dict, List, Optional, Set
 from urllib.parse import quote, unquote
@@ -244,15 +245,27 @@ class LSPClient:
             await self._cleanup_process()
             raise
 
+    @staticmethod
+    def _win_wrap_cmd(cmd: List[str]) -> List[str]:
+        """On Windows, wrap .cmd/.bat shims so CreateProcess can run them."""
+        exe = cmd[0]
+        if exe.lower().endswith((".cmd", ".bat")):
+            return ["cmd.exe", "/c", *cmd]
+        return cmd
+
     async def _spawn(self) -> None:
         env = dict(os.environ)
         if self._env:
             env.update(self._env)
 
+        cmd = self._command
+        if sys.platform == "win32":
+            cmd = self._win_wrap_cmd(cmd)
+
         try:
             self._proc = await asyncio.create_subprocess_exec(
-                self._command[0],
-                *self._command[1:],
+                cmd[0],
+                *cmd[1:],
                 stdin=asyncio.subprocess.PIPE,
                 stdout=asyncio.subprocess.PIPE,
                 stderr=asyncio.subprocess.PIPE,
@@ -261,7 +274,7 @@ class LSPClient:
             )
         except FileNotFoundError as e:
             raise LSPProtocolError(
-                f"LSP server binary not found: {self._command[0]} ({e})"
+                f"LSP server binary not found: {cmd[0]} ({e})"
             ) from e
 
         # Drain stderr at debug level — if we don't, the pipe buffer
diff --git a/agent/lsp/install.py b/agent/lsp/install.py
index d4a80ec195e..9193b0375c0 100644
--- a/agent/lsp/install.py
+++ b/agent/lsp/install.py
@@ -108,6 +108,11 @@ INSTALL_RECIPES: Dict[str, Dict[str, Any]] = {
 _install_locks: Dict[str, threading.Lock] = {}
 _install_results: Dict[str, Optional[str]] = {}
 _install_lock_meta = threading.Lock()
+_WINDOWS_WRAPPER_SUFFIXES = (".cmd", ".exe", ".bat")
+
+
+def _is_windows() -> bool:
+    return os.name == "nt"
 
 
 def hermes_lsp_bin_dir() -> Path:
@@ -120,14 +125,33 @@ def hermes_lsp_bin_dir() -> Path:
     return p
 
 
+def _native_binary_candidates(base: Path) -> list[Path]:
+    """Return platform-native executable candidates for a staged binary."""
+    candidates = [base]
+    if _is_windows():
+        existing = {str(base).lower()}
+        for suffix in _WINDOWS_WRAPPER_SUFFIXES:
+            candidate = Path(str(base) + suffix)
+            key = str(candidate).lower()
+            if key not in existing:
+                candidates.append(candidate)
+                existing.add(key)
+    return candidates
+
+
 def _existing_binary(name: str) -> Optional[str]:
     """Probe the staging dir + PATH for a binary named ``name``."""
-    staged = hermes_lsp_bin_dir() / name
-    if staged.exists() and os.access(staged, os.X_OK):
-        return str(staged)
+    for staged in _native_binary_candidates(hermes_lsp_bin_dir() / name):
+        if staged.exists() and os.access(staged, os.X_OK):
+            return str(staged)
     on_path = shutil.which(name)
     if on_path:
         return on_path
+    if _is_windows():
+        for suffix in _WINDOWS_WRAPPER_SUFFIXES:
+            on_path = shutil.which(f"{name}{suffix}")
+            if on_path:
+                return on_path
     return None
 
 
@@ -250,12 +274,7 @@ def _install_npm(
 
     # Find the bin
     nm_bin = staging / "node_modules" / ".bin" / bin_name
-    if os.name == "nt":
-        # On Windows npm sometimes drops `.cmd` shims
-        candidates = [nm_bin, nm_bin.with_suffix(".cmd")]
-    else:
-        candidates = [nm_bin]
-    for c in candidates:
+    for c in _native_binary_candidates(nm_bin):
         if c.exists():
             # Symlink into our `lsp/bin/` for stable PATH access.
             link = hermes_lsp_bin_dir() / c.name
@@ -301,7 +320,7 @@ def _install_go(pkg: str, bin_name: str) -> Optional[str]:
         logger.warning("[install] go install errored for %s: %s", pkg, e)
         return None
     bin_path = staging / bin_name
-    if os.name == "nt":
+    if _is_windows():
         bin_path = bin_path.with_suffix(".exe")
     if bin_path.exists():
         return str(bin_path)
@@ -337,19 +356,24 @@ def _install_pip(pkg: str, bin_name: str) -> Optional[str]:
     except (subprocess.TimeoutExpired, OSError) as e:
         logger.warning("[install] pip install errored for %s: %s", pkg, e)
         return None
-    # Look for the script
-    bin_path = pip_target / "bin" / bin_name
-    if bin_path.exists():
-        link = hermes_lsp_bin_dir() / bin_name
-        if not link.exists():
-            try:
-                link.symlink_to(bin_path)
-            except (OSError, NotImplementedError):
-                try:
-                    shutil.copy2(bin_path, link)
-                except OSError:
-                    return str(bin_path)
-        return str(link if link.exists() else bin_path)
+    # Look for the console script.  POSIX wheels generally write to bin/,
+    # while native Windows installs use Scripts/.
+    script_dirs = [pip_target / "bin"]
+    if _is_windows():
+        script_dirs.append(pip_target / "Scripts")
+    for script_dir in script_dirs:
+        for bin_path in _native_binary_candidates(script_dir / bin_name):
+            if bin_path.exists():
+                link = hermes_lsp_bin_dir() / bin_path.name
+                if not link.exists():
+                    try:
+                        link.symlink_to(bin_path)
+                    except (OSError, NotImplementedError):
+                        try:
+                            shutil.copy2(bin_path, link)
+                        except OSError:
+                            return str(bin_path)
+                return str(link if link.exists() else bin_path)
     return None
 
 
diff --git a/agent/tool_executor.py b/agent/tool_executor.py
index 358c1a0a8f7..bbbd239dff9 100644
--- a/agent/tool_executor.py
+++ b/agent/tool_executor.py
@@ -180,28 +180,9 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
         except Exception:
             pass
 
-        # Checkpoint for file-mutating tools
-        if function_name in {"write_file", "patch"} and agent._checkpoint_mgr.enabled:
-            try:
-                file_path = function_args.get("path", "")
-                if file_path:
-                    work_dir = agent._checkpoint_mgr.get_working_dir_for_path(file_path)
-                    agent._checkpoint_mgr.ensure_checkpoint(work_dir, f"before {function_name}")
-            except Exception:
-                pass
-
-        # Checkpoint before destructive terminal commands
-        if function_name == "terminal" and agent._checkpoint_mgr.enabled:
-            try:
-                cmd = function_args.get("command", "")
-                if _is_destructive_command(cmd):
-                    cwd = function_args.get("workdir") or os.getenv("TERMINAL_CWD", os.getcwd())
-                    agent._checkpoint_mgr.ensure_checkpoint(
-                        cwd, f"before terminal: {cmd[:60]}"
-                    )
-            except Exception:
-                pass
-
+        # ── Block evaluation (BEFORE checkpoint preflight) ───────────
+        # We must know whether the tool will execute before touching
+        # checkpoint state (dedup slot, real snapshots).
         block_result = None
         blocked_by_guardrail = False
         if _ts_scope_block is not None:
@@ -224,6 +205,30 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
                     block_result = agent._guardrail_block_result(guardrail_decision)
                     blocked_by_guardrail = True
 
+        # ── Checkpoint preflight (only for tools that will execute) ──
+        if block_result is None:
+            # Checkpoint for file-mutating tools
+            if function_name in {"write_file", "patch"} and agent._checkpoint_mgr.enabled:
+                try:
+                    file_path = function_args.get("path", "")
+                    if file_path:
+                        work_dir = agent._checkpoint_mgr.get_working_dir_for_path(file_path)
+                        agent._checkpoint_mgr.ensure_checkpoint(work_dir, f"before {function_name}")
+                except Exception:
+                    pass
+
+            # Checkpoint before destructive terminal commands
+            if function_name == "terminal" and agent._checkpoint_mgr.enabled:
+                try:
+                    cmd = function_args.get("command", "")
+                    if _is_destructive_command(cmd):
+                        cwd = function_args.get("workdir") or os.getenv("TERMINAL_CWD", os.getcwd())
+                        agent._checkpoint_mgr.ensure_checkpoint(
+                            cwd, f"before terminal: {cmd[:60]}"
+                        )
+                except Exception:
+                    pass
+
         parsed_calls.append((tool_call, function_name, function_args, block_result, blocked_by_guardrail))
 
     # ── Logging / callbacks ──────────────────────────────────────────
@@ -301,33 +306,38 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
         # submit site below (GHSA-qg5c-hvr5-hjgr, #13617).
         start = time.time()
         try:
-            result = agent._invoke_tool(
-                function_name,
-                function_args,
-                effective_task_id,
-                tool_call.id,
-                messages=messages,
-                pre_tool_block_checked=True,
-            )
-        except Exception as tool_error:
-            result = f"Error executing tool '{function_name}': {tool_error}"
-            logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True)
-        duration = time.time() - start
-        is_error, _ = _detect_tool_failure(function_name, result)
-        if is_error:
-            logger.info("tool %s failed (%.2fs): %s", function_name, duration, result[:200])
-        else:
-            logger.info("tool %s completed (%.2fs, %d chars)", function_name, duration, len(result))
-        results[index] = (function_name, function_args, result, duration, is_error, False)
-        # Tear down worker-tid tracking.  Clear any interrupt bit we may
-        # have set so the next task scheduled onto this recycled tid
-        # starts with a clean slate.
-        with agent._tool_worker_threads_lock:
-            agent._tool_worker_threads.discard(_worker_tid)
-        try:
-            _ra()._set_interrupt(False, _worker_tid)
-        except Exception:
-            pass
+            try:
+                result = agent._invoke_tool(
+                    function_name,
+                    function_args,
+                    effective_task_id,
+                    tool_call.id,
+                    messages=messages,
+                    pre_tool_block_checked=True,
+                )
+            except Exception as tool_error:
+                result = f"Error executing tool '{function_name}': {tool_error}"
+                logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True)
+            duration = time.time() - start
+            is_error, _ = _detect_tool_failure(function_name, result)
+            if is_error:
+                logger.info("tool %s failed (%.2fs): %s", function_name, duration, result[:200])
+            else:
+                logger.info("tool %s completed (%.2fs, %d chars)", function_name, duration, len(result))
+            results[index] = (function_name, function_args, result, duration, is_error, False)
+        finally:
+            # Tear down worker-tid tracking.  Clear any interrupt bit we may
+            # have set so the next task scheduled onto this recycled tid
+            # starts with a clean slate.  This MUST be in a finally block
+            # because BaseException subclasses (CancelledError, KeyboardInterrupt)
+            # bypass ``except Exception`` and would otherwise leak the tid
+            # into _interrupted_threads, poisoning the recycled thread.
+            with agent._tool_worker_threads_lock:
+                agent._tool_worker_threads.discard(_worker_tid)
+            try:
+                _ra()._set_interrupt(False, _worker_tid)
+            except Exception:
+                pass
 
     # Start spinner for CLI mode (skip when TUI handles tool progress)
     spinner = None
@@ -753,10 +763,14 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
         elif function_name == "delegate_task":
             tasks_arg = function_args.get("tasks")
             if tasks_arg and isinstance(tasks_arg, list):
-                spinner_label = f"🔀 delegating {len(tasks_arg)} tasks"
+                spinner_label = f"🔀 delegating {len(tasks_arg)} tasks · (/agents to monitor)"
             else:
                 goal_preview = (function_args.get("goal") or "")[:30]
-                spinner_label = f"🔀 {goal_preview}" if goal_preview else "🔀 delegating"
+                spinner_label = (
+                    f"🔀 {goal_preview} · (/agents to monitor)"
+                    if goal_preview
+                    else "🔀 delegating · (/agents to monitor)"
+                )
             spinner = None
             if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
                 face = random.choice(KawaiiSpinner.get_waiting_faces())
diff --git a/cli.py b/cli.py
index 95d14f7a996..9045e4790bf 100644
--- a/cli.py
+++ b/cli.py
@@ -787,8 +787,10 @@ def AIAgent(*args, **kwargs):
 
 
 def get_tool_definitions(*args, **kwargs):
+    from hermes_cli.mcp_startup import wait_for_mcp_discovery
     from model_tools import get_tool_definitions as _get_tool_definitions
 
+    wait_for_mcp_discovery()
     return _get_tool_definitions(*args, **kwargs)
 
 
@@ -896,9 +898,12 @@ def _prepare_deferred_agent_startup() -> None:
             exc_info=True,
         )
     try:
-        from tools.mcp_tool import discover_mcp_tools
+        from hermes_cli.mcp_startup import start_background_mcp_discovery
 
-        discover_mcp_tools()
+        start_background_mcp_discovery(
+            logger=logger,
+            thread_name="termux-cli-mcp-discovery",
+        )
     except Exception:
         logger.debug(
             "MCP tool discovery failed at deferred CLI startup",
@@ -1537,9 +1542,17 @@ def _query_osc11_background() -> str | None:
     Most modern terminals reply with \x1b]11;rgb:RRRR/GGGG/BBBB\x1b\\
     within a few ms.  We wait up to 100ms total before giving up.
     Returns "#RRGGBB" or None on timeout / non-tty.
+
+    Skipped over SSH: the round-trip routinely exceeds our 100ms budget, so a
+    late reply lands after prompt_toolkit has grabbed the tty — its payload
+    leaks in as typed text and the BEL terminator reads as Ctrl+G (open
+    editor), trapping the user in a stray editor. Remote sessions fall back to
+    COLORFGBG / env hints / the dark default instead.
     """
     if not sys.stdin.isatty() or not sys.stdout.isatty():
         return None
+    if any(os.environ.get(v) for v in ("SSH_CONNECTION", "SSH_CLIENT", "SSH_TTY")):
+        return None
     try:
         import termios
         import tty
@@ -1587,8 +1600,11 @@ def _query_osc11_background() -> str | None:
         r, g, b = norm(m.group(1)), norm(m.group(2)), norm(m.group(3))
         return f"#{r:02X}{g:02X}{b:02X}"
     finally:
+        # TCSAFLUSH discards any unread input as it restores the original
+        # attributes — scrubs a slow/partial OSC 11 reply out of the tty
+        # buffer before prompt_toolkit can read it as keystrokes.
         try:
-            termios.tcsetattr(fd, termios.TCSANOW, old)
+            termios.tcsetattr(fd, termios.TCSAFLUSH, old)
         except Exception:
             pass
 
@@ -4872,6 +4888,10 @@ class HermesCLI:
         if not self._ensure_runtime_credentials():
             return False
 
+        from hermes_cli.mcp_startup import wait_for_mcp_discovery
+
+        wait_for_mcp_discovery()
+
         # Initialize SQLite session store for CLI sessions (if not already done in __init__)
         if self._session_db is None:
             try:
@@ -12928,6 +12948,13 @@ class HermesCLI:
                         if event.app.is_running:
                             event.app.exit()
                     event.app.current_buffer.reset(append_to_history=True)
+                    # Force a repaint: process_command() prints through
+                    # patch_stdout (scrolls output above the prompt) and never
+                    # invalidates the app, so the just-cleared input area can
+                    # keep showing the submitted text until some unrelated
+                    # redraw fires. Every other early-return branch in this
+                    # handler invalidates after reset — match them.
+                    event.app.invalidate()
                     return
 
                 # Handle /steer while the agent is running immediately on the
@@ -12939,6 +12966,13 @@ class HermesCLI:
                 if self._should_handle_steer_command_inline(text, has_images=has_images):
                     self.process_command(text)
                     event.app.current_buffer.reset(append_to_history=True)
+                    # Force a repaint after clearing the buffer.  /steer is
+                    # dispatched mid-run while the agent streams output through
+                    # patch_stdout; process_command() never invalidates the
+                    # app, so without this the submitted "/steer <text>" can
+                    # linger in the input area (looking unsent) and invite an
+                    # accidental re-submit. See issue #34569.
+                    event.app.invalidate()
                     return
 
                 # Snapshot and clear attached images
diff --git a/gateway/config.py b/gateway/config.py
index c8e23f2bf38..59fdfa54ed6 100644
--- a/gateway/config.py
+++ b/gateway/config.py
@@ -474,6 +474,13 @@ class GatewayConfig:
     
     # Delivery settings
     always_log_local: bool = True  # Always save cron outputs to local files
+    # Drop outbound "silence narration" messages (e.g. *(silent)*, 🔇, a bare
+    # ".") pre-send. These are model hallucinations emitted when a persona has
+    # nothing actionable to say; in bot-to-bot channels they mirror back and
+    # forth, burning tokens and crashing models. Substrate-level guard that
+    # survives SOUL.md/prompt drift across providers. Opt out with False for
+    # raw passthrough.
+    filter_silence_narration: bool = True
 
     # STT settings
     stt_enabled: bool = True  # Whether to auto-transcribe inbound voice messages
@@ -582,6 +589,7 @@ class GatewayConfig:
             "quick_commands": self.quick_commands,
             "sessions_dir": str(self.sessions_dir),
             "always_log_local": self.always_log_local,
+            "filter_silence_narration": self.filter_silence_narration,
             "stt_enabled": self.stt_enabled,
             "group_sessions_per_user": self.group_sessions_per_user,
             "thread_sessions_per_user": self.thread_sessions_per_user,
@@ -650,6 +658,9 @@ class GatewayConfig:
             quick_commands=quick_commands,
             sessions_dir=sessions_dir,
             always_log_local=_coerce_bool(data.get("always_log_local"), True),
+            filter_silence_narration=_coerce_bool(
+                data.get("filter_silence_narration"), True
+            ),
             stt_enabled=_coerce_bool(stt_enabled, True),
             group_sessions_per_user=_coerce_bool(group_sessions_per_user, True),
             thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False),
@@ -757,21 +768,32 @@ def load_gateway_config() -> GatewayConfig:
             if "always_log_local" in yaml_cfg:
                 gw_data["always_log_local"] = yaml_cfg["always_log_local"]
 
+            if "filter_silence_narration" in yaml_cfg:
+                gw_data["filter_silence_narration"] = yaml_cfg[
+                    "filter_silence_narration"
+                ]
+
             if "unauthorized_dm_behavior" in yaml_cfg:
                 gw_data["unauthorized_dm_behavior"] = _normalize_unauthorized_dm_behavior(
                     yaml_cfg.get("unauthorized_dm_behavior"),
                     "pair",
                 )
 
-            # Merge platforms section from config.yaml into gw_data so that
-            # nested keys like platforms.webhook.extra.routes are loaded.
-            yaml_platforms = yaml_cfg.get("platforms")
+            # Merge platform config into gw_data so runtime-only settings under
+            # ``gateway.platforms`` are loaded the same way as top-level
+            # ``platforms``. Merge nested first so top-level config keeps
+            # precedence, matching the existing gateway.streaming fallback.
+            gateway_cfg = yaml_cfg.get("gateway")
+            gateway_platforms = gateway_cfg.get("platforms") if isinstance(gateway_cfg, dict) else None
             platforms_data = gw_data.setdefault("platforms", {})
             if not isinstance(platforms_data, dict):
                 platforms_data = {}
                 gw_data["platforms"] = platforms_data
-            if isinstance(yaml_platforms, dict):
-                for plat_name, plat_block in yaml_platforms.items():
+
+            def _merge_platform_map(source_platforms: Any) -> None:
+                if not isinstance(source_platforms, dict):
+                    return
+                for plat_name, plat_block in source_platforms.items():
                     if not isinstance(plat_block, dict):
                         continue
                     existing = platforms_data.get(plat_name, {})
@@ -785,6 +807,10 @@ def load_gateway_config() -> GatewayConfig:
                     if merged_extra:
                         merged["extra"] = merged_extra
                     platforms_data[plat_name] = merged
+
+            _merge_platform_map(gateway_platforms)
+            _merge_platform_map(yaml_cfg.get("platforms"))
+            if platforms_data:
                 gw_data["platforms"] = platforms_data
             # Iterate built-in platforms plus any registered plugin platforms
             # so plugin authors get the same shared-key bridging (#24836).
@@ -890,6 +916,18 @@ def load_gateway_config() -> GatewayConfig:
                     if entry.apply_yaml_config_fn is None:
                         continue
                     platform_cfg = yaml_cfg.get(entry.name)
+                    # Fall back to the platform's block under ``platforms`` /
+                    # ``gateway.platforms`` so adapter hooks still run when the
+                    # user configured the platform only under those nested paths
+                    # (e.g. ``platforms.discord.extra.allow_from``) and not via a
+                    # top-level ``discord:`` block.
+                    if not isinstance(platform_cfg, dict):
+                        for _src in (gateway_platforms, yaml_cfg.get("platforms")):
+                            if isinstance(_src, dict):
+                                _candidate = _src.get(entry.name)
+                                if isinstance(_candidate, dict):
+                                    platform_cfg = _candidate
+                                    break
                     if not isinstance(platform_cfg, dict):
                         continue
                     try:
diff --git a/gateway/delivery.py b/gateway/delivery.py
index a1cbb299384..8afab431c36 100644
--- a/gateway/delivery.py
+++ b/gateway/delivery.py
@@ -9,6 +9,8 @@ Routes messages to the appropriate destination based on:
 """
 
 import logging
+import os
+import re
 from pathlib import Path
 from datetime import datetime
 from dataclasses import dataclass
@@ -21,6 +23,32 @@ logger = logging.getLogger(__name__)
 MAX_PLATFORM_OUTPUT = 4000
 TRUNCATED_VISIBLE = 3800
 
+# Matches strings that are *only* a "silence" narration with optional markdown
+# wrappers. Covers: *(silent)*, _silent_, `silent`, ~silent~, (silent), silent,
+# 🔇, a bare ".", "…", and the whitespace/marker-padded variants seen in the
+# wild. Anchored to start/end so substantive messages that merely *contain* the
+# word "silent" are never matched.
+_SILENCE_NARRATION = re.compile(
+    r'^[\s*_~`]*\(?\s*(silent|silence|no\s+response|no\s+reply)\s*\.?\)?[\s*_~`]*$'
+    r'|^[\s*_~`]*[\U0001F507\.\u2026]+[\s*_~`]*$',
+    re.IGNORECASE,
+)
+
+
+def _is_silence_narration(content: Optional[str]) -> bool:
+    """Return True when ``content`` is *only* a silence-narration token.
+
+    Length-guarded (real messages are longer) and anchored to the whole string
+    so legitimate prose like "The deployment ran silently" or "Silence is
+    golden — here is the plan..." is never flagged.
+    """
+    if not content:
+        return False
+    stripped = content.strip()
+    if not stripped or len(stripped) > 64:  # length guard
+        return False
+    return bool(_SILENCE_NARRATION.match(stripped))
+
 from .config import Platform, GatewayConfig
 from .session import SessionSource
 
@@ -261,6 +289,18 @@ class DeliveryRouter:
         path.write_text(content)
         return path
 
+    def _filter_silence_narration_enabled(self) -> bool:
+        """Whether the outbound silence-narration filter is active.
+
+        ``HERMES_FILTER_SILENCE_NARRATION`` env var overrides config when set;
+        otherwise the ``gateway.filter_silence_narration`` config flag wins
+        (default True).
+        """
+        env = os.getenv("HERMES_FILTER_SILENCE_NARRATION")
+        if env is not None:
+            return env.strip().lower() in ("1", "true", "yes", "on")
+        return bool(getattr(self.config, "filter_silence_narration", True))
+
     async def _deliver_to_platform(
         self,
         target: DeliveryTarget,
@@ -286,6 +326,27 @@ class DeliveryRouter:
                 + f"\n\n... [truncated, full output saved to {saved_path}]"
             )
         
+        # Substrate-level anti-loop guard: drop hallucinated "silence narration"
+        # (*(silent)*, 🔇, a bare ".", etc.) before it ever reaches the adapter.
+        # In bot-to-bot channels these tokens mirror back and forth until a
+        # model crashes with "no content after all retries". Behavioral prompt
+        # rules drift across providers; this single chokepoint covers every
+        # platform adapter regardless of which persona's prompt failed.
+        # Local/file delivery (_deliver_local) is a separate path and is never
+        # filtered — saved silence has no loop risk.
+        if self._filter_silence_narration_enabled() and _is_silence_narration(content):
+            logger.warning(
+                "Dropped silence-narration outbound to %s (chat=%s): %r",
+                target.platform.value,
+                target.chat_id,
+                content[:40],
+            )
+            return {
+                "success": True,
+                "filtered": "silence_narration",
+                "delivered": False,
+            }
+
         send_metadata = dict(metadata or {})
         is_named_telegram_private_topic = False
         named_telegram_private_topic_name: Optional[str] = None
diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index 6979a869148..e1b677f12a1 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -1191,10 +1191,12 @@ _MEDIA_EXT_ALTERNATION = "|".join(
 # bare-path detector (extract_local_files) downstream rather than silently
 # deleted. Shared by the non-streaming dispatch path and the streaming
 # consumer so both behave identically.
+# Path anchors: ``~/`` (Unix home-relative), ``/`` (Unix absolute),
+# ``X:\\`` or ``X:/`` (Windows drive-letter absolute — #34632).
 MEDIA_TAG_CLEANUP_RE = re.compile(
     r'''[`"']?MEDIA:\s*'''
     r'''(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|'''
-    r'''(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:''' + _MEDIA_EXT_ALTERNATION + r'''))'''
+    r'''(?:~/|/|[A-Za-z]:[/\\])\S+(?:[^\S\n]+\S+)*?\.(?:''' + _MEDIA_EXT_ALTERNATION + r'''))'''
     r'''(?=[\s`"',;:)\]}]|$)[`"']?''',
     re.IGNORECASE,
 )
@@ -2665,9 +2667,10 @@ class BasePlatformAdapter(ABC):
 
         # (?<![/:\w.]) prevents matching inside URLs (e.g. https://…/img.png)
         #             and relative paths (./foo.png)
-        # (?:~/|/)    anchors to absolute or home-relative paths
+        # (?:~/|/)    anchors to absolute or home-relative Unix paths
+        # (?:[A-Za-z]:[/\\]) anchors to Windows drive-letter paths (#34632)
         path_re = re.compile(
-            r'(?<![/:\w.])(?:~/|/)(?:[\w.\-]+/)*[\w.\-]+\.(?:' + ext_part + r')\b',
+            r'(?<![/:\w.])(?:~/|/|[A-Za-z]:[/\\])(?:[\w.\-]+[/\\])*[\w.\-]+\.(?:' + ext_part + r')\b',
             re.IGNORECASE,
         )
 
diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index afb5726f330..bc0670c5a51 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -2804,21 +2804,8 @@ class TelegramAdapter(BasePlatformAdapter):
                 return slug
 
         try:
-            # Build provider buttons — 2 per row
-            buttons: list = []
-            for p in providers:
-                count = p.get("total_models", len(p.get("models", [])))
-                label = f"{p['name']} ({count})"
-                if p.get("is_current"):
-                    label = f"✓ {label}"
-                # Compact callback data: mp:<slug>  (max 64 bytes)
-                buttons.append(
-                    InlineKeyboardButton(label, callback_data=f"mp:{p['slug']}")
-                )
-
-            rows = [buttons[i : i + 2] for i in range(0, len(buttons), 2)]
-            rows.append([InlineKeyboardButton("✗ Cancel", callback_data="mx")])
-            keyboard = InlineKeyboardMarkup(rows)
+            # Build provider buttons — folds provider groups (display only).
+            keyboard = self._build_provider_keyboard(providers)
 
             provider_label = get_label(current_provider)
             text = self.format_message(
@@ -2865,6 +2852,56 @@ class TelegramAdapter(BasePlatformAdapter):
 
     _MODEL_PAGE_SIZE = 8
 
+    def _build_provider_keyboard(self, providers: list):
+        """Build the top-level provider keyboard, folding provider groups.
+
+        Provider families (Kimi/Moonshot, MiniMax, xAI Grok, ...) collapse to
+        a single ``mpg:<gid>`` button; tapping it drills into a member
+        sub-keyboard. Single providers (and groups with only one authenticated
+        member) render as direct ``mp:<slug>`` buttons. Grouping mirrors the
+        CLI ``hermes model`` picker via the shared ``group_providers`` fold,
+        so all surfaces stay consistent.
+        """
+        try:
+            from hermes_cli.models import group_providers
+        except Exception:
+            group_providers = None
+
+        by_slug = {p.get("slug"): p for p in providers}
+
+        def _provider_button(p):
+            count = p.get("total_models", len(p.get("models", [])))
+            label = f"{p['name']} ({count})"
+            if p.get("is_current"):
+                label = f"✓ {label}"
+            return InlineKeyboardButton(label, callback_data=f"mp:{p['slug']}")
+
+        buttons: list = []
+        if group_providers is not None:
+            for row in group_providers([p.get("slug") for p in providers]):
+                if row["kind"] == "group":
+                    members = [by_slug[m] for m in row["members"] if m in by_slug]
+                    count = sum(
+                        m.get("total_models", len(m.get("models", []))) for m in members
+                    )
+                    label = f"{row['label']} ▸ ({count})"
+                    if any(m.get("is_current") for m in members):
+                        label = f"✓ {label}"
+                    buttons.append(
+                        InlineKeyboardButton(label, callback_data=f"mpg:{row['group_id']}")
+                    )
+                else:
+                    p = by_slug.get(row["slug"])
+                    if p is not None:
+                        buttons.append(_provider_button(p))
+        else:
+            for p in providers:
+                buttons.append(_provider_button(p))
+
+        rows = [buttons[i : i + 2] for i in range(0, len(buttons), 2)]
+        rows.append([InlineKeyboardButton("✗ Cancel", callback_data="mx")])
+        return InlineKeyboardMarkup(rows)
+
     def _build_model_keyboard(self, models: list, page: int) -> tuple:
         """Build paginated model buttons. Returns (keyboard, page_info_text)."""
         page_size = self._MODEL_PAGE_SIZE
@@ -3043,10 +3080,23 @@ class TelegramAdapter(BasePlatformAdapter):
             # Clean up state
             self._model_picker_state.pop(chat_id, None)
 
-        elif data == "mb":
-            # --- Back to provider list ---
+        elif data.startswith("mpg:"):
+            # --- Provider group selected: show member providers ---
+            group_id = data[4:]
+            try:
+                from hermes_cli.models import PROVIDER_GROUPS
+                _label, member_slugs = PROVIDER_GROUPS.get(group_id, ("", []))
+            except Exception:
+                _label, member_slugs = "", []
+
+            by_slug = {p["slug"]: p for p in state["providers"]}
+            members = [by_slug[m] for m in member_slugs if m in by_slug]
+            if not members:
+                await query.answer(text="Group not found.")
+                return
+
             buttons = []
-            for p in state["providers"]:
+            for p in members:
                 count = p.get("total_models", len(p.get("models", [])))
                 label = f"{p['name']} ({count})"
                 if p.get("is_current"):
@@ -3054,11 +3104,30 @@ class TelegramAdapter(BasePlatformAdapter):
                 buttons.append(
                     InlineKeyboardButton(label, callback_data=f"mp:{p['slug']}")
                 )
-
             rows = [buttons[i : i + 2] for i in range(0, len(buttons), 2)]
-            rows.append([InlineKeyboardButton("✗ Cancel", callback_data="mx")])
+            rows.append([
+                InlineKeyboardButton("◀ Back", callback_data="mb"),
+                InlineKeyboardButton("✗ Cancel", callback_data="mx"),
+            ])
             keyboard = InlineKeyboardMarkup(rows)
 
+            await query.edit_message_text(
+                text=self.format_message(
+                    (
+                        f"⚙ *Model Configuration*\n\n"
+                        f"Provider family: *{_label or group_id}*\n\n"
+                        f"Select a provider:"
+                    )
+                ),
+                parse_mode=ParseMode.MARKDOWN_V2,
+                reply_markup=keyboard,
+            )
+            await query.answer()
+
+        elif data == "mb":
+            # --- Back to provider list (folds groups) ---
+            keyboard = self._build_provider_keyboard(state["providers"])
+
             try:
                 provider_label = get_label(state["current_provider"])
             except Exception:
@@ -3107,7 +3176,7 @@ class TelegramAdapter(BasePlatformAdapter):
         query_user_name = getattr(query.from_user, "first_name", None)
 
         # --- Model picker callbacks ---
-        if data.startswith(("mp:", "mm:", "mb", "mx", "mg:")):
+        if data.startswith(("mp:", "mpg:", "mm:", "mb", "mx", "mg:")):
             chat_id = str(query.message.chat_id) if query.message else None
             if chat_id:
                 await self._handle_model_picker_callback(query, data, chat_id)
diff --git a/gateway/platforms/weixin.py b/gateway/platforms/weixin.py
index 025bf052cce..36bb3dd21c2 100644
--- a/gateway/platforms/weixin.py
+++ b/gateway/platforms/weixin.py
@@ -1180,12 +1180,48 @@ class WeixinAdapter(BasePlatformAdapter):
             default=False,
         )
 
+        # Text debounce batching (mirrors Telegram adapter pattern).
+        # iLink delivers messages individually, so rapid multi-message
+        # bursts (forwarded batches, paste-splits) each trigger a
+        # separate agent invocation.  Default 3s delay / 5s split delay
+        # are tuned for iLink's typical delivery cadence.  Tunable via
+        # config.yaml under
+        # ``gateway.platforms.weixin.extra.text_batch_delay_seconds`` /
+        # ``text_batch_split_delay_seconds``.
+        self._text_batch_delay_seconds = self._coerce_float_extra(
+            "text_batch_delay_seconds", 3.0
+        )
+        self._text_batch_split_delay_seconds = self._coerce_float_extra(
+            "text_batch_split_delay_seconds", 5.0
+        )
+        self._pending_text_batches: Dict[str, MessageEvent] = {}
+        self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}
+
         if self._account_id and not self._token:
             persisted = load_weixin_account(hermes_home, self._account_id)
             if persisted:
                 self._token = str(persisted.get("token") or "").strip()
                 self._base_url = str(persisted.get("base_url") or self._base_url).strip().rstrip("/")
 
+    def _coerce_float_extra(self, key: str, default: float) -> float:
+        """Read a float from ``config.extra``, guarding against bad/non-finite values.
+
+        The result is fed directly to ``asyncio.sleep()``, so NaN/Inf and
+        unparseable values fall back to ``default``.
+        """
+        import math
+
+        value = self.config.extra.get(key) if getattr(self.config, "extra", None) else None
+        if value is None:
+            return float(default)
+        try:
+            parsed = float(value)
+        except (TypeError, ValueError):
+            return float(default)
+        if not math.isfinite(parsed) or parsed < 0:
+            return float(default)
+        return parsed
+
     @staticmethod
     def _coerce_list(value: Any) -> List[str]:
         if value is None:
@@ -1247,6 +1283,11 @@ class WeixinAdapter(BasePlatformAdapter):
     async def disconnect(self) -> None:
         _LIVE_ADAPTERS.pop(self._token, None)
         self._running = False
+        for task in self._pending_text_batch_tasks.values():
+            if not task.done():
+                task.cancel()
+        self._pending_text_batches.clear()
+        self._pending_text_batch_tasks.clear()
         if self._poll_task and not self._poll_task.done():
             self._poll_task.cancel()
             try:
@@ -1395,12 +1436,10 @@ class WeixinAdapter(BasePlatformAdapter):
             timestamp=datetime.now(),
         )
         logger.info("[%s] inbound from=%s type=%s media=%d", self.name, _safe_id(sender_id), source.chat_type, len(media_paths))
-        await self.handle_message(event)
-
-    @property
-    def enforces_own_access_policy(self) -> bool:
-        """Weixin gates DM/group access at intake via dm_policy/group_policy."""
-        return True
+        if event.message_type == MessageType.TEXT:
+            self._enqueue_text_event(event)
+        else:
+            await self.handle_message(event)
 
     def _is_dm_allowed(self, sender_id: str) -> bool:
         if self._dm_policy == "disabled":
@@ -1409,6 +1448,76 @@ class WeixinAdapter(BasePlatformAdapter):
             return sender_id in self._allow_from
         return True
 
+    @property
+    def enforces_own_access_policy(self) -> bool:
+        """Weixin gates DM/group access at intake via dm_policy/group_policy."""
+        return True
+
+    # ------------------------------------------------------------------
+    # Text debounce batching
+    # ------------------------------------------------------------------
+
+    _SPLIT_THRESHOLD = 1800  # iLink chunks at ~2048 chars
+
+    def _text_batch_key(self, event: MessageEvent) -> str:
+        """Session-scoped key for text message batching."""
+        from gateway.session import build_session_key
+        return build_session_key(
+            event.source,
+            group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
+            thread_sessions_per_user=self.config.extra.get("thread_sessions_per_user", False),
+        )
+
+    def _enqueue_text_event(self, event: MessageEvent) -> None:
+        """Buffer a text event and reset the flush timer.
+
+        When users forward multiple messages or send rapid-fire texts
+        via WeChat, each arrives as a separate iLink message. This
+        concatenates them and waits for a short quiet period before
+        dispatching the combined message.
+        """
+        key = self._text_batch_key(event)
+        existing = self._pending_text_batches.get(key)
+        chunk_len = len(event.text or "")
+        if existing is None:
+            event._last_chunk_len = chunk_len  # type: ignore[attr-defined]
+            self._pending_text_batches[key] = event
+        else:
+            if event.text:
+                existing.text = f"{existing.text}\n{event.text}" if existing.text else event.text
+            existing._last_chunk_len = chunk_len  # type: ignore[attr-defined]
+            if event.media_urls:
+                existing.media_urls.extend(event.media_urls)
+                existing.media_types.extend(event.media_types)
+
+        prior_task = self._pending_text_batch_tasks.get(key)
+        if prior_task and not prior_task.done():
+            prior_task.cancel()
+        self._pending_text_batch_tasks[key] = asyncio.create_task(
+            self._flush_text_batch(key)
+        )
+
+    async def _flush_text_batch(self, key: str) -> None:
+        """Wait for quiet period then dispatch aggregated text."""
+        current_task = asyncio.current_task()
+        try:
+            pending = self._pending_text_batches.get(key)
+            last_len = getattr(pending, "_last_chunk_len", 0) if pending else 0
+            if last_len >= self._SPLIT_THRESHOLD:
+                delay = self._text_batch_split_delay_seconds
+            else:
+                delay = self._text_batch_delay_seconds
+            await asyncio.sleep(delay)
+            if self._pending_text_batch_tasks.get(key) is not current_task:
+                return
+            event = self._pending_text_batches.pop(key, None)
+            if not event:
+                return
+            await self.handle_message(event)
+        finally:
+            if self._pending_text_batch_tasks.get(key) is current_task:
+                self._pending_text_batch_tasks.pop(key, None)
+
     async def _collect_media(self, item: Dict[str, Any], media_paths: List[str], media_types: List[str]) -> None:
         item_type = item.get("type")
         if item_type == ITEM_IMAGE:
diff --git a/gateway/platforms/whatsapp.py b/gateway/platforms/whatsapp.py
index 0ca3d41fabb..703f774323f 100644
--- a/gateway/platforms/whatsapp.py
+++ b/gateway/platforms/whatsapp.py
@@ -278,6 +278,43 @@ class WhatsAppAdapter(BasePlatformAdapter):
         # notification before the normal "✓ whatsapp disconnected" fires.
         self._shutting_down: bool = False
 
+        # Text debounce batching (mirrors Telegram adapter pattern).
+        # WhatsApp often delivers multiple messages in rapid succession
+        # (e.g. forwarded batches, paste-splits) — without debounce each
+        # message triggers a separate agent invocation, wasting tokens and
+        # flooding the user with reply fragments.  Default 5s delay /
+        # 10s split delay are conservative for WhatsApp's delivery cadence.
+        # Tunable via config.yaml under
+        # ``gateway.platforms.whatsapp.extra.text_batch_delay_seconds`` /
+        # ``text_batch_split_delay_seconds``.
+        self._text_batch_delay_seconds = self._coerce_float_extra(
+            "text_batch_delay_seconds", 5.0
+        )
+        self._text_batch_split_delay_seconds = self._coerce_float_extra(
+            "text_batch_split_delay_seconds", 10.0
+        )
+        self._pending_text_batches: Dict[str, MessageEvent] = {}
+        self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}
+
+    def _coerce_float_extra(self, key: str, default: float) -> float:
+        """Read a float from ``config.extra``, guarding against bad/non-finite values.
+
+        The result is fed directly to ``asyncio.sleep()``, so NaN/Inf and
+        unparseable values fall back to ``default``.
+        """
+        import math
+
+        value = self.config.extra.get(key) if getattr(self.config, "extra", None) else None
+        if value is None:
+            return float(default)
+        try:
+            parsed = float(value)
+        except (TypeError, ValueError):
+            return float(default)
+        if not math.isfinite(parsed) or parsed < 0:
+            return float(default)
+        return parsed
+
     def _effective_reply_prefix(self) -> str:
         """Return the prefix the Node bridge will add in self-chat mode."""
         whatsapp_mode = os.getenv("WHATSAPP_MODE", "self-chat")
@@ -1139,7 +1176,10 @@ class WhatsAppAdapter(BasePlatformAdapter):
                         for msg_data in messages:
                             event = await self._build_message_event(msg_data)
                             if event:
-                                await self.handle_message(event)
+                                if event.message_type == MessageType.TEXT:
+                                    self._enqueue_text_event(event)
+                                else:
+                                    await self.handle_message(event)
             except asyncio.CancelledError:
                 break
             except Exception as e:
@@ -1151,7 +1191,67 @@ class WhatsAppAdapter(BasePlatformAdapter):
                 await asyncio.sleep(5)
             
             await asyncio.sleep(1)  # Poll interval
-    
+
+    # ── Text debounce batching ──────────────────────────────────────
+
+    _SPLIT_THRESHOLD = 6000  # WhatsApp supports ~65K chars; generous threshold
+
+    def _text_batch_key(self, event: MessageEvent) -> str:
+        """Session-scoped key for text message batching."""
+        from gateway.session import build_session_key
+        return build_session_key(
+            event.source,
+            group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
+            thread_sessions_per_user=self.config.extra.get("thread_sessions_per_user", False),
+        )
+
+    def _enqueue_text_event(self, event: MessageEvent) -> None:
+        """Buffer a text event and reset the flush timer.
+
+        When WhatsApp delivers rapid-fire messages (e.g. forwarded
+        batches), this concatenates them and waits for a short quiet
+        period before dispatching the combined message.
+        """
+        key = self._text_batch_key(event)
+        existing = self._pending_text_batches.get(key)
+        chunk_len = len(event.text or "")
+        if existing is None:
+            event._last_chunk_len = chunk_len  # type: ignore[attr-defined]
+            self._pending_text_batches[key] = event
+        else:
+            if event.text:
+                existing.text = f"{existing.text}\n{event.text}" if existing.text else event.text
+            existing._last_chunk_len = chunk_len  # type: ignore[attr-defined]
+            if event.media_urls:
+                existing.media_urls.extend(event.media_urls)
+                existing.media_types.extend(event.media_types)
+
+        prior_task = self._pending_text_batch_tasks.get(key)
+        if prior_task and not prior_task.done():
+            prior_task.cancel()
+        self._pending_text_batch_tasks[key] = asyncio.create_task(
+            self._flush_text_batch(key)
+        )
+
+    async def _flush_text_batch(self, key: str) -> None:
+        """Wait for quiet period then dispatch aggregated text."""
+        current_task = asyncio.current_task()
+        try:
+            pending = self._pending_text_batches.get(key)
+            last_len = getattr(pending, "_last_chunk_len", 0) if pending else 0
+            if last_len >= self._SPLIT_THRESHOLD:
+                delay = self._text_batch_split_delay_seconds
+            else:
+                delay = self._text_batch_delay_seconds
+            await asyncio.sleep(delay)
+            event = self._pending_text_batches.pop(key, None)
+            if not event:
+                return
+            await self.handle_message(event)
+        finally:
+            if self._pending_text_batch_tasks.get(key) is current_task:
+                self._pending_text_batch_tasks.pop(key, None)
+
     async def _build_message_event(self, data: Dict[str, Any]) -> Optional[MessageEvent]:
         """Build a MessageEvent from bridge message data, downloading images to cache."""
         try:
diff --git a/gateway/run.py b/gateway/run.py
index 570ccf7e31b..6e0c6cb33f2 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -1730,6 +1730,14 @@ class GatewayRunner:
         self._running_agents: Dict[str, Any] = {}
         self._running_agents_ts: Dict[str, float] = {}  # start timestamp per session
         self._pending_messages: Dict[str, str] = {}  # Queued messages during interrupt
+        # Last successfully-resolved (non-empty) model, keyed by session. Used
+        # as a fallback when a fresh config read transiently returns an empty
+        # model (e.g. an mtime-keyed config-cache miss during a post-interrupt
+        # recovery turn). Without this, the agent is built with model="" and
+        # every API call fails HTTP 400 "No models provided" — the session goes
+        # silent until the user manually re-sends. See #35314. ``"*"`` holds a
+        # process-wide last-known-good for sessions seen for the first time.
+        self._last_resolved_model: Dict[str, str] = {}
         # Overflow buffer for explicit /queue commands.  The adapter-level
         # _pending_messages dict is a single slot per session (designed for
         # "next-turn" follow-ups where repeated sends collapse into one
@@ -2488,6 +2496,32 @@ class GatewayRunner:
             except Exception:
                 pass
 
+        # Final safety net (#35314): if resolution still produced an empty
+        # model — e.g. a transient config-cache miss during a post-interrupt
+        # recovery turn returned an empty user_config — reuse the last model we
+        # successfully resolved for this session (or, failing that, the most
+        # recent one resolved process-wide). Building an agent with model=""
+        # makes every API call fail HTTP 400 "No models provided" and the
+        # session goes silent until the user manually re-sends. ``getattr``
+        # guards against bare test runners built via ``object.__new__``.
+        _last_good = getattr(self, "_last_resolved_model", None)
+        if _last_good is not None:
+            if not model:
+                _recovered = _last_good.get(resolved_session_key or "") or _last_good.get("*")
+                if _recovered:
+                    logger.warning(
+                        "Empty model resolved for session=%s — recovering "
+                        "last-known-good model %s (config read likely returned "
+                        "empty; see #35314)",
+                        resolved_session_key or "", _recovered,
+                    )
+                    model = _recovered
+            elif model:
+                # Cache the good resolution for future recovery turns.
+                if resolved_session_key:
+                    _last_good[resolved_session_key] = model
+                _last_good["*"] = model
+
         return model, runtime_kwargs
 
     def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict:
@@ -2784,10 +2818,12 @@ class GatewayRunner:
         """Mark a queued platform as paused — keep it in ``_failed_platforms``
         but stop the reconnect watcher from hammering it.
 
-        Used by the circuit breaker after ``_PAUSE_AFTER_FAILURES`` consecutive
-        retryable failures, and by ``/platform pause <name>`` for manual
-        intervention.  Paused platforms are surfaced in ``/platform list``
-        and resumed with ``/platform resume <name>``.
+        Used by ``/platform pause <name>`` for manual operator intervention.
+        Paused platforms are surfaced in ``/platform list`` and resumed with
+        ``/platform resume <name>``.  Note: the reconnect watcher does NOT
+        auto-pause — retryable (network/DNS) failures keep retrying at the
+        backoff cap indefinitely so a transient outage self-heals without
+        manual intervention.
         """
         info = getattr(self, "_failed_platforms", {}).get(platform)
         if info is None:
@@ -5865,15 +5901,17 @@ class GatewayRunner:
         """Background task that periodically retries connecting failed platforms.
 
         Uses exponential backoff: 30s → 60s → 120s → 240s → 300s (cap).
-        Retryable failures keep retrying at the backoff cap indefinitely
-        — but if a platform fails ``_PAUSE_AFTER_FAILURES`` times in a row
-        without ever succeeding, it is *paused*: kept in the retry queue
-        but no longer hammered.  The user surfaces it with ``/platform list``
-        and resumes it with ``/platform resume <name>``.  Non-retryable
-        failures (bad auth, etc.) still drop out of the queue immediately.
+        Retryable failures (network/DNS blips) keep retrying at the backoff
+        cap indefinitely — they self-heal once connectivity returns, so a
+        transient outage never requires manual intervention. Non-retryable
+        failures (bad auth, etc.) drop out of the queue immediately. The
+        circuit breaker (``_pause_failed_platform`` / ``/platform pause``)
+        remains available for manual operator control via ``/platform list``
+        and ``/platform resume <name>``, but is no longer triggered
+        automatically — auto-pausing a recovered platform was the cause of
+        bots silently staying dead after a transient DNS failure.
         """
         _BACKOFF_CAP = 300  # 5 minutes max between retries
-        _PAUSE_AFTER_FAILURES = 10  # circuit-breaker threshold
 
         await asyncio.sleep(10)  # initial delay — let startup finish
         while self._running:
@@ -5968,14 +6006,14 @@ class GatewayRunner:
                             "Reconnect %s failed, next retry in %ds",
                             platform.value, backoff,
                         )
-                        if attempt >= _PAUSE_AFTER_FAILURES:
-                            self._pause_failed_platform(
-                                platform,
-                                reason=(
-                                    adapter.fatal_error_message
-                                    or "failed to reconnect"
-                                ),
-                            )
+                        # Retryable failures (network/DNS blips) keep retrying
+                        # at the backoff cap indefinitely — they self-heal once
+                        # connectivity returns. We do NOT auto-pause them: a
+                        # transient outage must never require manual `/platform
+                        # resume` to recover. Non-retryable failures (bad auth,
+                        # etc.) already drop out of the queue via the
+                        # `not fatal_error_retryable` branch above, so anything
+                        # reaching here is by definition retryable.
                 except Exception as e:
                     self._update_platform_runtime_status(
                         platform.value,
@@ -5990,8 +6028,9 @@ class GatewayRunner:
                         "Reconnect %s error: %s, next retry in %ds",
                         platform.value, e, backoff,
                     )
-                    if attempt >= _PAUSE_AFTER_FAILURES:
-                        self._pause_failed_platform(platform, reason=str(e))
+                    # A raised exception during reconnect (connect timeout, DNS
+                    # resolution failure, etc.) is inherently transient — keep
+                    # retrying at the backoff cap rather than auto-pausing.
 
             # Check every 10 seconds for platforms that need reconnection
             for _ in range(10):
@@ -10531,6 +10570,22 @@ class GatewayRunner:
                             except Exception as exc:
                                 logger.warning("Picker model switch failed for cached agent: %s", exc)
 
+                        # Persist the new model to the session DB so the
+                        # dashboard shows the updated model (#34850).
+                        _sess_db = getattr(_self, "_session_db", None)
+                        if _sess_db is not None:
+                            try:
+                                _sess_entry = _self.session_store.get_or_create_session(
+                                    event.source
+                                )
+                                _sess_db.update_session_model(
+                                    _sess_entry.session_id, result.new_model
+                                )
+                            except Exception as exc:
+                                logger.debug(
+                                    "Failed to persist model switch to DB: %s", exc
+                                )
+
                         # Store model note + session override
                         if not hasattr(_self, "_pending_model_notes"):
                             _self._pending_model_notes = {}
@@ -10668,6 +10723,20 @@ class GatewayRunner:
             except Exception as exc:
                 logger.warning("In-place model switch failed for cached agent: %s", exc)
 
+        # Persist the new model to the session DB so the dashboard
+        # shows the updated model (#34850).
+        _sess_db = getattr(self, "_session_db", None)
+        if _sess_db is not None:
+            try:
+                _sess_entry = self.session_store.get_or_create_session(source)
+                _sess_db.update_session_model(
+                    _sess_entry.session_id, result.new_model
+                )
+            except Exception as exc:
+                logger.debug(
+                    "Failed to persist model switch to DB: %s", exc
+                )
+
         # Store a note to prepend to the next user message so the model
         # knows about the switch (avoids system messages mid-history).
         if not hasattr(self, "_pending_model_notes"):
@@ -15313,8 +15382,52 @@ class GatewayRunner:
         ("compression", "target_ratio"),
         ("compression", "protect_last_n"),
         ("agent", "disabled_toolsets"),
+        ("memory", "provider"),
     )
 
+    _HONCHO_CACHE_BUSTING_KEYS = (
+        "honcho.peer_name",
+        "honcho.ai_peer",
+        "honcho.pin_peer_name",
+        "honcho.runtime_peer_prefix",
+        "honcho.user_peer_aliases",
+    )
+    _HONCHO_CACHE_BUSTING_MEMO: dict[tuple[str, int | None], dict[str, Any]] = {}
+
+    @classmethod
+    def _empty_honcho_cache_busting_config(cls) -> dict[str, Any]:
+        return {key: None for key in cls._HONCHO_CACHE_BUSTING_KEYS}
+
+    @classmethod
+    def _extract_honcho_cache_busting_config(cls) -> dict[str, Any]:
+        """Extract Honcho identity keys, memoized by honcho.json mtime."""
+        try:
+            from plugins.memory.honcho.client import HonchoClientConfig, resolve_config_path
+
+            path = resolve_config_path()
+            try:
+                mtime_ns = path.stat().st_mtime_ns
+            except OSError:
+                mtime_ns = None
+            memo_key = (str(path), mtime_ns)
+            cached = cls._HONCHO_CACHE_BUSTING_MEMO.get(memo_key)
+            if cached is not None:
+                return dict(cached)
+
+            hcfg = HonchoClientConfig.from_global_config(config_path=path)
+            aliases = hcfg.user_peer_aliases or {}
+            values = {
+                "honcho.peer_name": hcfg.peer_name,
+                "honcho.ai_peer": hcfg.ai_peer,
+                "honcho.pin_peer_name": bool(hcfg.pin_peer_name),
+                "honcho.runtime_peer_prefix": hcfg.runtime_peer_prefix or "",
+                "honcho.user_peer_aliases": sorted(aliases.items()) if isinstance(aliases, dict) else [],
+            }
+            cls._HONCHO_CACHE_BUSTING_MEMO = {memo_key: values}
+            return dict(values)
+        except Exception:
+            return cls._empty_honcho_cache_busting_config()
+
     @classmethod
     def _extract_cache_busting_config(cls, user_config: dict | None) -> dict:
         """Pull values that must bust the cached agent.
@@ -15345,26 +15458,12 @@ class GatewayRunner:
             out["tools.registry_generation"] = None
 
         # Honcho identity-mapping keys live in honcho.json, not user_config.
-        # HonchoSessionManager freezes the resolved peer_name / ai_peer /
-        # pin / aliases / prefix at construction; without busting here,
-        # mid-flight honcho.json edits go unread until the next unrelated
-        # cache eviction.
-        try:
-            from plugins.memory.honcho.client import HonchoClientConfig
-
-            hcfg = HonchoClientConfig.from_global_config()
-            out["honcho.peer_name"] = hcfg.peer_name
-            out["honcho.ai_peer"] = hcfg.ai_peer
-            out["honcho.pin_peer_name"] = bool(hcfg.pin_peer_name)
-            out["honcho.runtime_peer_prefix"] = hcfg.runtime_peer_prefix or ""
-            aliases = hcfg.user_peer_aliases or {}
-            out["honcho.user_peer_aliases"] = sorted(aliases.items()) if isinstance(aliases, dict) else []
-        except Exception:
-            out["honcho.peer_name"] = None
-            out["honcho.ai_peer"] = None
-            out["honcho.pin_peer_name"] = None
-            out["honcho.runtime_peer_prefix"] = None
-            out["honcho.user_peer_aliases"] = None
+        # Only read that file when Honcho is the active memory provider.
+        provider = cfg_get(cfg, "memory", "provider")
+        if isinstance(provider, str) and provider.lower() == "honcho":
+            out.update(cls._extract_honcho_cache_busting_config())
+        else:
+            out.update(cls._empty_honcho_cache_busting_config())
 
         return out
 
@@ -17203,7 +17302,7 @@ class GatewayRunner:
                     _hc = _hm.get("content", "")
                     if "MEDIA:" in _hc:
                         _TOOL_MEDIA_RE = re.compile(
-                            r'MEDIA:((?:/|~\/)\S+\.(?:png|jpe?g|gif|webp|'
+                            r'MEDIA:((?:[A-Za-z]:[/\\]|/|~\/)\S+\.(?:png|jpe?g|gif|webp|'
                             r'mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|'
                             r'flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|'
                             r'txt|csv|apk|ipa))',
@@ -17529,7 +17628,7 @@ class GatewayRunner:
                         content = msg.get("content", "")
                         if "MEDIA:" in content:
                             _TOOL_MEDIA_RE = re.compile(
-                                r'MEDIA:((?:/|~\/)\S+\.(?:png|jpe?g|gif|webp|'
+                                r'MEDIA:((?:[A-Za-z]:[/\\]|/|~\/)\S+\.(?:png|jpe?g|gif|webp|'
                                 r'mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|'
                                 r'flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|'
                                 r'txt|csv|apk|ipa))',
diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py
index c91b2f728c2..f25d03d2a87 100644
--- a/hermes_cli/banner.py
+++ b/hermes_cli/banner.py
@@ -12,14 +12,16 @@ import threading
 import time
 from pathlib import Path
 from hermes_constants import get_hermes_home
-from typing import Dict, List, Optional
+from typing import TYPE_CHECKING, Dict, List, Optional
 
-from rich.console import Console
-from rich.panel import Panel
-from rich.table import Table
-
-from prompt_toolkit import print_formatted_text as _pt_print
-from prompt_toolkit.formatted_text import ANSI as _PT_ANSI
+# rich and prompt_toolkit are imported lazily (inside the functions that use
+# them) rather than at module level.  Importing this module is on the TUI
+# gateway's critical startup path purely to reach the lightweight update-check
+# helpers (``prefetch_update_check``); pulling rich.console + prompt_toolkit
+# eagerly added ~50ms of wasted imports before ``gateway.ready`` could fire.
+# Keep the type-only reference available to checkers without the runtime cost.
+if TYPE_CHECKING:
+    from rich.console import Console
 
 logger = logging.getLogger(__name__)
 
@@ -36,6 +38,8 @@ _RST = "\033[0m"
 
 def cprint(text: str):
     """Print ANSI-colored text through prompt_toolkit's renderer."""
+    from prompt_toolkit import print_formatted_text as _pt_print
+    from prompt_toolkit.formatted_text import ANSI as _PT_ANSI
     _pt_print(_PT_ANSI(text))
 
 
@@ -471,7 +475,7 @@ def _display_toolset_name(toolset_name: str) -> str:
     )
 
 
-def build_welcome_banner(console: Console, model: str, cwd: str,
+def build_welcome_banner(console: "Console", model: str, cwd: str,
                          tools: List[dict] = None,
                          enabled_toolsets: List[str] = None,
                          session_id: str = None,
@@ -490,6 +494,8 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
         context_length: Model's context window size in tokens.
     """
     from model_tools import check_tool_availability, TOOLSET_REQUIREMENTS
+    from rich.panel import Panel
+    from rich.table import Table
     if get_toolset_for_tool is None:
         from model_tools import get_toolset_for_tool
 
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 55b76b58850..6e643338a98 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -286,9 +286,22 @@ def detect_install_method(project_root: Optional[Path] = None) -> str:
     Resolution order:
     1. Stamped ``~/.hermes/.install_method`` file (written by installers)
     2. HERMES_MANAGED env / .managed marker (NixOS, Homebrew)
-    3. Container detection (/.dockerenv, /run/.containerenv, cgroup)
-    4. .git directory presence -> 'git'
-    5. Fallback -> 'pip'
+    3. .git directory presence -> 'git'
+    4. Fallback -> 'pip'
+
+    Note: running inside a container is NOT treated as "docker" on its own.
+    The two supported install paths both self-identify via the
+    ``.install_method`` stamp (caught by step 1), so neither relies on
+    container detection here:
+      - the curl installer (scripts/install.sh, the README/website install
+        command) git-clones the repo and stamps ``git``;
+      - the published ``nousresearch/hermes-agent`` image stamps ``docker``
+        at boot via ``docker/stage2-hook.sh``.
+    An unsupported manual install dropped into a container (no stamp) was
+    wrongly classified as the published image by bare container detection,
+    so ``hermes update`` bailed with "doesn't apply inside the Docker
+    container". Without that fallback such installs fall through to the
+    ``.git``/pip checks and behave like any off-path install. See issue #34397.
     """
     stamp = get_hermes_home() / ".install_method"
     try:
@@ -300,9 +313,6 @@ def detect_install_method(project_root: Optional[Path] = None) -> str:
     managed = get_managed_system()
     if managed:
         return managed.lower().replace(" ", "-")
-    from hermes_constants import is_container
-    if is_container():
-        return "docker"
     if project_root is None:
         project_root = Path(__file__).parent.parent.resolve()
     if (project_root / ".git").is_dir():
@@ -320,6 +330,34 @@ def stamp_install_method(method: str) -> None:
         pass
 
 
+def is_uv_tool_install() -> bool:
+    """Return True when the *running* Hermes lives in a ``uv tool`` layout.
+
+    ``uv tool install hermes-agent`` places the install at
+    ``.../uv/tools/hermes-agent/...`` (default ``~/.local/share/uv/tools``,
+    or ``$UV_TOOL_DIR/...``). Such installs live outside any virtualenv, so
+    ``uv pip install`` fails with ``No virtual environment found`` and the
+    update path must use ``uv tool upgrade`` instead.
+
+    Detection is intentionally restricted to properties of the running
+    interpreter (``sys.prefix`` / ``sys.executable``). We deliberately do
+    NOT consult ``uv tool list``: it would also return True when
+    ``hermes-agent`` happens to be uv-tool-installed on the machine while
+    the *active* Hermes is a regular pip/venv install, causing
+    ``hermes update`` to upgrade the wrong copy. It would also block on a
+    subprocess call (~seconds) just to compute a recommendation string.
+    """
+    def _has_uv_tool_marker(path: str) -> bool:
+        norm = os.path.normpath(path).replace(os.sep, "/").lower()
+        return "/uv/tools/hermes-agent/" in norm + "/"
+
+    if _has_uv_tool_marker(sys.prefix):
+        return True
+    if _has_uv_tool_marker(sys.executable or ""):
+        return True
+    return False
+
+
 def recommended_update_command_for_method(method: str) -> str:
     """Return the update command or guidance for a given install method."""
     if method == "nixos":
@@ -329,9 +367,10 @@ def recommended_update_command_for_method(method: str) -> str:
     if method == "docker":
         return "docker pull nousresearch/hermes-agent:latest"
     if method == "pip":
+        if is_uv_tool_install():
+            return "uv tool upgrade hermes-agent"
         import shutil
-        uv = shutil.which("uv")
-        if uv:
+        if shutil.which("uv"):
             return "uv pip install --upgrade hermes-agent"
         return "pip install --upgrade hermes-agent"
     return "hermes update"
@@ -1184,6 +1223,11 @@ DEFAULT_CONFIG = {
         # Mirrors `hermes -c` muscle memory.  Default off so existing
         # users aren't surprised.  HERMES_TUI_RESUME=<id> always wins.
         "tui_auto_resume_recent": False,
+        # When true (default), `hermes --tui` drops a one-time hint
+        # ("subagents working · /agents to watch live") the first time a turn
+        # starts delegating, nudging the user toward the live spawn-tree
+        # dashboard. Set false to suppress the hint.
+        "tui_agents_nudge": True,
         "bell_on_complete": False,
         "show_reasoning": False,
         "streaming": False,
@@ -1203,6 +1247,13 @@ DEFAULT_CONFIG = {
         # class of over-claim that otherwise forces users to run
         # `git status` to verify edits landed.  Set false to suppress.
         "file_mutation_verifier": True,
+        # Turn-completion explainer.  When true (default), the agent appends a
+        # one-line explanation to its final response whenever a turn ends
+        # abnormally with no usable reply — empty content after retries, a
+        # partial/truncated stream, a still-pending tool result, or an
+        # iteration/budget limit.  Replaces the bare "(empty)" sentinel so the
+        # failure isn't silent from the UI's perspective.  Set false to suppress.
+        "turn_completion_explainer": True,
         "show_cost": False,       # Show $ cost in the status bar (off by default)
         "skin": "default",
         # UI language for static user-facing messages (approval prompts, a
diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py
index 3db70beaa72..4971f1faece 100644
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@@ -204,6 +204,60 @@ def _fail_and_issue(text: str, detail: str, fix: str, issues: list[str]) -> None
     issues.append(fix)
 
 
+def _read_pyproject_version() -> str | None:
+    """Read the ``version = "..."`` from ``pyproject.toml`` at the project root.
+
+    Returns None when running from an installed wheel (no pyproject.toml ships
+    with the package) or when the file can't be parsed. Reads only the
+    ``[project]`` version, ignoring any version strings that appear in other
+    tables.
+    """
+    pyproject = PROJECT_ROOT / "pyproject.toml"
+    try:
+        text = pyproject.read_text(encoding="utf-8")
+    except OSError:
+        return None
+    in_project = False
+    for raw in text.splitlines():
+        line = raw.strip()
+        if line.startswith("[") and line.endswith("]"):
+            in_project = line == "[project]"
+            continue
+        if in_project and line.startswith("version") and "=" in line:
+            value = line.split("=", 1)[1]
+            value = value.split("#", 1)[0].strip().strip("\"'")
+            return value or None
+    return None
+
+
+def _check_version_consistency(issues: list[str]) -> None:
+    """Verify pyproject.toml version matches hermes_cli.__version__.
+
+    A git conflict resolution (reset/merge) can revert one file without the
+    other, leaving ``hermes --version`` reporting a stale version while
+    ``pyproject.toml`` is current. Detect that drift so users can re-sync.
+    Silent no-op for installed wheels where pyproject.toml isn't present.
+    """
+    try:
+        from hermes_cli import __version__ as init_version
+    except Exception:
+        return
+    pyproject_version = _read_pyproject_version()
+    if pyproject_version is None:
+        # Installed wheel or unreadable pyproject — nothing to cross-check.
+        return
+    if pyproject_version == init_version:
+        check_ok("Version files consistent", f"({init_version})")
+    else:
+        _fail_and_issue(
+            "Version mismatch between source files",
+            f"(pyproject.toml {pyproject_version} != hermes_cli/__init__.py {init_version})",
+            "Re-sync version files (e.g. run 'hermes update', or set "
+            "hermes_cli/__init__.py __version__ to match pyproject.toml)",
+            issues,
+        )
+
+
 def _check_s6_supervision(issues: list[str]) -> None:
     """Inside a container under our s6 /init, surface what s6 sees.
 
@@ -509,6 +563,10 @@ def run_doctor(args):
         check_ok("Virtual environment active")
     else:
         check_warn("Not in virtual environment", "(recommended)")
+
+    # Detect drift between pyproject.toml and hermes_cli/__init__.py versions
+    # (a git conflict resolution can silently revert one but not the other).
+    _check_version_consistency(issues)
     
     _section("Required Packages")
     required_packages = [
diff --git a/hermes_cli/kanban_db.py b/hermes_cli/kanban_db.py
index 5e465e87a6f..4711655249d 100644
--- a/hermes_cli/kanban_db.py
+++ b/hermes_cli/kanban_db.py
@@ -396,6 +396,41 @@ def workspaces_root(board: Optional[str] = None) -> Path:
     return board_dir(slug) / "workspaces"
 
 
+def attachments_root(board: Optional[str] = None) -> Path:
+    """Return the directory under which task file attachments are stored.
+
+    Mirrors :func:`worker_logs_dir` / :func:`workspaces_root`: anchored
+    per-board so attachments don't leak between projects. Each task gets
+    its own ``<root>/.../attachments/<task_id>/`` subdirectory.
+
+    ``HERMES_KANBAN_ATTACHMENTS_ROOT`` pins the path directly (highest
+    precedence) for tests and unusual deployments.
+
+    ``default`` uses ``<root>/kanban/attachments/``; other boards use
+    ``<root>/kanban/boards/<slug>/attachments/``.
+
+    Workers (which run with full file-tool access) read attached files
+    by the absolute path surfaced in :func:`build_worker_context`. On the
+    local terminal backend — the default for kanban — that path resolves
+    directly. Remote backends (Docker/Modal) need this directory mounted;
+    see the kanban docs.
+    """
+    override = os.environ.get("HERMES_KANBAN_ATTACHMENTS_ROOT", "").strip()
+    if override:
+        return Path(override).expanduser()
+    slug = _normalize_board_slug(board)
+    if slug is None:
+        slug = get_current_board()
+    if slug == DEFAULT_BOARD:
+        return kanban_home() / "kanban" / "attachments"
+    return board_dir(slug) / "attachments"
+
+
+def task_attachments_dir(task_id: str, board: Optional[str] = None) -> Path:
+    """Return the per-task attachment directory ``<root>/<task_id>/``."""
+    return attachments_root(board=board) / task_id
+
+
 def worker_logs_dir(board: Optional[str] = None) -> Path:
     """Return the directory under which per-task worker logs are written.
 
@@ -831,6 +866,20 @@ class Comment:
     created_at: int
 
 
+@dataclass
+class Attachment:
+    """In-memory view of a row from the ``task_attachments`` table."""
+
+    id: int
+    task_id: str
+    filename: str
+    stored_path: str
+    content_type: Optional[str]
+    size: int
+    uploaded_by: Optional[str]
+    created_at: int
+
+
 @dataclass
 class Event:
     id: int
@@ -957,6 +1006,23 @@ CREATE TABLE IF NOT EXISTS task_runs (
     error               TEXT
 );
 
+-- Files attached to a task (PDFs, images, source documents). The blob
+-- lives on disk under ``attachments_root(board)/<task_id>/<stored_name>``;
+-- this row carries metadata + the absolute ``stored_path`` so the
+-- dashboard can list/download and ``build_worker_context`` can surface
+-- the absolute path to the worker (which has full file-tool access). See
+-- #35338.
+CREATE TABLE IF NOT EXISTS task_attachments (
+    id           INTEGER PRIMARY KEY AUTOINCREMENT,
+    task_id      TEXT NOT NULL,
+    filename     TEXT NOT NULL,
+    stored_path  TEXT NOT NULL,
+    content_type TEXT,
+    size         INTEGER NOT NULL DEFAULT 0,
+    uploaded_by  TEXT,
+    created_at   INTEGER NOT NULL
+);
+
 -- Subscription from a gateway source (platform + chat + thread) to a
 -- task. The gateway's kanban-notifier watcher tails task_events and
 -- pushes ``completed`` / ``blocked`` / ``spawn_auto_blocked`` events to
@@ -981,6 +1047,7 @@ CREATE INDEX IF NOT EXISTS idx_comments_task         ON task_comments(task_id, c
 CREATE INDEX IF NOT EXISTS idx_events_task           ON task_events(task_id, created_at);
 CREATE INDEX IF NOT EXISTS idx_runs_task             ON task_runs(task_id, started_at);
 CREATE INDEX IF NOT EXISTS idx_runs_status           ON task_runs(status);
+CREATE INDEX IF NOT EXISTS idx_attachments_task      ON task_attachments(task_id, created_at);
 CREATE INDEX IF NOT EXISTS idx_notify_task           ON kanban_notify_subs(task_id);
 """
 
@@ -1637,6 +1704,140 @@ def _migrate_add_optional_columns(conn: sqlite3.Connection) -> None:
             (new, old),
         )
 
+    _rebuild_drifted_tables(conn)
+
+
+# Legacy DBs defined these tables with a ``TEXT PRIMARY KEY`` id (or, for
+# ``kanban_notify_subs``, a nullable ``TEXT last_event_id``). The current
+# schema uses ``INTEGER PRIMARY KEY AUTOINCREMENT`` / ``INTEGER NOT NULL
+# DEFAULT 0``. ``CREATE TABLE IF NOT EXISTS`` skips existing tables
+# regardless of schema and ``_add_column_if_missing`` only adds columns, so
+# neither can fix a drifted column type — the table must be rebuilt. See
+# #35096.
+#
+# Each entry pairs the canonical CREATE TABLE with the CREATE INDEX
+# statements that DROP TABLE would otherwise take down with it (including
+# ``idx_events_run``, added by the additive pass above). To guard against
+# this list drifting from SCHEMA_SQL, ``test_rebuilt_schema_matches_fresh``
+# asserts a rebuilt legacy DB is byte-identical to a fresh one.
+_REBUILD_SPECS = {
+    "task_events": (
+        "CREATE TABLE task_events ("
+        " id INTEGER PRIMARY KEY AUTOINCREMENT,"
+        " task_id TEXT NOT NULL, run_id INTEGER, kind TEXT NOT NULL,"
+        " payload TEXT, created_at INTEGER NOT NULL)",
+        (
+            "CREATE INDEX idx_events_task ON task_events(task_id, created_at)",
+            "CREATE INDEX idx_events_run ON task_events(run_id, id)",
+        ),
+    ),
+    "task_comments": (
+        "CREATE TABLE task_comments ("
+        " id INTEGER PRIMARY KEY AUTOINCREMENT,"
+        " task_id TEXT NOT NULL, author TEXT NOT NULL, body TEXT NOT NULL,"
+        " created_at INTEGER NOT NULL)",
+        ("CREATE INDEX idx_comments_task ON task_comments(task_id, created_at)",),
+    ),
+    "task_runs": (
+        "CREATE TABLE task_runs ("
+        " id INTEGER PRIMARY KEY AUTOINCREMENT,"
+        " task_id TEXT NOT NULL, profile TEXT, step_key TEXT,"
+        " status TEXT NOT NULL, claim_lock TEXT, claim_expires INTEGER,"
+        " worker_pid INTEGER, max_runtime_seconds INTEGER,"
+        " last_heartbeat_at INTEGER, started_at INTEGER NOT NULL,"
+        " ended_at INTEGER, outcome TEXT, summary TEXT, metadata TEXT,"
+        " error TEXT)",
+        (
+            "CREATE INDEX idx_runs_task ON task_runs(task_id, started_at)",
+            "CREATE INDEX idx_runs_status ON task_runs(status)",
+        ),
+    ),
+    "kanban_notify_subs": (
+        "CREATE TABLE kanban_notify_subs ("
+        " task_id TEXT NOT NULL, platform TEXT NOT NULL, chat_id TEXT NOT NULL,"
+        " thread_id TEXT NOT NULL DEFAULT '', user_id TEXT,"
+        " notifier_profile TEXT, created_at INTEGER NOT NULL,"
+        " last_event_id INTEGER NOT NULL DEFAULT 0,"
+        " PRIMARY KEY (task_id, platform, chat_id, thread_id))",
+        ("CREATE INDEX idx_notify_task ON kanban_notify_subs(task_id)",),
+    ),
+}
+
+
+def _table_has_drifted(conn: sqlite3.Connection, table: str) -> bool:
+    """True when ``table`` still carries the legacy (pre-AUTOINCREMENT) shape."""
+    info = conn.execute(f"PRAGMA table_info({table})").fetchall()
+    if not info:
+        return False  # table absent — nothing to rebuild
+    if table == "kanban_notify_subs":
+        lei = next((c for c in info if c["name"] == "last_event_id"), None)
+        return lei is not None and (lei["type"] or "").upper() != "INTEGER"
+    # task_events / task_comments / task_runs: id must be INTEGER and a PK.
+    id_col = next((c for c in info if c["name"] == "id"), None)
+    if id_col is None:
+        return False
+    return not ((id_col["type"] or "").upper() == "INTEGER" and id_col["pk"])
+
+
+def _rebuild_drifted_tables(conn: sqlite3.Connection) -> None:
+    """Rebuild any kanban table whose column types drifted from SCHEMA_SQL.
+
+    Old boards crash the gateway notifier (``int(None)`` on a NULL id in
+    ``unseen_events_for_sub``) and never match the ``id > cursor`` filter, so
+    every kanban notification is silently lost (#35096). Each affected table is
+    rebuilt with the standard SQLite pattern — CREATE new → INSERT shared
+    columns → DROP old → RENAME — recreating its indexes too (DROP TABLE takes
+    them down). The legacy TEXT ids are dropped (they aren't valid integers);
+    AUTOINCREMENT assigns fresh ones and ``last_event_id`` cursors reset to 0,
+    so the first post-migration tick replays a task's event history once —
+    the safe failure mode for a feature that was already fully broken.
+
+    The whole pass runs in one transaction so an interruption can't leave a
+    table half-renamed, and under ``connect()``'s init locks so nothing races
+    it. Idempotent: a correctly-typed DB skips every table and returns without
+    opening a transaction.
+    """
+    drifted = [t for t in _REBUILD_SPECS if _table_has_drifted(conn, t)]
+    if not drifted:
+        return
+
+    conn.execute("BEGIN IMMEDIATE")
+    try:
+        for table in drifted:
+            create_sql, index_sqls = _REBUILD_SPECS[table]
+            old_cols = [c["name"] for c in conn.execute(f"PRAGMA table_info({table})")]
+            _log.info("kanban migration: rebuilding %s to match current schema", table)
+            conn.execute(f"ALTER TABLE {table} RENAME TO {table}_legacy")
+            conn.execute(create_sql)
+            new_cols = {c["name"] for c in conn.execute(f"PRAGMA table_info({table})")}
+            if table == "kanban_notify_subs":
+                # Cast the legacy TEXT cursor to INTEGER; NULL / non-numeric → 0.
+                shared = [c for c in old_cols if c in new_cols and c != "last_event_id"]
+                cols_csv = ", ".join(shared)
+                conn.execute(
+                    f"INSERT INTO {table} ({cols_csv}, last_event_id) "
+                    f"SELECT {cols_csv}, COALESCE(CAST(last_event_id AS INTEGER), 0) "
+                    f"FROM {table}_legacy"
+                )
+            else:
+                # Drop the legacy TEXT id; AUTOINCREMENT reassigns it.
+                shared = [c for c in old_cols if c in new_cols and c != "id"]
+                cols_csv = ", ".join(shared)
+                conn.execute(
+                    f"INSERT INTO {table} ({cols_csv}) "
+                    f"SELECT {cols_csv} FROM {table}_legacy"
+                )
+            conn.execute(f"DROP TABLE {table}_legacy")
+            for index_sql in index_sqls:
+                conn.execute(index_sql)
+        conn.execute("COMMIT")
+    except Exception:
+        try:
+            conn.execute("ROLLBACK")
+        except sqlite3.OperationalError:
+            pass
+        raise
+
 
 def _check_file_length_invariant(conn: sqlite3.Connection) -> None:
     """Read the SQLite header page_count and compare against actual file size.
@@ -2252,6 +2453,121 @@ def list_comments(conn: sqlite3.Connection, task_id: str) -> list[Comment]:
     ]
 
 
+# ---------------------------------------------------------------------------
+# Attachments
+# ---------------------------------------------------------------------------
+
+def add_attachment(
+    conn: sqlite3.Connection,
+    task_id: str,
+    *,
+    filename: str,
+    stored_path: str,
+    content_type: Optional[str] = None,
+    size: int = 0,
+    uploaded_by: Optional[str] = None,
+) -> int:
+    """Record a file attachment for a task. Returns the new attachment id.
+
+    The caller is responsible for writing the blob to ``stored_path``
+    first (under :func:`task_attachments_dir`); this only persists the
+    metadata row and appends an ``attached`` event.
+    """
+    if not filename or not filename.strip():
+        raise ValueError("attachment filename is required")
+    if not stored_path or not stored_path.strip():
+        raise ValueError("attachment stored_path is required")
+    now = int(time.time())
+    with write_txn(conn):
+        if not conn.execute(
+            "SELECT 1 FROM tasks WHERE id = ?", (task_id,)
+        ).fetchone():
+            raise ValueError(f"unknown task {task_id}")
+        cur = conn.execute(
+            "INSERT INTO task_attachments "
+            "(task_id, filename, stored_path, content_type, size, uploaded_by, created_at) "
+            "VALUES (?, ?, ?, ?, ?, ?, ?)",
+            (
+                task_id,
+                filename.strip(),
+                stored_path,
+                content_type,
+                int(size),
+                uploaded_by,
+                now,
+            ),
+        )
+        _append_event(
+            conn,
+            task_id,
+            "attached",
+            {"filename": filename.strip(), "size": int(size), "by": uploaded_by},
+        )
+        return int(cur.lastrowid or 0)
+
+
+def list_attachments(conn: sqlite3.Connection, task_id: str) -> list[Attachment]:
+    rows = conn.execute(
+        "SELECT * FROM task_attachments WHERE task_id = ? ORDER BY created_at ASC, id ASC",
+        (task_id,),
+    ).fetchall()
+    return [
+        Attachment(
+            id=r["id"],
+            task_id=r["task_id"],
+            filename=r["filename"],
+            stored_path=r["stored_path"],
+            content_type=r["content_type"],
+            size=r["size"] or 0,
+            uploaded_by=r["uploaded_by"],
+            created_at=r["created_at"],
+        )
+        for r in rows
+    ]
+
+
+def get_attachment(conn: sqlite3.Connection, attachment_id: int) -> Optional[Attachment]:
+    r = conn.execute(
+        "SELECT * FROM task_attachments WHERE id = ?", (attachment_id,)
+    ).fetchone()
+    if r is None:
+        return None
+    return Attachment(
+        id=r["id"],
+        task_id=r["task_id"],
+        filename=r["filename"],
+        stored_path=r["stored_path"],
+        content_type=r["content_type"],
+        size=r["size"] or 0,
+        uploaded_by=r["uploaded_by"],
+        created_at=r["created_at"],
+    )
+
+
+def delete_attachment(conn: sqlite3.Connection, attachment_id: int) -> Optional[Attachment]:
+    """Delete an attachment row and its on-disk blob. Returns the removed row.
+
+    Returns ``None`` when no row matched. The blob is removed best-effort
+    (a missing file is not an error); the metadata row is the source of
+    truth for whether an attachment "exists".
+    """
+    with write_txn(conn):
+        att = get_attachment(conn, attachment_id)
+        if att is None:
+            return None
+        conn.execute("DELETE FROM task_attachments WHERE id = ?", (attachment_id,))
+        _append_event(
+            conn, att.task_id, "attachment_removed", {"filename": att.filename}
+        )
+    try:
+        p = Path(att.stored_path)
+        if p.is_file():
+            p.unlink()
+    except OSError:
+        pass
+    return att
+
+
 def list_events(conn: sqlite3.Connection, task_id: str) -> list[Event]:
     rows = conn.execute(
         "SELECT * FROM task_events WHERE task_id = ? ORDER BY created_at ASC, id ASC",
@@ -2457,7 +2773,9 @@ def _has_sticky_block(conn: sqlite3.Connection, task_id: str) -> bool:
     return bool(row) and row["kind"] == "blocked"
 
 
-def recompute_ready(conn: sqlite3.Connection) -> int:
+def recompute_ready(
+    conn: sqlite3.Connection, failure_limit: int = None,
+) -> int:
     """Promote ``todo`` tasks to ``ready`` when all parents are ``done`` or ``archived``.
 
     Returns the number of tasks promoted.  Safe to call inside or outside
@@ -2465,17 +2783,34 @@ def recompute_ready(conn: sqlite3.Connection) -> int:
 
     ``blocked`` tasks are also considered for promotion (so a task
     blocked purely by a parent dependency unblocks itself when the
-    parent completes), *except* when the most recent block event was a
-    worker-initiated ``kanban_block`` — those stay blocked until an
-    explicit ``kanban_unblock`` (#28712).  Without that guard, a
-    ``review-required`` handoff would auto-respawn, the fresh worker
-    would find nothing to do, exit cleanly, get recorded as a protocol
-    violation, and the cycle would repeat indefinitely.
+    parent completes), *except* in two cases:
+
+    1. The most recent block event was a worker-initiated
+       ``kanban_block`` — those stay blocked until an explicit
+       ``kanban_unblock`` (#28712).
+
+    2. The task's ``consecutive_failures`` has reached the effective
+       failure limit.  This prevents infinite retry loops when a task
+       repeatedly exhausts its iteration budget: without this guard the
+       counter would reset on every recovery cycle and the circuit
+       breaker could never trip (#35072).
+
+    The effective failure limit resolves in the same order as the
+    circuit breaker in ``_record_task_failure`` so the two never
+    disagree about when a task is permanently blocked:
+
+      1. per-task ``max_retries`` if set
+      2. caller-supplied ``failure_limit`` (the dispatcher passes the
+         ``kanban.failure_limit`` config value through ``dispatch_once``)
+      3. ``DEFAULT_FAILURE_LIMIT``
     """
+    if failure_limit is None:
+        failure_limit = DEFAULT_FAILURE_LIMIT
     promoted = 0
     with write_txn(conn):
         todo_rows = conn.execute(
-            "SELECT id, status FROM tasks WHERE status IN ('todo', 'blocked')"
+            "SELECT id, status, consecutive_failures, max_retries "
+            "FROM tasks WHERE status IN ('todo', 'blocked')"
         ).fetchall()
         for row in todo_rows:
             task_id = row["id"]
@@ -2493,13 +2828,25 @@ def recompute_ready(conn: sqlite3.Connection) -> int:
                 (task_id,),
             ).fetchall()
             if all(p["status"] in ("done", "archived") for p in parents):
-                # Blocked tasks also get their failure counters reset —
-                # this is effectively an auto-unblock (circuit-breaker
-                # recovery; worker-initiated blocks are skipped above).
                 if cur_status == "blocked":
+                    # Don't auto-recover tasks that have hit the
+                    # circuit-breaker failure limit.  Without this
+                    # guard, a task that repeatedly exhausts its
+                    # iteration budget would cycle forever:
+                    # block → auto-recover → respawn → budget
+                    # exhausted → block → …  The counter must also
+                    # be preserved so the breaker can accumulate
+                    # across recovery cycles.
+                    failures = int(row["consecutive_failures"] or 0)
+                    task_limit = row["max_retries"]
+                    effective_limit = (
+                        int(task_limit) if task_limit is not None
+                        else int(failure_limit)
+                    )
+                    if failures >= effective_limit:
+                        continue
                     conn.execute(
-                        "UPDATE tasks SET status = 'ready', "
-                        "consecutive_failures = 0, last_failure_error = NULL "
+                        "UPDATE tasks SET status = 'ready' "
                         "WHERE id = ? AND status = 'blocked'",
                         (task_id,),
                     )
@@ -5424,7 +5771,7 @@ def dispatch_once(
     if _crash_auto_blocked:
         result.auto_blocked.extend(_crash_auto_blocked)
     result.timed_out = enforce_max_runtime(conn)
-    result.promoted = recompute_ready(conn)
+    result.promoted = recompute_ready(conn, failure_limit=failure_limit)
 
     # Count tasks already running so max_spawn enforces concurrency rather
     # than a per-tick spawn budget. See the docstring above for the full
@@ -6300,6 +6647,25 @@ def build_worker_context(conn: sqlite3.Connection, task_id: str) -> str:
         lines.append(_cap(task.body, _CTX_MAX_BODY_BYTES))
         lines.append("")
 
+    # Attachments — files uploaded to this task (PDFs, source docs,
+    # images). Surface the absolute on-disk path so the worker, which has
+    # full file-tool access, can read them directly (read_file, terminal
+    # `pdftotext`, etc.). On the local terminal backend the path resolves
+    # as-is; remote backends need the kanban attachments dir mounted.
+    attachments = list_attachments(conn, task_id)
+    if attachments:
+        lines.append("## Attachments")
+        lines.append(
+            "Files attached to this task. Read them with the file/terminal "
+            "tools at the absolute paths below:"
+        )
+        for att in attachments:
+            size_kb = max(1, (att.size + 1023) // 1024) if att.size else 0
+            size_str = f", {size_kb} KB" if size_kb else ""
+            ctype = f", {att.content_type}" if att.content_type else ""
+            lines.append(f"- `{att.filename}`{ctype}{size_str} → `{att.stored_path}`")
+        lines.append("")
+
     # Prior attempts — show closed runs so a retrying worker sees the
     # history. Skip the currently-active run (that's this worker).
     # Cap at _CTX_MAX_PRIOR_ATTEMPTS most-recent closed runs; older
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 1f75e8bc8fe..4211a73dd8e 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -65,6 +65,46 @@ import os
 import sys
 
 
+def _set_process_title() -> None:
+    """Set the process title to 'hermes' so tools like 'ps', 'top', and
+    'htop' show the app name instead of 'python3.xx'.
+
+    Purely cosmetic — non-fatal on any platform.
+
+    Strategy (try in order):
+      1. ``setproctitle`` (opt-in dep — installed via ``hermes tools`` or
+         ``pip install setproctitle``, or bundled in a future release).
+      2. ctypes ``prctl(PR_SET_NAME)`` (Linux only, 15-char limit).
+      3. ctypes ``pthread_setname_np`` (macOS only, kernel thread name —
+         changes lldb/top but not ``ps aux``).
+      4. No-op on Windows (the .exe name is already ``hermes.exe``).
+    """
+    # Strategy 1: setproctitle (best — works on macOS, Linux, BSD)
+    try:
+        import setproctitle  # type: ignore[import-untyped]
+
+        setproctitle.setproctitle("hermes")
+        return
+    except ImportError:
+        pass
+
+    # Strategy 2/3: platform-specific ctypes fallback
+    import ctypes
+    import platform
+
+    try:
+        system = platform.system()
+        if system == "Linux":
+            libc = ctypes.CDLL("libc.so.6", use_errno=True)
+            libc.prctl(15, b"hermes", 0, 0, 0)  # PR_SET_NAME = 15
+        elif system == "Darwin":
+            libc = ctypes.CDLL("libc.dylib", use_errno=True)
+            libc.pthread_setname_np(b"hermes")
+        # Windows: the .exe name is already ``hermes.exe`` — nothing to do.
+    except Exception:
+        pass
+
+
 # Mouse-tracking residue suppression — runs BEFORE every other import on the
 # TUI hot path so the terminal stops emitting SGR/X10 mouse reports while the
 # Python launcher is still doing imports (≈100–300ms in cooked + echo mode,
@@ -2385,7 +2425,12 @@ def select_provider_and_model(args=None):
     if active == "openrouter" and get_env_value("OPENAI_BASE_URL"):
         active = "custom"
 
-    from hermes_cli.models import CANONICAL_PROVIDERS, _PROVIDER_LABELS
+    from hermes_cli.models import (
+        CANONICAL_PROVIDERS,
+        _PROVIDER_LABELS,
+        group_providers,
+        provider_group_for_slug,
+    )
 
     provider_labels = dict(_PROVIDER_LABELS)  # derive from canonical list
     if active and active in _custom_provider_map:
@@ -2398,8 +2443,43 @@ def select_provider_and_model(args=None):
     print(f"  Active provider:  {active_label}")
     print()
 
-    # Step 1: Provider selection — flat list from CANONICAL_PROVIDERS
-    all_providers = [(p.slug, p.tui_desc) for p in CANONICAL_PROVIDERS]
+    # Step 1: Provider selection.
+    #
+    # Canonical providers are folded into top-level groups (display only — see
+    # PROVIDER_GROUPS in hermes_cli/models.py). A multi-member group shows one
+    # row ("Kimi / Moonshot ▸"); picking it opens a member sub-picker that
+    # resolves back to a concrete slug, so the dispatch chain below is
+    # unchanged. Custom providers and the trailing actions stay flat.
+    canonical_descs = {p.slug: p.tui_desc for p in CANONICAL_PROVIDERS}
+    grouped_rows = group_providers([p.slug for p in CANONICAL_PROVIDERS])
+
+    # The group/slug that should be pre-selected: the active provider's group
+    # if it's grouped, otherwise the active slug itself.
+    active_group = provider_group_for_slug(active) if active else ""
+
+    # ordered entries: (key, label, members)
+    #   members == [] → leaf row, key is a provider slug / action
+    #   members != [] → group row, key is "group:<gid>"
+    ordered: list[tuple[str, str, list[str]]] = []
+    default_idx = 0
+    for row in grouped_rows:
+        if row["kind"] == "group":
+            gid = row["group_id"]
+            label = f"{row['label']} ▸"
+            key = f"group:{gid}"
+            is_active = bool(active_group) and gid == active_group
+            members = row["members"]
+        else:
+            slug = row["slug"]
+            label = canonical_descs.get(slug, provider_labels.get(slug, slug))
+            key = slug
+            is_active = bool(active) and slug == active
+            members = []
+        if is_active:
+            ordered.append((key, f"{label}  ← currently active", members))
+            default_idx = len(ordered) - 1
+        else:
+            ordered.append((key, label, members))
 
     for key, provider_info in _custom_provider_map.items():
         name = provider_info["name"]
@@ -2407,36 +2487,49 @@ def select_provider_and_model(args=None):
         short_url = base_url.replace("https://", "").replace("http://", "").rstrip("/")
         saved_model = provider_info.get("model", "")
         model_hint = f" — {saved_model}" if saved_model else ""
-        all_providers.append((key, f"{name} ({short_url}){model_hint}"))
-
-    # Build the menu
-    ordered = []
-    default_idx = 0
-    for key, label in all_providers:
+        label = f"{name} ({short_url}){model_hint}"
         if active and key == active:
-            ordered.append((key, f"{label}  ← currently active"))
+            ordered.append((key, f"{label}  ← currently active", []))
             default_idx = len(ordered) - 1
         else:
-            ordered.append((key, label))
+            ordered.append((key, label, []))
 
-    ordered.append(("custom", "Custom endpoint (enter URL manually)"))
+    ordered.append(("custom", "Custom endpoint (enter URL manually)", []))
     _has_saved_custom_list = isinstance(config.get("custom_providers"), list) and bool(
         config.get("custom_providers")
     )
     if _has_saved_custom_list:
-        ordered.append(("remove-custom", "Remove a saved custom provider"))
-    ordered.append(("aux-config", "Configure auxiliary models..."))
-    ordered.append(("cancel", "Leave unchanged"))
+        ordered.append(("remove-custom", "Remove a saved custom provider", []))
+    ordered.append(("aux-config", "Configure auxiliary models...", []))
+    ordered.append(("cancel", "Leave unchanged", []))
 
     provider_idx = _prompt_provider_choice(
-        [label for _, label in ordered],
+        [label for _, label, _ in ordered],
         default=default_idx,
     )
     if provider_idx is None or ordered[provider_idx][0] == "cancel":
         print("No change.")
         return
 
-    selected_provider = ordered[provider_idx][0]
+    selected_key = ordered[provider_idx][0]
+    selected_members = ordered[provider_idx][2]
+
+    # Group row → drill into a member sub-picker. Default to the active member
+    # if the active provider lives in this group.
+    if selected_members:
+        member_default = 0
+        if active in selected_members:
+            member_default = selected_members.index(active)
+        member_labels = [
+            canonical_descs.get(m, provider_labels.get(m, m)) for m in selected_members
+        ]
+        member_idx = _prompt_provider_choice(member_labels, default=member_default)
+        if member_idx is None:
+            print("No change.")
+            return
+        selected_provider = selected_members[member_idx]
+    else:
+        selected_provider = selected_key
 
     if selected_provider == "aux-config":
         _aux_config_menu()
@@ -8008,39 +8101,6 @@ def _detect_concurrent_hermes_instances(
     except Exception:
         return []
 
-    # Build a set of PIDs to exclude: the Python process itself plus its
-    # entire parent chain. On Windows the setuptools-generated hermes.exe
-    # launcher is a separate native process that spawns python.exe (the
-    # interpreter that runs our code).  os.getpid() returns the Python PID,
-    # but the launcher (which holds the file lock) is the parent.  Without
-    # walking the parent chain, every ``hermes update`` reports its own
-    # launcher as a concurrent instance — a false positive.
-    if exclude_pid is not None:
-        exclude_pids: set[int] = {exclude_pid}
-    else:
-        exclude_pids = {os.getpid()}
-    # The parent-walk is best-effort: if psutil rejects a PID (NoSuchProcess /
-    # AccessDenied) we stop walking and use whatever we've collected so far.
-    # Broader Exception catch on the outer block guards against partially-
-    # stubbed psutil in unit tests (e.g. a SimpleNamespace lacking Process /
-    # NoSuchProcess) — the surrounding update flow documents this helper as
-    # "never raises".
-    try:
-        current = psutil.Process(next(iter(exclude_pids)))
-        while True:
-            try:
-                parent = current.parent()
-            except Exception:
-                break
-            if parent is None or parent.pid <= 0:
-                break
-            if parent.pid in exclude_pids:
-                break  # loop detected
-            exclude_pids.add(parent.pid)
-            current = parent
-    except Exception:
-        pass
-
     # Resolve every shim path to its canonical form once for cheap comparison.
     shim_paths: set[str] = set()
     for shim in _hermes_exe_shims(scripts_dir):
@@ -8051,6 +8111,56 @@ def _detect_concurrent_hermes_instances(
     if not shim_paths:
         return []
 
+    # Build a set of PIDs to exclude: the Python process itself plus every
+    # ancestor whose executable is one of our shims. On Windows the
+    # setuptools-generated hermes.exe launcher is a separate native process
+    # that spawns python.exe (the interpreter that runs our code).
+    # os.getpid() returns the Python PID, but the launcher (which holds the
+    # file lock) is the parent. Without excluding it, every ``hermes update``
+    # reports its own launcher as a concurrent instance — a false positive
+    # (issues #29341, #34795).
+    #
+    # Two robustness points learned from the field:
+    #   1. Use ``proc.parents()`` — it returns the WHOLE ancestor list in one
+    #      call. The earlier per-hop ``current.parent()`` loop bailed on the
+    #      first psutil error (AccessDenied/NoSuchProcess is common on Windows
+    #      across session/elevation boundaries), leaving the launcher shim in
+    #      the candidate set and re-triggering the false positive.
+    #   2. Only exclude ancestors whose exe is itself a shim. A genuine second
+    #      hermes.exe sitting *under* a non-Hermes parent (e.g. a Hermes
+    #      Desktop backend child) must still be flagged, so we don't blanket-
+    #      exclude unrelated ancestors like the shell or terminal.
+    # Broad ``except Exception`` guards against partially-stubbed psutil in
+    # unit tests; this helper is documented as "never raises".
+    if exclude_pid is not None:
+        exclude_pids: set[int] = {int(exclude_pid)}
+    else:
+        exclude_pids = {os.getpid()}
+    try:
+        seed = next(iter(exclude_pids))
+        try:
+            ancestors = psutil.Process(seed).parents()
+        except Exception:
+            ancestors = []
+        for ancestor in ancestors:
+            try:
+                anc_exe = ancestor.exe()
+            except Exception:
+                continue
+            if not anc_exe:
+                continue
+            try:
+                anc_norm = str(Path(anc_exe).resolve()).lower()
+            except (OSError, ValueError):
+                anc_norm = str(anc_exe).lower()
+            if anc_norm in shim_paths:
+                try:
+                    exclude_pids.add(int(ancestor.pid))
+                except Exception:
+                    continue
+    except Exception:
+        pass
+
     matches: list[tuple[int, str]] = []
     try:
         proc_iter = psutil.process_iter(["pid", "exe", "name"])
@@ -8091,6 +8201,13 @@ def _format_concurrent_instances_message(
     lines.append("")
     lines.append("  Close Hermes Desktop, exit any open `hermes` REPLs, and")
     lines.append("  stop the gateway (`hermes gateway stop`) before retrying.")
+    lines.append("")
+    if matches:
+        pid_args = " ".join(f"/PID {pid}" for pid, _ in matches)
+        lines.append("  If you've already closed everything and these PIDs are")
+        lines.append("  stale, terminate them directly, then retry the update:")
+        lines.append(f"      taskkill {pid_args} /F")
+        lines.append("")
     lines.append("  Override with `hermes update --force` if you've already")
     lines.append("  confirmed those processes will not write to the venv.")
     return "\n".join(lines)
@@ -9055,18 +9172,51 @@ def cmd_update(args):
 def _cmd_update_pip(args):
     """Update Hermes via pip (for PyPI installs)."""
     from hermes_cli import __version__
+    from hermes_cli.config import is_uv_tool_install
 
     print(f"→ Current version: {__version__}")
     print("→ Checking PyPI for updates...")
 
     uv = shutil.which("uv")
-    if uv:
+    in_venv = sys.prefix != sys.base_prefix
+    # pipx-managed installs live under .../pipx/venvs/<name>/...
+    pipx_managed = "pipx" in sys.prefix.split(os.sep)
+    pipx = shutil.which("pipx") if pipx_managed else None
+
+    # Only the ``uv pip install`` path inside a venv needs VIRTUAL_ENV
+    # exported (uv refuses to install without it when the launcher shim
+    # didn't activate the venv). ``uv tool upgrade`` / ``pipx upgrade``
+    # operate on a named environment and ignore VIRTUAL_ENV, so we don't
+    # set it for them.
+    export_virtualenv = False
+
+    if is_uv_tool_install():
+        if not uv:
+            print("✗ Detected a uv-tool install but `uv` is not on PATH; install uv and retry.")
+            sys.exit(1)
+        cmd = [uv, "tool", "upgrade", "hermes-agent"]
+    elif pipx_managed and pipx:
+        # pipx owns its own venv; ``pipx upgrade`` is the only correct path.
+        # Matches scripts/auto-update.sh, which already uses pipx upgrade.
+        cmd = [pipx, "upgrade", "hermes-agent"]
+    elif uv:
         cmd = [uv, "pip", "install", "--upgrade", "hermes-agent"]
+        if in_venv:
+            # Launcher shim runs the venv interpreter but doesn't export
+            # VIRTUAL_ENV; without it uv errors "No virtual environment found".
+            export_virtualenv = True
+        else:
+            # Outside any venv, ``--system`` lets uv target the active
+            # interpreter, matching pip's default behaviour.
+            cmd.insert(3, "--system")
     else:
         cmd = [sys.executable, "-m", "pip", "install", "--upgrade", "hermes-agent"]
 
     print(f"→ Running: {' '.join(cmd)}")
-    result = subprocess.run(cmd)
+    run_kwargs = {}
+    if export_virtualenv:
+        run_kwargs["env"] = {**os.environ, "VIRTUAL_ENV": sys.prefix}
+    result = subprocess.run(cmd, **run_kwargs)
     if result.returncode != 0:
         print("✗ Update failed")
         sys.exit(1)
@@ -11157,6 +11307,13 @@ def cmd_completion(args, parser=None):
         print(generate_bash(parser))
 
 
+def cmd_prompt_size(args):
+    """Show a byte/char breakdown of the system prompt + tool schemas."""
+    from hermes_cli.prompt_size import cmd_prompt_size as _impl
+
+    _impl(args)
+
+
 def cmd_logs(args):
     """View and filter Hermes log files."""
     from hermes_cli.logs import tail_log, list_logs
@@ -11193,6 +11350,7 @@ _BUILTIN_SUBCOMMANDS = frozenset(
         "dump", "fallback", "gateway", "hooks", "import", "insights",
         "gui", "desktop", "kanban", "login", "logout", "logs", "lsp", "mcp", "memory", "migrate",
         "model", "pairing", "plugins", "portal", "postinstall", "profile", "proxy",
+        "prompt-size",
         "send", "sessions", "setup",
         "skills", "slack", "status", "tools", "uninstall", "update",
         "version", "webhook", "whatsapp", "chat", "secrets", "security",
@@ -11293,6 +11451,26 @@ _AGENT_SUBCOMMANDS = {
 }
 
 
+def _is_tui_chat_launch(args) -> bool:
+    return bool(getattr(args, "tui", False) or os.environ.get("HERMES_TUI") == "1")
+
+
+def _command_has_dedicated_mcp_startup(args) -> bool:
+    if args.command == "acp":
+        return True
+    if args.command == "gateway" and getattr(args, "gateway_command", None) == "run":
+        return True
+    if args.command == "cron" and getattr(args, "cron_command", None) in {"run", "tick"}:
+        return True
+    return False
+
+
+def _should_background_mcp_startup(args) -> bool:
+    if _is_tui_chat_launch(args):
+        return False
+    return args.command in {None, "chat", "rl"}
+
+
 def _prepare_agent_startup(args) -> None:
     """Discover plugins/MCP/hooks for commands that can run an agent turn."""
     _sub_attr, _sub_set = _AGENT_SUBCOMMANDS.get(args.command, (None, None))
@@ -11312,19 +11490,42 @@ def _prepare_agent_startup(args) -> None:
             "plugin discovery failed at CLI startup",
             exc_info=True,
         )
-    try:
-        # MCP tool discovery — no event loop running in CLI/TUI startup,
-        # so inline is safe.  Moved here from model_tools.py module scope
-        # to avoid freezing the gateway's event loop on its first message
-        # via the same lazy import path (#16856).
-        from tools.mcp_tool import discover_mcp_tools
+    _run_inline_mcp_discovery = True
+    if _is_tui_chat_launch(args):
+        # The TUI launcher hands off to a dedicated startup path that already
+        # backgrounds MCP discovery with a bounded join before the first tool
+        # snapshot.
+        _run_inline_mcp_discovery = False
+    elif _command_has_dedicated_mcp_startup(args):
+        # These entrypoints already do their own MCP startup later on the real
+        # runtime path (gateway executor, ACP launcher, cron job runner).
+        _run_inline_mcp_discovery = False
+    elif _should_background_mcp_startup(args):
+        try:
+            from hermes_cli.mcp_startup import start_background_mcp_discovery
 
-        discover_mcp_tools()
-    except Exception:
-        logger.debug(
-            "MCP tool discovery failed at CLI startup",
-            exc_info=True,
-        )
+            start_background_mcp_discovery(
+                logger=logger,
+                thread_name="cli-mcp-discovery",
+            )
+        except Exception:
+            logger.debug(
+                "Background MCP tool discovery failed at CLI startup",
+                exc_info=True,
+            )
+        _run_inline_mcp_discovery = False
+    if _run_inline_mcp_discovery:
+        try:
+            # MCP tool discovery remains synchronous for entrypoints that do
+            # not own a later bounded/executor startup path.
+            from tools.mcp_tool import discover_mcp_tools
+
+            discover_mcp_tools()
+        except Exception:
+            logger.debug(
+                "MCP tool discovery failed at CLI startup",
+                exc_info=True,
+            )
     try:
         from hermes_cli.config import load_config
         from agent.shell_hooks import register_from_config
@@ -11465,6 +11666,10 @@ def _try_termux_fast_tui_launch() -> bool:
 
 def main():
     """Main entry point for hermes CLI."""
+    # Cosmetic: make the process show up as 'hermes' instead of 'python3.11'
+    # in ps/top/htop.  Non-fatal — just a nicer UX.
+    _set_process_title()
+
     # Force UTF-8 stdio on Windows before anything prints.  No-op elsewhere.
     try:
         from hermes_cli.stdio import configure_windows_stdio
@@ -13218,9 +13423,15 @@ Examples:
         ),
     )
     memory_sub = memory_parser.add_subparsers(dest="memory_command")
-    memory_sub.add_parser(
+    _setup_parser = memory_sub.add_parser(
         "setup", help="Interactive provider selection and configuration"
     )
+    _setup_parser.add_argument(
+        "provider",
+        nargs="?",
+        default=None,
+        help="Provider to configure directly (e.g. honcho), skipping the picker",
+    )
     memory_sub.add_parser("status", help="Show current memory provider config")
     memory_sub.add_parser("off", help="Disable external provider (built-in only)")
     _reset_parser = memory_sub.add_parser(
@@ -14471,6 +14682,30 @@ Examples:
     )
     logs_parser.set_defaults(func=cmd_logs)
 
+    # =========================================================================
+    # prompt-size command
+    # =========================================================================
+    prompt_size_parser = subparsers.add_parser(
+        "prompt-size",
+        help="Show a byte breakdown of the system prompt + tool schemas",
+        description=(
+            "Report the fixed prompt budget for a fresh session: system "
+            "prompt total, skills index, memory, user profile, and tool-schema "
+            "JSON. Runs offline (no API call)."
+        ),
+    )
+    prompt_size_parser.add_argument(
+        "--platform",
+        default="cli",
+        help="Platform to simulate (cli, telegram, discord, ...). Default: cli",
+    )
+    prompt_size_parser.add_argument(
+        "--json",
+        action="store_true",
+        help="Emit the breakdown as JSON",
+    )
+    prompt_size_parser.set_defaults(func=cmd_prompt_size)
+
     # =========================================================================
     # Parse and execute
     # =========================================================================
diff --git a/hermes_cli/mcp_startup.py b/hermes_cli/mcp_startup.py
new file mode 100644
index 00000000000..6d81853bca0
--- /dev/null
+++ b/hermes_cli/mcp_startup.py
@@ -0,0 +1,59 @@
+"""Shared CLI/TUI-safe helpers for background MCP discovery."""
+
+from __future__ import annotations
+
+import threading
+from typing import Optional
+
+_mcp_discovery_lock = threading.Lock()
+_mcp_discovery_started = False
+_mcp_discovery_thread: Optional[threading.Thread] = None
+
+
+def _has_configured_mcp_servers() -> bool:
+    """Cheap config probe so non-MCP users avoid importing the MCP stack."""
+    try:
+        from hermes_cli.config import read_raw_config
+
+        mcp_servers = (read_raw_config() or {}).get("mcp_servers")
+        return isinstance(mcp_servers, dict) and len(mcp_servers) > 0
+    except Exception:
+        # Be conservative: if config probing fails, try discovery in the
+        # background so startup still can't block.
+        return True
+
+
+def start_background_mcp_discovery(*, logger, thread_name: str) -> None:
+    """Spawn one shared background MCP discovery thread for this process."""
+    global _mcp_discovery_started, _mcp_discovery_thread
+
+    with _mcp_discovery_lock:
+        if _mcp_discovery_started:
+            return
+        _mcp_discovery_started = True
+        if not _has_configured_mcp_servers():
+            return
+
+        def _discover() -> None:
+            try:
+                from tools.mcp_tool import discover_mcp_tools
+
+                discover_mcp_tools()
+            except Exception:
+                logger.debug("Background MCP tool discovery failed", exc_info=True)
+
+        thread = threading.Thread(
+            target=_discover,
+            name=thread_name,
+            daemon=True,
+        )
+        _mcp_discovery_thread = thread
+        thread.start()
+
+
+def wait_for_mcp_discovery(timeout: float = 0.75) -> None:
+    """Briefly wait for background MCP discovery before the first tool snapshot."""
+    thread = _mcp_discovery_thread
+    if thread is None or not thread.is_alive():
+        return
+    thread.join(timeout=timeout)
diff --git a/hermes_cli/memory_setup.py b/hermes_cli/memory_setup.py
index cac13bf781d..a75c10b0229 100644
--- a/hermes_cli/memory_setup.py
+++ b/hermes_cli/memory_setup.py
@@ -452,7 +452,11 @@ def memory_command(args) -> None:
     """Route memory subcommands."""
     sub = getattr(args, "memory_command", None)
     if sub == "setup":
-        cmd_setup(args)
+        provider = getattr(args, "provider", None)
+        if provider:
+            cmd_setup_provider(provider)
+        else:
+            cmd_setup(args)
     elif sub == "status":
         cmd_status(args)
     else:
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index 42eadfd7629..fba6ec94cfd 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -936,6 +936,105 @@ _PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
 _PROVIDER_LABELS["custom"] = "Custom endpoint"  # special case: not a named provider
 
 
+# ---------------------------------------------------------------------------
+# Provider groups — DISPLAY ONLY
+#
+# Some vendors expose several Hermes provider slugs (one per endpoint /
+# auth method: global API, China API, OAuth coding plan, ...). Listing every
+# slug as a top-level row in the interactive `hermes model` / setup wizard /
+# Telegram `/model` pickers makes that list long and noisy.
+#
+# These groups fold related slugs under one top-level row in INTERACTIVE
+# PICKERS only. They do NOT change ``CANONICAL_PROVIDERS``, slug identity,
+# the ``--provider`` flag, ``/model <provider:model>``, or any typed path —
+# every member slug remains individually addressable. Grouping is a pure
+# display affordance; ``group_providers()`` is the single fold used by all
+# three picker surfaces so they stay consistent.
+#
+#   group_id -> (display_label, [member_slug, ...])
+#
+# Member order is the order shown inside the group submenu.
+# ---------------------------------------------------------------------------
+PROVIDER_GROUPS: dict[str, tuple[str, list[str]]] = {
+    "kimi":     ("Kimi / Moonshot", ["kimi-coding", "kimi-coding-cn"]),
+    "minimax":  ("MiniMax",         ["minimax", "minimax-oauth", "minimax-cn"]),
+    "xai":      ("xAI Grok",        ["xai", "xai-oauth"]),
+    "google":   ("Google Gemini",   ["gemini", "google-gemini-cli"]),
+    "openai":   ("OpenAI",          ["openai-codex", "openai-api"]),
+    "opencode": ("OpenCode",        ["opencode-zen", "opencode-go"]),
+    "copilot":  ("GitHub Copilot",  ["copilot", "copilot-acp"]),
+}
+
+# Reverse index: member slug -> group_id. Built once at import.
+_SLUG_TO_GROUP: dict[str, str] = {
+    slug: gid for gid, (_label, members) in PROVIDER_GROUPS.items() for slug in members
+}
+
+
+def provider_group_for_slug(slug: str) -> str:
+    """Return the group_id a provider slug belongs to, or "" if ungrouped."""
+    return _SLUG_TO_GROUP.get(str(slug or "").strip().lower(), "")
+
+
+def group_providers(slugs):
+    """Fold a flat ordered slug iterable into picker rows by provider group.
+
+    DISPLAY ONLY. Used by every interactive picker (``hermes model``, the
+    setup wizard, the Telegram ``/model`` keyboard) so grouping is identical
+    across surfaces.
+
+    Each returned row is a dict::
+
+        {"kind": "single", "slug": <slug>}                       # ungrouped, or
+                                                                  # 1-member group
+        {"kind": "group", "group_id": <gid>, "label": <label>,
+         "members": [<slug>, ...]}                                # 2+ members
+
+    Rules:
+      * A group row appears at the position of its FIRST present member, in
+        the input order. Subsequent members fold into that row (and are not
+        emitted again).
+      * Member order inside a group follows ``PROVIDER_GROUPS`` declaration,
+        restricted to the members actually present in ``slugs``.
+      * A group reduced to a single present member degrades to a ``single``
+        row — no pointless one-item submenu.
+      * Slugs not in any group pass through as ``single`` rows, order
+        preserved.
+      * Duplicate slugs in the input are ignored after first sight.
+    """
+    seen: set[str] = set()
+    # Which present members each group has, in declaration order.
+    group_members: dict[str, list[str]] = {}
+    for gid, (_label, members) in PROVIDER_GROUPS.items():
+        present = [m for m in members if m in set(slugs)]
+        if present:
+            group_members[gid] = present
+
+    rows = []
+    emitted_groups: set[str] = set()
+    for slug in slugs:
+        s = str(slug or "").strip().lower()
+        if not s or s in seen:
+            continue
+        seen.add(s)
+        gid = _SLUG_TO_GROUP.get(s, "")
+        if not gid:
+            rows.append({"kind": "single", "slug": s})
+            continue
+        if gid in emitted_groups:
+            continue  # already folded at the first member's position
+        emitted_groups.add(gid)
+        members = group_members.get(gid, [s])
+        if len(members) <= 1:
+            rows.append({"kind": "single", "slug": members[0]})
+        else:
+            label, _ = PROVIDER_GROUPS[gid]
+            rows.append(
+                {"kind": "group", "group_id": gid, "label": label, "members": list(members)}
+            )
+    return rows
+
+
 _PROVIDER_ALIASES = {
     "glm": "zai",
     "z-ai": "zai",
diff --git a/hermes_cli/nous_account.py b/hermes_cli/nous_account.py
index 02ccb86c7dd..36c7abcd798 100644
--- a/hermes_cli/nous_account.py
+++ b/hermes_cli/nous_account.py
@@ -4,6 +4,7 @@ from __future__ import annotations
 
 import hashlib
 import json
+import threading
 import time
 import urllib.request
 from dataclasses import dataclass
@@ -15,6 +16,7 @@ NousAccountInfoSource = Literal["jwt", "account_api", "inference_key", "none", "
 
 _ACCOUNT_INFO_CACHE_TTL = 60
 _account_info_cache: tuple[str, float, "NousPortalAccountInfo"] | None = None
+_ACCOUNT_INFO_CACHE_LOCK = threading.Lock()
 
 
 @dataclass(frozen=True)
@@ -302,10 +304,11 @@ def _fresh_account_info(
         portal_base_url = _portal_base_url(refreshed_state) or portal_base_url
         cache_key = _cache_key(access_token, portal_base_url)
 
-        if not force_fresh and _account_info_cache is not None:
-            cached_key, cached_at, cached_info = _account_info_cache
-            if cached_key == cache_key and (time.monotonic() - cached_at) < _ACCOUNT_INFO_CACHE_TTL:
-                return cached_info
+        with _ACCOUNT_INFO_CACHE_LOCK:
+            if not force_fresh and _account_info_cache is not None:
+                cached_key, cached_at, cached_info = _account_info_cache
+                if cached_key == cache_key and (time.monotonic() - cached_at) < _ACCOUNT_INFO_CACHE_TTL:
+                    return cached_info
 
         payload = _fetch_nous_account_info(access_token, portal_base_url)
         if not payload:
@@ -327,7 +330,8 @@ def _fresh_account_info(
             state=refreshed_state,
             portal_base_url=portal_base_url,
         )
-        _account_info_cache = (cache_key, time.monotonic(), info)
+        with _ACCOUNT_INFO_CACHE_LOCK:
+            _account_info_cache = (cache_key, time.monotonic(), info)
         return info
     except Exception as exc:
         return _error_info(
diff --git a/hermes_cli/nous_subscription.py b/hermes_cli/nous_subscription.py
index 5f29101eb01..f19393337bd 100644
--- a/hermes_cli/nous_subscription.py
+++ b/hermes_cli/nous_subscription.py
@@ -587,9 +587,20 @@ def apply_nous_managed_defaults(
         changed.add("browser")
 
     if "image_gen" in selected_toolsets and not fal_key_is_configured():
+        image_cfg = config.get("image_gen")
+        if not isinstance(image_cfg, dict):
+            image_cfg = {}
+            config["image_gen"] = image_cfg
+        image_cfg["use_gateway"] = True
         changed.add("image_gen")
 
     if "video_gen" in selected_toolsets and not fal_key_is_configured():
+        video_cfg = config.get("video_gen")
+        if not isinstance(video_cfg, dict):
+            video_cfg = {}
+            config["video_gen"] = video_cfg
+        video_cfg["provider"] = "fal"
+        video_cfg["use_gateway"] = True
         changed.add("video_gen")
 
     return changed
diff --git a/hermes_cli/oneshot.py b/hermes_cli/oneshot.py
index b79644f6706..f66d71c62e6 100644
--- a/hermes_cli/oneshot.py
+++ b/hermes_cli/oneshot.py
@@ -174,28 +174,55 @@ def run_oneshot(
     # Redirect stderr AND stdout to devnull for the entire call tree.
     # We'll print the final response to the real stdout at the end.
     real_stdout = sys.stdout
+    real_stderr = sys.stderr
     devnull = open(os.devnull, "w", encoding="utf-8")
 
+    response: Optional[str] = None
+    failure: BaseException | None = None
     try:
         with redirect_stdout(devnull), redirect_stderr(devnull):
-            response = _run_agent(
-                prompt,
-                model=model,
-                provider=provider,
-                toolsets=explicit_toolsets,
-                use_config_toolsets=use_config_toolsets,
-            )
+            try:
+                response = _run_agent(
+                    prompt,
+                    model=model,
+                    provider=provider,
+                    toolsets=explicit_toolsets,
+                    use_config_toolsets=use_config_toolsets,
+                )
+            except BaseException as exc:  # noqa: BLE001
+                # Capture anything that escapes the agent (including OSError
+                # from prompt_toolkit/Vt100 when stdout is a non-TTY pipe,
+                # KeyboardInterrupt, SystemExit, etc.) so we can surface it on
+                # the real stderr instead of crashing past the redirect with a
+                # traceback that the caller never sees. A silent exit in a
+                # cron / SSH / subprocess context is the worst failure mode.
+                # See #30623.
+                failure = exc
     finally:
         try:
             devnull.close()
         except Exception:
             pass
 
-    if response:
-        real_stdout.write(response)
-        if not response.endswith("\n"):
-            real_stdout.write("\n")
-        real_stdout.flush()
+    if failure is not None:
+        # Re-raise control-flow exceptions so the parent handles them as usual
+        # (Ctrl-C / explicit sys.exit() inside the agent).
+        if isinstance(failure, (KeyboardInterrupt, SystemExit)):
+            raise failure
+        real_stderr.write(f"hermes -z: agent failed: {failure}\n")
+        real_stderr.flush()
+        return 1
+
+    if not (response or "").strip():
+        real_stderr.write("hermes -z: no final response was produced; treating the run as failed.\n")
+        real_stderr.flush()
+        return 1
+
+    assert response is not None  # narrowed by the empty-response guard above
+    real_stdout.write(response)
+    if not response.endswith("\n"):
+        real_stdout.write("\n")
+    real_stdout.flush()
     return 0
 
 
diff --git a/hermes_cli/profiles.py b/hermes_cli/profiles.py
index f490cbbfb99..31dbf8dfb4a 100644
--- a/hermes_cli/profiles.py
+++ b/hermes_cli/profiles.py
@@ -1471,8 +1471,9 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
 
 def _migrate_honcho_profile_host(old_name: str, new_name: str, new_dir: Path) -> None:
     """Rename Honcho host blocks for a renamed profile without changing peers."""
-    old_host = f"hermes.{old_name}"
-    new_host = f"hermes.{new_name}"
+    old_host = f"hermes_{old_name}"
+    legacy_old_host = f"hermes.{old_name}"
+    new_host = f"hermes_{new_name}"
 
     candidates = [
         new_dir / "honcho.json",
@@ -1496,18 +1497,24 @@ def _migrate_honcho_profile_host(old_name: str, new_name: str, new_dir: Path) ->
             continue
 
         hosts = raw.get("hosts")
-        if not isinstance(hosts, dict) or old_host not in hosts:
+        if not isinstance(hosts, dict):
+            continue
+        source_host = old_host if old_host in hosts else legacy_old_host
+        if source_host not in hosts:
             continue
 
         if new_host in hosts:
             print(f"⚠ Honcho host block not migrated: {new_host} already exists in {path}")
             continue
 
-        block = hosts[old_host]
+        block = hosts[source_host]
         if isinstance(block, dict) and "aiPeer" not in block:
-            bare = old_host.split(".", 1)[1] if "." in old_host else old_host
+            if source_host.startswith("hermes_"):
+                bare = source_host.split("_", 1)[1]
+            else:
+                bare = source_host.split(".", 1)[1] if "." in source_host else source_host
             block["aiPeer"] = bare
-        hosts[new_host] = hosts.pop(old_host)
+        hosts[new_host] = hosts.pop(source_host)
         tmp = path.with_suffix(path.suffix + ".tmp")
         try:
             tmp.write_text(json.dumps(raw, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
@@ -1519,7 +1526,7 @@ def _migrate_honcho_profile_host(old_name: str, new_name: str, new_dir: Path) ->
                 pass
             continue
 
-        print(f"✓ Honcho host updated: {old_host} → {new_host}")
+        print(f"✓ Honcho host updated: {source_host} → {new_host}")
 
 
 def rename_profile(old_name: str, new_name: str) -> Path:
diff --git a/hermes_cli/prompt_size.py b/hermes_cli/prompt_size.py
new file mode 100644
index 00000000000..913beb18bd3
--- /dev/null
+++ b/hermes_cli/prompt_size.py
@@ -0,0 +1,153 @@
+"""Prompt-size diagnostic: ``hermes prompt-size``.
+
+Reports a byte/char breakdown of the system prompt the agent would build for
+a fresh session — system prompt total, the ``<available_skills>`` index,
+memory + user profile, and tool-schema JSON. Lets users see where their fixed
+prompt budget goes (issue #34667) without parsing a saved session JSON by hand.
+
+The diagnostic builds a real inspection agent (so the numbers match what
+actually ships on the wire) but never makes a network call: it passes dummy
+credentials so ``AIAgent.__init__`` takes the direct-construction path, then
+calls ``build_system_prompt_parts`` / inspects ``agent.tools`` offline.
+"""
+
+from __future__ import annotations
+
+import json
+import re
+from typing import Any, Dict, List, Tuple
+
+# The skills index is wrapped in this tag pair inside the stable tier.
+_SKILLS_BLOCK_RE = re.compile(r"<available_skills>.*?</available_skills>", re.DOTALL)
+
+
+def _bytes(s: str) -> int:
+    return len(s.encode("utf-8"))
+
+
+def _build_inspection_agent(platform: str) -> Any:
+    """Construct an offline AIAgent for prompt inspection.
+
+    Dummy ``api_key`` + ``base_url`` force the direct-construction path in
+    ``run_agent.py`` (no provider auto-detection, no network). Toolsets and
+    platform come from the caller so the breakdown matches a real session.
+    """
+    from run_agent import AIAgent
+    from hermes_cli.config import load_config
+
+    cfg = load_config()
+    model_cfg = cfg.get("model", {}) if isinstance(cfg.get("model"), dict) else {}
+    model = model_cfg.get("default") or model_cfg.get("model") or ""
+
+    return AIAgent(
+        model=model,
+        api_key="inspect-only",
+        base_url="https://openrouter.ai/api/v1",
+        quiet_mode=True,
+        save_trajectories=False,
+        platform=platform,
+    )
+
+
+def compute_prompt_breakdown(platform: str = "cli") -> Dict[str, Any]:
+    """Return a dict of prompt-size measurements for a fresh session.
+
+    Keys: ``system_prompt`` (chars/bytes), ``skills_index``, ``memory``,
+    ``user_profile``, ``tools`` (count + json bytes), and ``sections`` (a list
+    of (label, chars, bytes) for the three prompt tiers).
+    """
+    from agent.system_prompt import build_system_prompt, build_system_prompt_parts
+
+    agent = _build_inspection_agent(platform)
+
+    parts = build_system_prompt_parts(agent)
+    full = build_system_prompt(agent)
+
+    stable = parts.get("stable", "")
+    context = parts.get("context", "")
+    volatile = parts.get("volatile", "")
+
+    # Skills index — the <available_skills> block (the largest single block
+    # when many skills are installed). Measured inside the stable tier.
+    skills_match = _SKILLS_BLOCK_RE.search(stable)
+    skills_index = skills_match.group(0) if skills_match else ""
+
+    # Memory + user profile live in the volatile tier. We re-derive their
+    # blocks directly from the memory store so the numbers are attributable
+    # even though they're joined into ``volatile``.
+    memory_block = ""
+    user_block = ""
+    store = getattr(agent, "_memory_store", None)
+    if store is not None:
+        try:
+            if getattr(agent, "_memory_enabled", True):
+                memory_block = store.format_for_system_prompt("memory") or ""
+            if getattr(agent, "_user_profile_enabled", True):
+                user_block = store.format_for_system_prompt("user") or ""
+        except Exception:
+            pass
+
+    # Tool-schema JSON — the other half of the fixed per-call payload.
+    tools = getattr(agent, "tools", None) or []
+    tools_json = json.dumps(tools, ensure_ascii=False)
+
+    sections: List[Tuple[str, int, int]] = [
+        ("stable (identity/guidance/skills)", len(stable), _bytes(stable)),
+        ("context (AGENTS.md/cwd files)", len(context), _bytes(context)),
+        ("volatile (memory/profile/timestamp)", len(volatile), _bytes(volatile)),
+    ]
+
+    return {
+        "platform": platform,
+        "model": getattr(agent, "model", "") or "",
+        "system_prompt": {"chars": len(full), "bytes": _bytes(full)},
+        "skills_index": {"chars": len(skills_index), "bytes": _bytes(skills_index)},
+        "memory": {"chars": len(memory_block), "bytes": _bytes(memory_block)},
+        "user_profile": {"chars": len(user_block), "bytes": _bytes(user_block)},
+        "tools": {"count": len(tools), "json_bytes": _bytes(tools_json)},
+        "sections": sections,
+    }
+
+
+def _fmt_kb(n: int) -> str:
+    return f"{n / 1024:.1f} KB"
+
+
+def render_breakdown(data: Dict[str, Any]) -> str:
+    """Render the breakdown as plain text suitable for a terminal."""
+    lines: List[str] = []
+    sp = data["system_prompt"]
+    lines.append(f"Prompt-size breakdown (platform={data['platform']}, model={data['model'] or 'unset'})")
+    lines.append("")
+    lines.append(f"  System prompt total : {sp['bytes']:>8,} B  ({_fmt_kb(sp['bytes'])}, {sp['chars']:,} chars)")
+    lines.append("")
+    lines.append("  Major blocks:")
+    si = data["skills_index"]
+    mem = data["memory"]
+    up = data["user_profile"]
+    lines.append(f"    skills index       : {si['bytes']:>8,} B  ({_fmt_kb(si['bytes'])})")
+    lines.append(f"    memory             : {mem['bytes']:>8,} B  ({_fmt_kb(mem['bytes'])})")
+    lines.append(f"    user profile       : {up['bytes']:>8,} B  ({_fmt_kb(up['bytes'])})")
+    lines.append("")
+    lines.append("  Prompt tiers:")
+    for label, chars, byts in data["sections"]:
+        lines.append(f"    {label:<36}: {byts:>8,} B  ({_fmt_kb(byts)})")
+    lines.append("")
+    tools = data["tools"]
+    lines.append(f"  Tool schemas         : {tools['json_bytes']:>8,} B  ({_fmt_kb(tools['json_bytes'])}, {tools['count']} tools)")
+    return "\n".join(lines)
+
+
+def cmd_prompt_size(args: Any) -> None:
+    """Entry point for ``hermes prompt-size``."""
+    platform = getattr(args, "platform", "cli") or "cli"
+    as_json = getattr(args, "json", False)
+    try:
+        data = compute_prompt_breakdown(platform)
+    except Exception as e:
+        print(f"Could not compute prompt-size breakdown: {e}")
+        return
+    if as_json:
+        print(json.dumps(data, ensure_ascii=False, indent=2))
+    else:
+        print(render_breakdown(data))
diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py
index 1d83e7d7711..cd294681fc8 100644
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@@ -4168,10 +4168,19 @@ _LOOPBACK_HOSTS = frozenset({"127.0.0.1", "::1", "localhost", "testclient"})
 def _ws_client_is_allowed(ws: "WebSocket") -> bool:
     """Check if the WebSocket client IP is acceptable.
 
-    Loopback mode: only loopback clients allowed — the legacy
+    Loopback bind: only loopback clients allowed — the legacy
     ``?token=<_SESSION_TOKEN>`` path is the only auth we have, so we
     don't want LAN hosts guessing tokens.
 
+    Explicit non-loopback bind (``--host 0.0.0.0``, ``--host ::``, or a
+    specific address such as a Tailscale/LAN IP, always with
+    ``--insecure``): allow any peer. The operator explicitly opted into
+    non-loopback exposure, so the loopback-only peer restriction does not
+    apply. DNS-rebinding is still blocked by the Host/Origin guard in
+    :func:`_ws_host_origin_is_allowed`, which mirrors the HTTP layer and
+    requires the Host header to match the bound interface — the same
+    defence ``_is_accepted_host`` applies to non-loopback HTTP requests.
+
     Gated mode: any peer is allowed — uvicorn's ``proxy_headers=True``
     (enabled when the OAuth gate is active so cookies can pick up
     ``X-Forwarded-Proto``) rewrites ``ws.client.host`` to the
@@ -4182,6 +4191,14 @@ def _ws_client_is_allowed(ws: "WebSocket") -> bool:
     """
     if getattr(app.state, "auth_required", False):
         return True
+    # Any explicit non-loopback bind (0.0.0.0, ::, or a specific LAN /
+    # Tailscale address) means the operator opted into non-loopback
+    # access via --insecure.  The loopback-only peer gate only applies to
+    # an actual loopback bind; otherwise the WS handshake is rejected even
+    # though same-bind HTTP requests pass _is_accepted_host.
+    bound_host = (getattr(app.state, "bound_host", "") or "").strip().lower()
+    if bound_host and bound_host not in _LOOPBACK_HOSTS:
+        return True
     client_host = ws.client.host if ws.client else ""
     if not client_host:
         return True
diff --git a/hermes_state.py b/hermes_state.py
index f132c171654..72a0781af67 100644
--- a/hermes_state.py
+++ b/hermes_state.py
@@ -381,6 +381,7 @@ class SessionDB:
 
         self._lock = threading.Lock()
         self._write_count = 0
+        self._fts_enabled = False
         try:
             self._conn = sqlite3.connect(
                 str(self.db_path),
@@ -389,7 +390,6 @@ class SessionDB:
                 # handles contention instead of sitting in SQLite's internal
                 # busy handler for up to 30s.
                 timeout=1.0,
-                # Autocommit mode: Python's default isolation_level=""
                 # auto-starts transactions on DML, which conflicts with our
                 # explicit BEGIN IMMEDIATE.  None = we manage transactions
                 # ourselves.
@@ -725,14 +725,44 @@ class SessionDB:
         # FTS5 setup (separate because CREATE VIRTUAL TABLE can't be in executescript with IF NOT EXISTS reliably)
         try:
             cursor.execute("SELECT * FROM messages_fts LIMIT 0")
-        except sqlite3.OperationalError:
-            cursor.executescript(FTS_SQL)
+            self._fts_enabled = True
+        except sqlite3.OperationalError as exc:
+            if "no such table" not in str(exc).lower():
+                raise
+            try:
+                cursor.executescript(FTS_SQL)
+                self._fts_enabled = True
+            except sqlite3.OperationalError as fts_exc:
+                err = str(fts_exc).lower()
+                if "fts5" not in err and "no such module" not in err:
+                    raise
+                logger.warning(
+                    "SQLite FTS5 unavailable for %s; full-text session search "
+                    "disabled. This usually means Hermes is running on an "
+                    "unsupported install (e.g. a pip-installed or pip-managed "
+                    "Python whose bundled SQLite lacks FTS5) rather than a "
+                    "mainline install. Some features may be missing or behave "
+                    "differently. Install the supported way: "
+                    "https://hermes-agent.nousresearch.com (underlying error: %s)",
+                    self.db_path,
+                    fts_exc,
+                )
 
         # Trigram FTS5 for CJK/substring search
         try:
             cursor.execute("SELECT * FROM messages_fts_trigram LIMIT 0")
-        except sqlite3.OperationalError:
-            cursor.executescript(FTS_TRIGRAM_SQL)
+        except sqlite3.OperationalError as exc:
+            if "no such table" not in str(exc).lower():
+                raise
+            try:
+                cursor.executescript(FTS_TRIGRAM_SQL)
+            except sqlite3.OperationalError as fts_exc:
+                err = str(fts_exc).lower()
+                if "fts5" not in err and "no such module" not in err:
+                    raise
+                # Same FTS5-unavailable cause already warned about above for
+                # messages_fts; the trigram table is an additional CJK index,
+                # so just degrade silently here. CJK search falls back to LIKE.
 
         self._conn.commit()
 
@@ -947,6 +977,20 @@ class SessionDB:
             )
         self._execute_write(_do)
 
+    def update_session_model(self, session_id: str, model: str) -> None:
+        """Update the model for a session after a mid-session switch.
+
+        Unlike ``update_token_counts`` which uses ``COALESCE(model, ?)``
+        (only filling in NULL), this unconditionally sets the model column
+        so that the dashboard reflects the user's latest /model choice.
+        """
+        def _do(conn):
+            conn.execute(
+                "UPDATE sessions SET model = ? WHERE id = ?",
+                (model, session_id),
+            )
+        self._execute_write(_do)
+
     def update_token_counts(
         self,
         session_id: str,
@@ -2333,6 +2377,9 @@ class SessionDB:
         ignores ``sort``. The trigram CJK path honours ``sort`` like the main
         FTS5 path.
         """
+        if not self._fts_enabled:
+            return []
+
         if not query or not query.strip():
             return []
 
diff --git a/locales/af.yaml b/locales/af.yaml
index 636bae754f3..a64e759c441 100644
--- a/locales/af.yaml
+++ b/locales/af.yaml
@@ -255,7 +255,7 @@ gateway:
     title:                 "**Titel:** {title}"
     created:               "**Geskep:** {timestamp}"
     last_activity:         "**Laaste aktiwiteit:** {timestamp}"
-    tokens:                "**Tokens:** {tokens}"
+    tokens:                "**Kumulatiewe API-tokens (elke oproep weer gestuur):** {tokens}"
     agent_running:         "**Agent loop:** {state}"
     state_yes:             "Ja ⚡"
     state_no:              "Nee"
diff --git a/locales/de.yaml b/locales/de.yaml
index f400dd9fb2e..4b84f2e4b66 100644
--- a/locales/de.yaml
+++ b/locales/de.yaml
@@ -255,7 +255,7 @@ gateway:
     title:                 "**Titel:** {title}"
     created:               "**Erstellt:** {timestamp}"
     last_activity:         "**Letzte Aktivität:** {timestamp}"
-    tokens:                "**Tokens:** {tokens}"
+    tokens:                "**Kumulierte API-Tokens (bei jedem Aufruf erneut gesendet):** {tokens}"
     agent_running:         "**Agent läuft:** {state}"
     state_yes:             "Ja ⚡"
     state_no:              "Nein"
diff --git a/locales/en.yaml b/locales/en.yaml
index 88d18a2f892..93d7ffdc433 100644
--- a/locales/en.yaml
+++ b/locales/en.yaml
@@ -270,7 +270,7 @@ gateway:
     title:                 "**Title:** {title}"
     created:               "**Created:** {timestamp}"
     last_activity:         "**Last Activity:** {timestamp}"
-    tokens:                "**Tokens:** {tokens}"
+    tokens:                "**Cumulative API tokens (re-sent each call):** {tokens}"
     agent_running:         "**Agent Running:** {state}"
     state_yes:             "Yes ⚡"
     state_no:              "No"
diff --git a/locales/es.yaml b/locales/es.yaml
index 08aaf9ad0b4..6a3cccb66a4 100644
--- a/locales/es.yaml
+++ b/locales/es.yaml
@@ -255,7 +255,7 @@ gateway:
     title:                 "**Título:** {title}"
     created:               "**Creado:** {timestamp}"
     last_activity:         "**Última actividad:** {timestamp}"
-    tokens:                "**Tokens:** {tokens}"
+    tokens:                "**Tokens de API acumulados (reenviados en cada llamada):** {tokens}"
     agent_running:         "**Agente activo:** {state}"
     state_yes:             "Sí ⚡"
     state_no:              "No"
diff --git a/locales/pt.yaml b/locales/pt.yaml
index 0c0eddad91e..662971f08b7 100644
--- a/locales/pt.yaml
+++ b/locales/pt.yaml
@@ -255,7 +255,7 @@ gateway:
     title:                 "**Título:** {title}"
     created:               "**Criada:** {timestamp}"
     last_activity:         "**Última atividade:** {timestamp}"
-    tokens:                "**Tokens:** {tokens}"
+    tokens:                "**Tokens de API cumulativos (reenviados a cada chamada):** {tokens}"
     agent_running:         "**Agente em execução:** {state}"
     state_yes:             "Sim ⚡"
     state_no:              "Não"
diff --git a/optional-skills/autonomous-ai-agents/honcho/SKILL.md b/optional-skills/autonomous-ai-agents/honcho/SKILL.md
index 865d844df26..b4a24a46e25 100644
--- a/optional-skills/autonomous-ai-agents/honcho/SKILL.md
+++ b/optional-skills/autonomous-ai-agents/honcho/SKILL.md
@@ -32,14 +32,14 @@ Honcho provides AI-native cross-session user modeling. It learns who the user is
 ### Cloud (app.honcho.dev)
 
 ```bash
-hermes honcho setup
+hermes memory setup honcho
 # select "cloud", paste API key from https://app.honcho.dev
 ```
 
 ### Self-hosted
 
 ```bash
-hermes honcho setup
+hermes memory setup honcho
 # select "local", enter base URL (e.g. http://localhost:8000)
 ```
 
diff --git a/plugins/browser/browser_use/provider.py b/plugins/browser/browser_use/provider.py
index 3d371bdd88a..46a22033344 100644
--- a/plugins/browser/browser_use/provider.py
+++ b/plugins/browser/browser_use/provider.py
@@ -119,17 +119,20 @@ class BrowserUseBrowserProvider(BrowserProvider):
         return "Browser Use"
 
     def is_available(self) -> bool:
-        return self._get_config_or_none() is not None
+        return self._get_config_or_none(refresh_token=False) is not None
 
     # ------------------------------------------------------------------
     # Config resolution (direct API key OR managed Nous gateway)
     # ------------------------------------------------------------------
 
-    def _get_config_or_none(self) -> Optional[Dict[str, Any]]:
+    def _get_config_or_none(self, *, refresh_token: bool = True) -> Optional[Dict[str, Any]]:
         # Import here to avoid a hard dependency at module-import time —
         # managed_tool_gateway pulls in the Nous auth stack which can be
         # heavy and is not needed for direct-API-key users.
-        from tools.managed_tool_gateway import resolve_managed_tool_gateway
+        from tools.managed_tool_gateway import (
+            peek_nous_access_token,
+            resolve_managed_tool_gateway,
+        )
         from tools.tool_backend_helpers import prefers_gateway
 
         # Direct API key wins unless the user has explicitly opted into the
@@ -142,7 +145,11 @@ class BrowserUseBrowserProvider(BrowserProvider):
                 "managed_mode": False,
             }
 
-        managed = resolve_managed_tool_gateway("browser-use")
+        # Keep availability scans off the synchronous OAuth refresh path.
+        managed = resolve_managed_tool_gateway(
+            "browser-use",
+            token_reader=None if refresh_token else peek_nous_access_token,
+        )
         if managed is None:
             return None
 
diff --git a/plugins/kanban/dashboard/dist/index.js b/plugins/kanban/dashboard/dist/index.js
index 9a04b6a649e..c22c06c1293 100644
--- a/plugins/kanban/dashboard/dist/index.js
+++ b/plugins/kanban/dashboard/dist/index.js
@@ -2741,6 +2741,8 @@
     // Ready/Block/Complete buttons feel like no-ops.  See #26744.
     const [patchErr, setPatchErr] = useState(null);
     const [newComment, setNewComment] = useState("");
+    const [uploadBusy, setUploadBusy] = useState(false);
+    const [uploadErr, setUploadErr] = useState(null);
     const [editing, setEditing] = useState(false);
     // Home-channel notification toggles. homeChannels is the list of platforms
     // the user has a /sethome on; each entry has a `subscribed` bool telling
@@ -2789,6 +2791,49 @@
       }).catch(function (e) { setErr(String(e.message || e)); });
     };
 
+    // File upload uses raw fetch (not SDK.fetchJSON, which JSON-encodes)
+    // so the browser sets the multipart boundary. Auth rides the session
+    // cookie + bearer token, matching the rest of the dashboard.
+    const handleUpload = function (fileList) {
+      const files = Array.prototype.slice.call(fileList || []);
+      if (!files.length) return;
+      setUploadBusy(true);
+      setUploadErr(null);
+      const token = window.__HERMES_SESSION_TOKEN__ || "";
+      const headers = token ? { Authorization: "Bearer " + token } : {};
+      const url = withBoard(`${API}/tasks/${encodeURIComponent(props.taskId)}/attachments`, boardSlug);
+      // Upload sequentially so a partial failure leaves a clear state.
+      let chain = Promise.resolve();
+      files.forEach(function (f) {
+        chain = chain.then(function () {
+          const fd = new FormData();
+          fd.append("file", f, f.name);
+          return fetch(url, { method: "POST", headers: headers, credentials: "same-origin", body: fd })
+            .then(function (resp) {
+              if (!resp.ok) {
+                return resp.text().then(function (txt) {
+                  throw new Error(parseApiErrorMessage(new Error(resp.status + ": " + txt)));
+                });
+              }
+            });
+        });
+      });
+      chain.then(function () {
+        load();
+        props.onRefresh();
+      }).catch(function (e) {
+        setUploadErr(String(e.message || e));
+      }).finally(function () {
+        setUploadBusy(false);
+      });
+    };
+
+    const handleDeleteAttachment = function (attachmentId) {
+      return SDK.fetchJSON(withBoard(`${API}/attachments/${attachmentId}`, boardSlug), { method: "DELETE" })
+        .then(function () { load(); props.onRefresh(); })
+        .catch(function (e) { setUploadErr(String(e.message || e)); });
+    };
+
     const doPatch = function (patch, opts) {
       if (opts && opts.confirm && !window.confirm(opts.confirm)) {
         return Promise.resolve();
@@ -2946,6 +2991,10 @@
           homeBusy: homeBusy,
           onToggleHomeSub: toggleHomeSubscription,
           onRefresh: props.onRefresh,
+          onUpload: handleUpload,
+          onDeleteAttachment: handleDeleteAttachment,
+          uploadBusy: uploadBusy,
+          uploadErr: uploadErr,
         }) : null,
         data ? h("div", { className: "hermes-kanban-drawer-comment-row" },
           h(Input, {
@@ -2968,11 +3017,118 @@
     );
   }
 
+  function _fmtBytes(n) {
+    n = Number(n) || 0;
+    if (n < 1024) return n + " B";
+    if (n < 1024 * 1024) return (n / 1024).toFixed(1) + " KB";
+    return (n / (1024 * 1024)).toFixed(1) + " MB";
+  }
+
+  // Attachments section in the task drawer (#35338). Upload button +
+  // list with download links and a delete (×) per row. The download
+  // link hits GET /attachments/:id which streams the file; the worker
+  // context surfaces the same files' absolute paths so a kanban worker
+  // can read them with the file/terminal tools.
+  function AttachmentsSection(props) {
+    const i18n = props.i18n;
+    const atts = props.attachments || [];
+    const fileRef = useRef(null);
+    const [dlErr, setDlErr] = useState(null);
+    // Download via authenticated fetch → blob → synthetic anchor click.
+    // A plain <a href> can't carry the session header/bearer the dashboard
+    // auth middleware requires in loopback mode, so fetch with the token
+    // and hand the browser a blob URL instead.
+    function downloadAttachment(a) {
+      const token = window.__HERMES_SESSION_TOKEN__ || "";
+      const headers = token ? { Authorization: "Bearer " + token } : {};
+      const url = withBoard(`${API}/attachments/${a.id}`, props.boardSlug);
+      setDlErr(null);
+      fetch(url, { headers: headers, credentials: "same-origin" })
+        .then(function (resp) {
+          if (!resp.ok) {
+            return resp.text().then(function (txt) {
+              throw new Error(parseApiErrorMessage(new Error(resp.status + ": " + txt)));
+            });
+          }
+          return resp.blob();
+        })
+        .then(function (blob) {
+          const objUrl = URL.createObjectURL(blob);
+          const link = document.createElement("a");
+          link.href = objUrl;
+          link.download = a.filename || "attachment";
+          document.body.appendChild(link);
+          link.click();
+          document.body.removeChild(link);
+          setTimeout(function () { URL.revokeObjectURL(objUrl); }, 10000);
+        })
+        .catch(function (e) { setDlErr(String(e.message || e)); });
+    }
+    return h("div", { className: "hermes-kanban-section" },
+      h("div", { className: "hermes-kanban-section-head" },
+        `${tx(i18n, "attachments", "Attachments")} (${atts.length})`),
+      h("input", {
+        ref: fileRef,
+        type: "file",
+        multiple: true,
+        style: { display: "none" },
+        onChange: function (e) {
+          if (props.onUpload) props.onUpload(e.target.files);
+          // Reset so selecting the same file again re-triggers onChange.
+          try { e.target.value = ""; } catch (_e) { /* ignore */ }
+        },
+      }),
+      h("div", { className: "flex items-center gap-2 mb-2" },
+        h(Button, {
+          size: "sm",
+          variant: "outline",
+          disabled: !!props.uploadBusy,
+          onClick: function () { if (fileRef.current) fileRef.current.click(); },
+        }, props.uploadBusy
+            ? tx(i18n, "uploading", "Uploading…")
+            : tx(i18n, "uploadFile", "Upload file")),
+      ),
+      (props.uploadErr || dlErr)
+        ? h("div", { className: "text-xs text-destructive mb-2" }, props.uploadErr || dlErr)
+        : null,
+      atts.length === 0
+        ? h("div", { className: "text-xs text-muted-foreground" },
+            tx(i18n, "noAttachments", "— no attachments —"))
+        : atts.map(function (a) {
+            return h("div", {
+              key: a.id,
+              className: "flex items-center justify-between gap-2 py-1 text-sm",
+            },
+              h("button", {
+                type: "button",
+                className: "hermes-kanban-attachment-link truncate",
+                title: a.filename,
+                onClick: function () { downloadAttachment(a); },
+              }, a.filename),
+              h("span", { className: "text-xs text-muted-foreground whitespace-nowrap" },
+                _fmtBytes(a.size)),
+              h("button", {
+                type: "button",
+                className: "hermes-kanban-drawer-close",
+                title: tx(i18n, "removeAttachment", "Remove attachment"),
+                onClick: function () {
+                  if (window.confirm(tx(i18n, "confirmRemoveAttachment",
+                      "Remove this attachment?"))) {
+                    if (props.onDelete) props.onDelete(a.id);
+                  }
+                },
+              }, "×"),
+            );
+          }),
+    );
+  }
+
   function TaskDetail(props) {
     const { t: i18n } = useI18n();
     const t = props.data.task;
     const comments = props.data.comments || [];
     const events = props.data.events || [];
+    const attachments = props.data.attachments || [];
     const links = props.data.links || { parents: [], children: [] };
 
     return h("div", { className: "hermes-kanban-drawer-body" },
@@ -3042,6 +3198,15 @@
         h("div", { className: "hermes-kanban-section-head" }, tx(i18n, "result", "Result")),
         h(MarkdownBlock, { source: t.result, enabled: props.renderMarkdown }),
       ) : null,
+      h(AttachmentsSection, {
+        attachments: attachments,
+        boardSlug: props.boardSlug,
+        onUpload: props.onUpload,
+        onDelete: props.onDeleteAttachment,
+        uploadBusy: props.uploadBusy,
+        uploadErr: props.uploadErr,
+        i18n: i18n,
+      }),
       h("div", { className: "hermes-kanban-section" },
         h("div", { className: "hermes-kanban-section-head" },
           `${tx(i18n, "comments", "Comments")} (${comments.length})`),
diff --git a/plugins/kanban/dashboard/dist/style.css b/plugins/kanban/dashboard/dist/style.css
index 9aa780e6213..6b396b2612e 100644
--- a/plugins/kanban/dashboard/dist/style.css
+++ b/plugins/kanban/dashboard/dist/style.css
@@ -334,6 +334,11 @@
 .hermes-kanban-drawer {
   width: min(var(--hermes-kanban-drawer-width, 640px), 92vw);
   height: 100vh;
+  /* Dynamic viewport unit excludes the mobile browser's collapsing chrome
+     (URL/nav bars) so the drawer's bottom row stays reachable. Falls back to
+     100vh on browsers without dvh support. */
+  height: 100dvh;
+  max-height: 100dvh;
   background: var(--color-card);
   border-left: 1px solid var(--color-border);
   display: flex;
@@ -352,10 +357,23 @@
   align-items: center;
   justify-content: space-between;
   padding: 0.6rem 0.8rem;
+  /* Honor the top safe-area inset (notch) so the task id / close button are
+     not clipped on mobile. */
+  padding-top: max(0.6rem, env(safe-area-inset-top));
   border-bottom: 1px solid var(--color-border);
   font-family: var(--font-mono, ui-monospace, monospace);
 }
 
+/* On mobile the dashboard shell renders a fixed top bar (min-h-14, hidden at
+   the lg breakpoint). The drawer is a body-level z-60 overlay starting at the
+   viewport top, so its header would sit behind that bar. Push the header down
+   by the bar height (3.5rem) plus the top safe-area inset. */
+@media (max-width: 1023px) {
+  .hermes-kanban-drawer-head {
+    padding-top: calc(3.5rem + env(safe-area-inset-top));
+  }
+}
+
 .hermes-kanban-drawer-close {
   appearance: none;
   background: transparent;
@@ -368,10 +386,33 @@
 }
 .hermes-kanban-drawer-close:hover { color: var(--color-foreground); }
 
+/* Attachment download trigger — styled as a link, rendered as a <button>
+   so the click handler can fetch with the session token (#35338). */
+.hermes-kanban-attachment-link {
+  appearance: none;
+  background: transparent;
+  border: 0;
+  padding: 0;
+  margin: 0;
+  text-align: left;
+  color: var(--color-primary, #6ea8fe);
+  cursor: pointer;
+  text-decoration: none;
+  overflow: hidden;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+  flex: 1;
+}
+.hermes-kanban-attachment-link:hover { text-decoration: underline; }
+
 .hermes-kanban-drawer-body {
   flex: 1;
   overflow-y: auto;
   padding: 0.9rem;
+  /* When no comment row is rendered (loading / error states), the scrolling
+     body is the bottom-most element — extend its bottom padding past the
+     mobile browser chrome so the last content stays readable. */
+  padding-bottom: max(0.9rem, calc(0.9rem + env(safe-area-inset-bottom)));
   display: flex;
   flex-direction: column;
   gap: 0.85rem;
@@ -530,6 +571,9 @@
   display: flex;
   gap: 0.4rem;
   padding: 0.55rem 0.75rem;
+  /* Keep the comment input clear of the mobile browser nav bar / home
+     indicator by extending the bottom padding with the safe-area inset. */
+  padding-bottom: max(0.55rem, calc(0.55rem + env(safe-area-inset-bottom)));
   border-top: 1px solid var(--color-border);
   background: color-mix(in srgb, var(--color-card) 90%, transparent);
 }
diff --git a/plugins/kanban/dashboard/plugin_api.py b/plugins/kanban/dashboard/plugin_api.py
index cae4d872302..0c2122c2a11 100644
--- a/plugins/kanban/dashboard/plugin_api.py
+++ b/plugins/kanban/dashboard/plugin_api.py
@@ -43,9 +43,11 @@ import os
 import sqlite3
 import time
 from dataclasses import asdict
+from pathlib import Path
 from typing import Any, Optional
 
-from fastapi import APIRouter, HTTPException, Query, WebSocket, WebSocketDisconnect, status as http_status
+from fastapi import APIRouter, File, Form, HTTPException, Query, UploadFile, WebSocket, WebSocketDisconnect, status as http_status
+from fastapi.responses import FileResponse
 from pydantic import BaseModel, Field
 
 from hermes_cli import kanban_db
@@ -186,6 +188,21 @@ def _comment_dict(c: kanban_db.Comment) -> dict[str, Any]:
     }
 
 
+def _attachment_dict(a: kanban_db.Attachment) -> dict[str, Any]:
+    """Serialise an Attachment for the drawer. ``stored_path`` is the
+    absolute on-disk path workers read; the UI uses ``id`` for download."""
+    return {
+        "id": a.id,
+        "task_id": a.task_id,
+        "filename": a.filename,
+        "content_type": a.content_type,
+        "size": a.size,
+        "uploaded_by": a.uploaded_by,
+        "stored_path": a.stored_path,
+        "created_at": a.created_at,
+    }
+
+
 def _run_dict(r: kanban_db.Run) -> dict[str, Any]:
     """Serialise a Run for the drawer's Run history section."""
     return {
@@ -531,6 +548,7 @@ def get_task(
             "task": task_d,
             "comments": [_comment_dict(c) for c in kanban_db.list_comments(conn, task_id)],
             "events": [_event_dict(e) for e in kanban_db.list_events(conn, task_id)],
+            "attachments": [_attachment_dict(a) for a in kanban_db.list_attachments(conn, task_id)],
             "links": _links_for(conn, task_id),
             "runs": [
                 _run_dict(r)
@@ -609,6 +627,165 @@ def create_task(payload: CreateTaskBody, board: Optional[str] = Query(None)):
         conn.close()
 
 
+# ---------------------------------------------------------------------------
+# Attachments — upload / list / download / delete (#35338)
+# ---------------------------------------------------------------------------
+
+# Cap a single upload so a runaway request can't fill the disk. 25 MB
+# comfortably covers PDFs, images, and source docs — the kanban use case.
+_MAX_ATTACHMENT_BYTES = 25 * 1024 * 1024
+
+
+def _safe_attachment_name(raw: str) -> str:
+    """Reduce a client-supplied filename to a safe basename.
+
+    Strips any directory components (``os.path.basename`` on both
+    separators) so a malicious ``../../etc/passwd`` or ``C:\\x`` collapses
+    to its leaf. Rejects empty / dotfile-only names. The result is only
+    ever joined under the per-task attachments dir, never used verbatim
+    as a path from the client.
+    """
+    name = (raw or "").replace("\\", "/").split("/")[-1].strip()
+    # Drop control chars and leading dots so we never write a dotfile or
+    # a name with embedded NULs/newlines.
+    name = "".join(ch for ch in name if ch.isprintable() and ch not in '\x00').strip()
+    name = name.lstrip(".").strip()
+    if not name:
+        raise HTTPException(status_code=400, detail="invalid attachment filename")
+    return name[:200]
+
+
+@router.get("/tasks/{task_id}/attachments")
+def list_task_attachments(task_id: str, board: Optional[str] = Query(None)):
+    board = _resolve_board(board)
+    conn = _conn(board=board)
+    try:
+        if kanban_db.get_task(conn, task_id) is None:
+            raise HTTPException(status_code=404, detail=f"task {task_id} not found")
+        return {
+            "attachments": [
+                _attachment_dict(a) for a in kanban_db.list_attachments(conn, task_id)
+            ]
+        }
+    finally:
+        conn.close()
+
+
+@router.post("/tasks/{task_id}/attachments")
+async def upload_task_attachment(
+    task_id: str,
+    file: UploadFile = File(...),
+    board: Optional[str] = Query(None),
+    uploaded_by: Optional[str] = Form(None),
+):
+    """Store an uploaded file for a task and record its metadata.
+
+    The blob lands under ``attachments_root(board)/<task_id>/`` with a
+    sanitised, collision-resolved name. The worker reads it via the
+    absolute path surfaced in ``build_worker_context``.
+    """
+    board = _resolve_board(board)
+    conn = _conn(board=board)
+    try:
+        if kanban_db.get_task(conn, task_id) is None:
+            raise HTTPException(status_code=404, detail=f"task {task_id} not found")
+
+        safe_name = _safe_attachment_name(file.filename or "")
+
+        # Stream to disk with a hard size cap so a huge upload can't fill
+        # the disk. Read in chunks; abort + clean up if the cap is hit.
+        dest_dir = kanban_db.task_attachments_dir(task_id, board=board)
+        dest_dir.mkdir(parents=True, exist_ok=True)
+
+        # Resolve name collisions: foo.pdf → foo (1).pdf, foo (2).pdf, …
+        stem, dot, ext = safe_name.partition(".")
+        candidate = safe_name
+        n = 1
+        while (dest_dir / candidate).exists():
+            candidate = f"{stem} ({n}){dot}{ext}"
+            n += 1
+        dest_path = dest_dir / candidate
+
+        total = 0
+        try:
+            with open(dest_path, "wb") as out:
+                while True:
+                    chunk = await file.read(1024 * 1024)
+                    if not chunk:
+                        break
+                    total += len(chunk)
+                    if total > _MAX_ATTACHMENT_BYTES:
+                        out.close()
+                        dest_path.unlink(missing_ok=True)
+                        raise HTTPException(
+                            status_code=413,
+                            detail=(
+                                f"attachment exceeds {_MAX_ATTACHMENT_BYTES // (1024 * 1024)} MB limit"
+                            ),
+                        )
+                    out.write(chunk)
+        except HTTPException:
+            raise
+        except OSError as exc:
+            raise HTTPException(status_code=500, detail=f"failed to store attachment: {exc}")
+
+        att_id = kanban_db.add_attachment(
+            conn,
+            task_id,
+            filename=candidate,
+            stored_path=str(dest_path.resolve()),
+            content_type=file.content_type,
+            size=total,
+            uploaded_by=(uploaded_by or "dashboard"),
+        )
+        att = kanban_db.get_attachment(conn, att_id)
+        return {"attachment": _attachment_dict(att) if att else None}
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    finally:
+        conn.close()
+
+
+@router.get("/attachments/{attachment_id}")
+def download_attachment(attachment_id: int, board: Optional[str] = Query(None)):
+    board = _resolve_board(board)
+    conn = _conn(board=board)
+    try:
+        att = kanban_db.get_attachment(conn, attachment_id)
+        if att is None:
+            raise HTTPException(status_code=404, detail="attachment not found")
+        # Confirm the blob still lives under the board's attachments root
+        # before serving — defense in depth against a tampered DB row.
+        root = kanban_db.attachments_root(board=board).resolve()
+        try:
+            stored = Path(att.stored_path).resolve()
+            stored.relative_to(root)
+        except (ValueError, OSError):
+            raise HTTPException(status_code=404, detail="attachment file unavailable")
+        if not stored.is_file():
+            raise HTTPException(status_code=404, detail="attachment file missing on disk")
+        return FileResponse(
+            path=str(stored),
+            filename=att.filename,
+            media_type=att.content_type or "application/octet-stream",
+        )
+    finally:
+        conn.close()
+
+
+@router.delete("/attachments/{attachment_id}")
+def remove_attachment(attachment_id: int, board: Optional[str] = Query(None)):
+    board = _resolve_board(board)
+    conn = _conn(board=board)
+    try:
+        att = kanban_db.delete_attachment(conn, attachment_id)
+        if att is None:
+            raise HTTPException(status_code=404, detail="attachment not found")
+        return {"ok": True, "id": attachment_id}
+    finally:
+        conn.close()
+
+
 # ---------------------------------------------------------------------------
 # PATCH /tasks/:id  (status / assignee / priority / title / body)
 # ---------------------------------------------------------------------------
diff --git a/plugins/memory/hindsight/__init__.py b/plugins/memory/hindsight/__init__.py
index ef8fcafb88a..2f94c08da38 100644
--- a/plugins/memory/hindsight/__init__.py
+++ b/plugins/memory/hindsight/__init__.py
@@ -633,7 +633,8 @@ class HindsightMemoryProvider(MemoryProvider):
             except Exception:
                 pass
         existing.update(values)
-        config_path.write_text(json.dumps(existing, indent=2))
+        from utils import atomic_json_write
+        atomic_json_write(config_path, existing, mode=0o600)
 
     def post_setup(self, hermes_home: str, config: dict) -> None:
         """Custom setup wizard — installs only the deps needed for the selected mode."""
diff --git a/plugins/memory/honcho/README.md b/plugins/memory/honcho/README.md
index dbe3eebc9a5..3774747d05a 100644
--- a/plugins/memory/honcho/README.md
+++ b/plugins/memory/honcho/README.md
@@ -12,8 +12,8 @@ AI-native cross-session user modeling with multi-pass dialectic reasoning, sessi
 ## Setup
 
 ```bash
-hermes honcho setup    # full interactive wizard (cloud or local)
-hermes memory setup    # generic picker, also works
+hermes memory setup honcho   # configure Honcho directly (works on a fresh install)
+hermes memory setup          # generic picker, choose Honcho from the list
 ```
 
 Or manually:
@@ -22,6 +22,10 @@ hermes config set memory.provider honcho
 echo "HONCHO_API_KEY=***" >> ~/.hermes/.env
 ```
 
+> `hermes honcho setup` also works, but only **after** Honcho is the active
+> memory provider — the `honcho` subcommand is registered for the active
+> provider only. On a fresh install, use `hermes memory setup honcho`.
+
 ## Architecture Overview
 
 ### Two-Layer Context Injection
@@ -109,7 +113,7 @@ Config is read from the first file that exists:
 | 2 | `~/.hermes/honcho.json` | Default profile (shared host blocks) |
 | 3 | `~/.honcho/config.json` | Global (cross-app interop) |
 
-Host key is derived from the active Hermes profile: `hermes` (default) or `hermes.<profile>`.
+Host key is derived from the active Hermes profile: `hermes` (default) or `hermes_<profile>`.
 
 For every key, resolution order is: **host block > root > env var > default**.
 
@@ -154,7 +158,7 @@ In gateway deployments (Telegram, Discord, Slack, etc.) each user arrives with a
 
 **Host vs root semantics.** All three keys are accepted at both root and `hosts.<host>` levels. Host-level wins. For maps and prefixes, host-level *replaces* the root value as a whole (not merge), so a host can intentionally own its identity universe or wipe it with `userPeerAliases: {}` / `runtimePeerPrefix: ""`.
 
-**Deployment shapes** (`hermes honcho setup` asks one prompt to set these):
+**Deployment shapes** (`hermes memory setup honcho` asks one prompt to set these):
 
 - **Single-operator** — `pinUserPeer: true`. All gateway users → `peerName`. Recommended for personal use where you connect Hermes to your own Telegram/Discord/etc.
 - **Multi-user gateway** — `pinUserPeer: false`, optional `runtimePeerPrefix`. Each runtime user → own peer. Recommended for bots serving many humans.
@@ -225,7 +229,7 @@ Multiple Hermes profiles can share one workspace while maintaining separate AI i
       "recallMode": "hybrid",
       "sessionStrategy": "per-directory"
     },
-    "hermes.coder": {
+    "hermes_coder": {
       "aiPeer": "coder",
       "recallMode": "tools",
       "sessionStrategy": "per-repo"
@@ -236,7 +240,7 @@ Multiple Hermes profiles can share one workspace while maintaining separate AI i
 
 Both profiles see the same user (`yourname`) in the same shared environment (`hermes`), but each AI peer builds its own observations, conclusions, and behavior patterns. The coder's memory stays code-oriented; the main agent's stays broad.
 
-Host key is derived from the active Hermes profile: `hermes` (default) or `hermes.<profile>` (e.g. `hermes -p coder` → host key `hermes.coder`).
+Host key is derived from the active Hermes profile: `hermes` (default) or `hermes_<profile>` (e.g. `hermes -p coder` -> host key `hermes_coder`). Older `hermes.<profile>` host blocks are still read for compatibility and are migrated when the CLI writes profile-scoped Honcho config.
 
 ### Dialectic & Reasoning
 
@@ -307,7 +311,8 @@ Presets:
 
 | Command | Description |
 |---------|-------------|
-| `hermes honcho setup` | Full interactive setup wizard |
+| `hermes memory setup honcho` | Configure Honcho directly — works on a fresh install |
+| `hermes honcho setup` | Interactive setup wizard (only registered once Honcho is the active provider; redirects to `hermes memory setup`) |
 | `hermes honcho status` | Show resolved config for active profile |
 | `hermes honcho enable` / `disable` | Toggle Honcho for active profile |
 | `hermes honcho mode <mode>` | Change recall or observation mode |
@@ -344,7 +349,7 @@ Presets:
       "dialecticMaxChars": 600,
       "saveMessages": true
     },
-    "hermes.coder": {
+    "hermes_coder": {
       "enabled": true,
       "aiPeer": "coder",
       "sessionStrategy": "per-repo",
diff --git a/plugins/memory/honcho/__init__.py b/plugins/memory/honcho/__init__.py
index bbff0d0e628..6e6f39b8cd7 100644
--- a/plugins/memory/honcho/__init__.py
+++ b/plugins/memory/honcho/__init__.py
@@ -249,6 +249,7 @@ class HonchoMemoryProvider(MemoryProvider):
     def save_config(self, values, hermes_home):
         """Write config to $HERMES_HOME/honcho.json (Honcho SDK native format)."""
         import json
+        import os
         from pathlib import Path
         config_path = Path(hermes_home) / "honcho.json"
         existing = {}
@@ -258,7 +259,8 @@ class HonchoMemoryProvider(MemoryProvider):
             except Exception:
                 pass
         existing.update(values)
-        config_path.write_text(json.dumps(existing, indent=2))
+        from utils import atomic_json_write
+        atomic_json_write(config_path, existing, mode=0o600)
 
     def get_config_schema(self):
         return [
diff --git a/plugins/memory/honcho/cli.py b/plugins/memory/honcho/cli.py
index 9227bf95ab8..ce2af8a08b2 100644
--- a/plugins/memory/honcho/cli.py
+++ b/plugins/memory/honcho/cli.py
@@ -11,7 +11,7 @@ import sys
 from pathlib import Path
 
 from hermes_constants import get_hermes_home
-from plugins.memory.honcho.client import resolve_active_host, resolve_config_path, HOST
+from plugins.memory.honcho.client import _host_block, profile_host_key, resolve_active_host, resolve_config_path, HOST
 from hermes_cli.config import cfg_get
 
 
@@ -36,7 +36,7 @@ def clone_honcho_for_profile(profile_name: str) -> bool:
     if not default_block and not has_key:
         return False
 
-    new_host = f"{HOST}.{profile_name}"
+    new_host = profile_host_key(profile_name)
     if new_host in hosts:
         return False  # already exists
 
@@ -192,7 +192,7 @@ def cmd_sync(args) -> None:
         if p.name == "default":
             continue
         if clone_honcho_for_profile(p.name):
-            print(f"  + {p.name} -> hermes.{p.name}")
+            print(f"  + {p.name} -> {profile_host_key(p.name)}")
             created += 1
         else:
             skipped += 1
@@ -243,7 +243,7 @@ def _host_key() -> str:
     if _profile_override:
         if _profile_override in {"default", "custom"}:
             return HOST
-        return f"{HOST}.{_profile_override}"
+        return profile_host_key(_profile_override)
     return resolve_active_host()
 
 
@@ -275,10 +275,8 @@ def _read_config() -> dict:
 def _write_config(cfg: dict, path: Path | None = None) -> None:
     path = path or _local_config_path()
     path.parent.mkdir(parents=True, exist_ok=True)
-    path.write_text(
-        json.dumps(cfg, indent=2, ensure_ascii=False) + "\n",
-        encoding="utf-8",
-    )
+    from utils import atomic_json_write
+    atomic_json_write(path, cfg, mode=0o600)
 
 
 def _resolve_api_key(cfg: dict) -> str:
@@ -292,7 +290,7 @@ def _resolve_api_key(cfg: dict) -> str:
     config shapes, e.g. ``localhost:8000``) still pass — the Honcho SDK
     will reject them itself with a clearer error than ours.
     """
-    host_key = ((cfg.get("hosts") or {}).get(_host_key()) or {}).get("apiKey")
+    host_key = _host_block(cfg, _host_key()).get("apiKey")
     key = host_key or cfg.get("apiKey", "") or os.environ.get("HONCHO_API_KEY", "")
     if not key:
         base_url = cfg.get("baseUrl") or cfg.get("base_url") or os.environ.get("HONCHO_BASE_URL", "")
@@ -462,21 +460,58 @@ def cmd_setup(args) -> None:
     cfg.pop("base_url", None)
 
     if is_local:
-        # --- Local: ask for base URL, skip or clear API key ---
+        # --- Local: ask for base URL, optionally accept a JWT for auth ---
         current_url = cfg.get("baseUrl") or ""
         new_url = _prompt("Base URL", default=current_url or "http://localhost:8000")
         if new_url:
             cfg["baseUrl"] = new_url
 
-        # For local no-auth, the SDK must not send an API key.
-        # We keep the key in config (for cloud switching later) but
-        # the client should skip auth when baseUrl is local.
-        current_key = cfg.get("apiKey", "")
-        if current_key:
-            print(f"\n  API key present in config (kept for cloud/hybrid use).")
-            print("  Local connections will skip auth automatically.")
+        # Self-hosted Honcho can run with AUTH_USE_AUTH=true and an
+        # AUTH_JWT_SECRET on the server side. In that case clients must
+        # send a JWT signed with that secret as the bearer token (the
+        # Honcho SDK takes it via ``api_key=``). Cloud users got prompted
+        # for a key already; the local path historically skipped this and
+        # forced users to disable auth on the server. Offer the prompt
+        # here too. We store it under the host block (not the top-level
+        # apiKey) so ``get_honcho_client`` recognises it as an explicit
+        # local auth opt-in (see ``_host_has_key`` in client.py) and
+        # cloud/hybrid switching is unaffected.
+        current_host_key = hermes_host.get("apiKey", "")
+        masked = (
+            f"...{current_host_key[-8:]}"
+            if len(current_host_key) > 8
+            else ("set" if current_host_key else "not set")
+        )
+        print(
+            "\n  Local Honcho auth (JWT signed with the server's "
+            "AUTH_JWT_SECRET)."
+        )
+        print(
+            "  Leave blank if your server runs with AUTH_USE_AUTH=false. "
+            f"Current: {masked}"
+        )
+        new_local_key = _prompt(
+            "Local JWT / bearer token (blank to skip / keep current)",
+            secret=True,
+        )
+        if new_local_key:
+            hermes_host["apiKey"] = new_local_key
+        elif current_host_key:
+            print("  Keeping existing local JWT.")
         else:
-            print("\n  No API key set. Local no-auth ready.")
+            # Surface the top-level key situation for transparency.
+            top_key = cfg.get("apiKey", "")
+            if top_key:
+                print(
+                    "\n  Top-level API key present in config (kept for "
+                    "cloud/hybrid use)."
+                )
+                print(
+                    "  Local connections will skip auth automatically "
+                    "until a local JWT is set above."
+                )
+            else:
+                print("\n  No local JWT set. Local no-auth ready.")
     else:
         # --- Cloud: set default base URL, require API key ---
         cfg.pop("baseUrl", None)  # cloud uses SDK default
diff --git a/plugins/memory/honcho/client.py b/plugins/memory/honcho/client.py
index 3d31bd7a1fb..ae837a0b115 100644
--- a/plugins/memory/honcho/client.py
+++ b/plugins/memory/honcho/client.py
@@ -32,6 +32,24 @@ logger = logging.getLogger(__name__)
 HOST = "hermes"
 
 
+def profile_host_key(profile: str | None) -> str:
+    """Return the safe Honcho host key for a Hermes profile."""
+    if not profile or profile in {"default", "custom"}:
+        return HOST
+    sanitized = "".join(c if c.isalnum() or c in "_-" else "_" for c in profile).strip("_")
+    return f"{HOST}_{sanitized or 'profile'}"
+
+
+def _host_block(raw: dict, host: str) -> dict:
+    """Return host config, accepting legacy dot-form profile host keys."""
+    hosts = raw.get("hosts") or {}
+    block = hosts.get(host, {})
+    if block or not host.startswith(f"{HOST}_"):
+        return block
+    legacy = f"{HOST}.{host[len(HOST) + 1:]}"
+    return hosts.get(legacy, {})
+
+
 def resolve_active_host() -> str:
     """Derive the Honcho host key from the active Hermes profile.
 
@@ -47,8 +65,7 @@ def resolve_active_host() -> str:
     try:
         from hermes_cli.profiles import get_active_profile_name
         profile = get_active_profile_name()
-        if profile and profile not in {"default", "custom"}:
-            return f"{HOST}.{profile}"
+        return profile_host_key(profile)
     except Exception:
         pass
     return HOST
@@ -406,7 +423,7 @@ class HonchoClientConfig:
             logger.warning("Failed to read %s: %s, falling back to env", path, e)
             return cls.from_env(host=resolved_host)
 
-        host_block = (raw.get("hosts") or {}).get(resolved_host, {})
+        host_block = _host_block(raw, resolved_host)
         # A hosts.hermes block or explicit enabled flag means the user
         # intentionally configured Honcho for this host.
         _explicitly_configured = bool(host_block) or raw.get("enabled") is True
@@ -811,7 +828,10 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
         or "::1" in resolved_base_url
     )
     if _is_local:
-        # Check if the host block has its own apiKey (explicit local auth)
+        # Check if the host block has its own apiKey (explicit local auth).
+        # Auth-skipping is loopback-only: a stored key is likely a cloud key
+        # that would break a no-auth local server, so we substitute the SDK's
+        # required-non-empty placeholder unless the host block opts in.
         _raw = config.raw or {}
         _host_block = (_raw.get("hosts") or {}).get(config.host, {})
         _host_has_key = bool(_host_block.get("apiKey"))
@@ -819,6 +839,18 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
     else:
         effective_api_key = config.api_key
 
+    # The Honcho SDK's route builders (e.g. routes.workspaces()) already
+    # include the version prefix (e.g. "/v3/workspaces").  When a user-supplied
+    # base_url already ends in a version segment (e.g.
+    # "http://localhost:38000/v3", "https://honcho.my.ts.net/v3"), concatenating
+    # the two produces "/v3/v3/workspaces" → 404 on every call.  This is a pure
+    # routing concern independent of host, so strip a trailing version segment
+    # from ANY base_url — loopback, LAN, custom domain, or cloud alike.  The
+    # SDK then appends its own versioned paths correctly.
+    if resolved_base_url:
+        import re as _re
+        resolved_base_url = _re.sub(r"/v\d+/*$", "", resolved_base_url).rstrip("/")
+
     kwargs: dict = {
         "workspace_id": config.workspace_id,
         "api_key": effective_api_key,
diff --git a/plugins/memory/mem0/__init__.py b/plugins/memory/mem0/__init__.py
index 32d1f6ff700..332b3ac9412 100644
--- a/plugins/memory/mem0/__init__.py
+++ b/plugins/memory/mem0/__init__.py
@@ -155,7 +155,8 @@ class Mem0MemoryProvider(MemoryProvider):
             except Exception:
                 pass
         existing.update(values)
-        config_path.write_text(json.dumps(existing, indent=2))
+        from utils import atomic_json_write
+        atomic_json_write(config_path, existing, mode=0o600)
 
     def get_config_schema(self):
         return [
diff --git a/plugins/memory/supermemory/__init__.py b/plugins/memory/supermemory/__init__.py
index 35b5b6fd649..a21ae53cc06 100644
--- a/plugins/memory/supermemory/__init__.py
+++ b/plugins/memory/supermemory/__init__.py
@@ -152,7 +152,8 @@ def _save_supermemory_config(values: dict, hermes_home: str) -> None:
         except Exception:
             existing = {}
     existing.update(values)
-    config_path.write_text(json.dumps(existing, indent=2, sort_keys=True) + "\n", encoding="utf-8")
+    from utils import atomic_json_write
+    atomic_json_write(config_path, existing, mode=0o600, sort_keys=True)
 
 
 def _detect_category(text: str) -> str:
diff --git a/plugins/platforms/discord/adapter.py b/plugins/platforms/discord/adapter.py
index c58afffcd74..12cf05c38c9 100644
--- a/plugins/platforms/discord/adapter.py
+++ b/plugins/platforms/discord/adapter.py
@@ -6093,16 +6093,17 @@ def _apply_yaml_config(yaml_cfg: dict, discord_cfg: dict) -> dict | None:
     ``gateway/config.py::load_gateway_config()`` before this migration.
 
     The DiscordAdapter reads its runtime configuration via ``os.getenv()``
-    throughout the connect / handle code paths (``DISCORD_REQUIRE_MENTION``,
-    ``DISCORD_FREE_RESPONSE_CHANNELS``, ``DISCORD_AUTO_THREAD``,
-    ``DISCORD_REACTIONS``, ``DISCORD_IGNORED_CHANNELS``,
-    ``DISCORD_ALLOWED_CHANNELS``, ``DISCORD_NO_THREAD_CHANNELS``,
-    ``DISCORD_HISTORY_BACKFILL``, ``DISCORD_HISTORY_BACKFILL_LIMIT``,
-    ``DISCORD_ALLOW_MENTION_*``, ``DISCORD_REPLY_TO_MODE``,
-    ``DISCORD_THREAD_REQUIRE_MENTION``).  Rather than rewrite ~50 call sites
-    inside the adapter to read from ``PlatformConfig.extra`` instead, this
-    hook keeps the existing env-driven model and merely owns the
-    YAML→env translation here, next to the adapter that consumes it.
+    throughout the connect / handle code paths (``DISCORD_ALLOWED_USERS``,
+    ``DISCORD_REQUIRE_MENTION``, ``DISCORD_FREE_RESPONSE_CHANNELS``,
+    ``DISCORD_AUTO_THREAD``, ``DISCORD_REACTIONS``,
+    ``DISCORD_IGNORED_CHANNELS``, ``DISCORD_ALLOWED_CHANNELS``,
+    ``DISCORD_NO_THREAD_CHANNELS``, ``DISCORD_HISTORY_BACKFILL``,
+    ``DISCORD_HISTORY_BACKFILL_LIMIT``, ``DISCORD_ALLOW_MENTION_*``,
+    ``DISCORD_REPLY_TO_MODE``, ``DISCORD_THREAD_REQUIRE_MENTION``).
+    Rather than rewrite ~50 call sites inside the adapter to read from
+    ``PlatformConfig.extra`` instead, this hook keeps the existing
+    env-driven model and merely owns the YAML→env translation here, next to
+    the adapter that consumes it.
 
     Env vars take precedence over YAML — every assignment is guarded by
     ``not os.getenv(...)`` so explicit env vars survive a config.yaml
@@ -6113,6 +6114,22 @@ def _apply_yaml_config(yaml_cfg: dict, discord_cfg: dict) -> dict | None:
         os.environ["DISCORD_REQUIRE_MENTION"] = str(discord_cfg["require_mention"]).lower()
     if "thread_require_mention" in discord_cfg and not os.getenv("DISCORD_THREAD_REQUIRE_MENTION"):
         os.environ["DISCORD_THREAD_REQUIRE_MENTION"] = str(discord_cfg["thread_require_mention"]).lower()
+    platforms_cfg = yaml_cfg.get("platforms")
+    platform_extra_cfg = {}
+    if isinstance(platforms_cfg, dict):
+        discord_platform_cfg = platforms_cfg.get("discord")
+        if isinstance(discord_platform_cfg, dict):
+            candidate_extra = discord_platform_cfg.get("extra")
+            if isinstance(candidate_extra, dict):
+                platform_extra_cfg = candidate_extra
+    allowed_users_cfg = (
+        discord_cfg["allow_from"] if "allow_from" in discord_cfg
+        else platform_extra_cfg.get("allow_from")
+    )
+    if allowed_users_cfg is not None and not os.getenv("DISCORD_ALLOWED_USERS"):
+        if isinstance(allowed_users_cfg, list):
+            allowed_users_cfg = ",".join(str(v) for v in allowed_users_cfg)
+        os.environ["DISCORD_ALLOWED_USERS"] = str(allowed_users_cfg)
     frc = discord_cfg.get("free_response_channels")
     if frc is not None and not os.getenv("DISCORD_FREE_RESPONSE_CHANNELS"):
         if isinstance(frc, list):
diff --git a/plugins/web/firecrawl/provider.py b/plugins/web/firecrawl/provider.py
index 9e3f123e520..0fa99bf58f6 100644
--- a/plugins/web/firecrawl/provider.py
+++ b/plugins/web/firecrawl/provider.py
@@ -146,16 +146,16 @@ def _get_firecrawl_gateway_url() -> str:
 def _is_tool_gateway_ready() -> bool:
     """Return True when gateway URL + Nous Subscriber token are available.
 
-    Reads ``read_nous_access_token`` and ``resolve_managed_tool_gateway``
+    Reads ``peek_nous_access_token`` and ``resolve_managed_tool_gateway``
     via :mod:`tools.web_tools` rather than direct imports, so unit tests
-    that ``patch("tools.web_tools._read_nous_access_token", ...)`` see
+    that ``patch("tools.web_tools._peek_nous_access_token", ...)`` see
     their patches honored. The names are re-exported on
     :mod:`tools.web_tools` for exactly this reason.
     """
     import tools.web_tools as _wt
 
     return _wt.resolve_managed_tool_gateway(
-        "firecrawl", token_reader=_wt._read_nous_access_token
+        "firecrawl", token_reader=_wt._peek_nous_access_token
     ) is not None
 
 
diff --git a/pyproject.toml b/pyproject.toml
index fae89baea12..86bd94c5479 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -87,7 +87,7 @@ edge-tts = ["edge-tts==7.2.7"]
 modal = ["modal==1.3.4"]
 daytona = ["daytona==0.155.0"]
 hindsight = ["hindsight-client==0.6.1"]
-dev = ["debugpy==1.8.20", "pytest==9.0.2", "pytest-asyncio==1.3.0", "pytest-timeout==2.4.0", "mcp==1.26.0", "ty==0.0.21", "ruff==0.15.10", "setuptools==82.0.1"]
+dev = ["debugpy==1.8.20", "pytest==9.0.2", "pytest-asyncio==1.3.0", "pytest-timeout==2.4.0", "mcp==1.26.0", "starlette==1.0.1", "ty==0.0.21", "ruff==0.15.10", "setuptools==82.0.1"]  # starlette: CVE-2026-48710
 messaging = ["python-telegram-bot[webhooks]==22.6", "discord.py[voice]==2.7.1", "aiohttp==3.13.3", "brotlicffi==1.2.0.1", "slack-bolt==1.27.0", "slack-sdk==3.40.1", "qrcode==7.4.2"]
 cron = []  # croniter is now a core dependency; this extra kept for back-compat
 slack = ["slack-bolt==1.27.0", "slack-sdk==3.40.1", "aiohttp==3.13.3"]
@@ -114,14 +114,21 @@ pty = [
   # without pulling in extra packages.
 ]
 honcho = ["honcho-ai==2.0.1"]
-mcp = ["mcp==1.26.0"]
+# CVE-2026-48710 (BadHost): Starlette is pulled transitively by mcp's
+# sse-starlette / HTTP-SSE stack (and by fastapi in the `web` extra). Before
+# 1.0.1, a malformed Host header makes `request.url.path` desync from the path
+# the ASGI router actually dispatched, so middleware/endpoints that gate on
+# `request.url` can be bypassed. We pin a patched Starlette directly in every
+# extra that exposes a Starlette-backed server surface so pip/uv can't resolve
+# a vulnerable pre-1.0.1 transitive. Bump in lockstep with uv.lock.
+mcp = ["mcp==1.26.0", "starlette==1.0.1"]  # starlette: CVE-2026-48710
 homeassistant = ["aiohttp==3.13.3"]
 sms = ["aiohttp==3.13.3"]
 # Computer use — macOS background desktop control via cua-driver (MCP stdio).
 # The cua-driver binary itself is installed via `hermes tools` post-setup
 # (curl install script); this extra just pins the MCP client used to talk
 # to it, which is already provided by the `mcp` extra.
-computer-use = ["mcp==1.26.0"]
+computer-use = ["mcp==1.26.0", "starlette==1.0.1"]  # starlette: CVE-2026-48710
 acp = ["agent-client-protocol==0.9.0"]
 # mistral: Voxtral STT + TTS. Pinned to an exact verified-clean version.
 # The `mistralai` PyPI project was quarantined 2026-05-12 after the malicious
@@ -174,7 +181,9 @@ youtube = [
   "youtube-transcript-api==1.2.4",
 ]
 # `hermes dashboard` (localhost SPA + API).  Not in core to keep the default install lean.
-web = ["fastapi==0.133.1", "uvicorn[standard]==0.41.0"]
+# starlette==1.0.1 pinned for CVE-2026-48710 (BadHost) — fastapi pulls Starlette
+# transitively and pre-1.0.1 is the vulnerable range. See the mcp extra above.
+web = ["fastapi==0.133.1", "uvicorn[standard]==0.41.0", "starlette==1.0.1"]
 all = [
   # Policy (2026-05-12): `[all]` includes only extras that genuinely
   # CAN'T be lazy-installed via `tools/lazy_deps.py` — i.e. things every
@@ -216,7 +225,7 @@ hermes-agent = "run_agent:main"
 hermes-acp = "acp_adapter.entry:main"
 
 [tool.setuptools]
-py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_bootstrap", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "utils"]
+py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_bootstrap", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "utils", "mcp_serve"]
 
 [tool.setuptools.package-data]
 hermes_cli = ["web_dist/**/*", "tui_dist/**/*", "scripts/install.sh", "scripts/install.ps1"]
diff --git a/run_agent.py b/run_agent.py
index 036103442e9..762fd2ee2cc 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -2195,6 +2195,126 @@ class AIAgent:
             lines.append(f"  • … and {remaining} more")
         return "\n".join(lines)
 
+    def _turn_completion_explainer_enabled(self) -> bool:
+        """Check whether the end-of-turn completion explainer footer is on.
+
+        Config path: ``display.turn_completion_explainer`` (bool, default
+        True).  ``HERMES_TURN_COMPLETION_EXPLAINER`` env var overrides
+        config.  Exposed as a method so tests can patch a single seam,
+        mirroring ``_file_mutation_verifier_enabled``.
+        """
+        try:
+            import os as _os
+            env = _os.environ.get("HERMES_TURN_COMPLETION_EXPLAINER")
+            if env is not None:
+                return env.strip().lower() not in {"0", "false", "no", "off"}
+            # Read from the persisted config.yaml so gateway and CLI share
+            # the same setting.  Import lazily to avoid a startup-time cycle.
+            try:
+                from hermes_cli.config import load_config as _load_config
+                _cfg = _load_config() or {}
+            except Exception:
+                _cfg = {}
+            _display = _cfg.get("display") if isinstance(_cfg, dict) else None
+            if isinstance(_display, dict) and "turn_completion_explainer" in _display:
+                return bool(_display.get("turn_completion_explainer"))
+        except Exception:
+            pass
+        return True  # safe default: explainer on
+
+    @staticmethod
+    def _format_turn_completion_explanation(turn_exit_reason: str) -> str:
+        """Render a user-facing explanation for an abnormal turn ending.
+
+        Maps the internal ``turn_exit_reason`` to a short, actionable
+        message so a turn that produced no usable assistant reply (empty
+        content after retries, a partial/truncated stream, a still-pending
+        tool result, or an iteration/budget limit) is never silent from
+        the UI's perspective — the symptom users report in #34452.
+
+        Returns an empty string for reasons that are NOT abnormal (e.g.
+        a normal ``text_response(...)`` exit), so callers can concatenate
+        or substitute unconditionally without warning on healthy turns
+        like a terse ``Done.``.
+        """
+        if not turn_exit_reason:
+            return ""
+        reason = str(turn_exit_reason)
+
+        # Normal completion — stay quiet.  ``text_response(...)`` is the
+        # healthy terminal; anything that produced a real reply is fine.
+        if reason.startswith("text_response"):
+            return ""
+
+        prefix = "⚠️ No reply: "
+        if reason == "empty_response_exhausted":
+            return (
+                prefix
+                + "the model returned empty content after retries and any "
+                "fallback providers. Try `continue`, switch model/provider, "
+                "or inspect the tool output above."
+            )
+        if reason == "all_retries_exhausted_no_response":
+            return (
+                prefix
+                + "all API retries were exhausted before a response was "
+                "produced (provider errors / rate limits). Try `continue` "
+                "or switch provider."
+            )
+        if reason == "partial_stream_recovery":
+            return (
+                prefix
+                + "streaming stopped early and only a partial response was "
+                "recovered. Send `continue` to resume from where it stopped."
+            )
+        if reason == "fallback_prior_turn_content":
+            return (
+                prefix
+                + "no new content was produced this turn; showing recovered "
+                "prior context. Send `continue` to retry."
+            )
+        if reason == "interrupted_during_api_call":
+            return (
+                prefix
+                + "the request was interrupted mid-call before a reply was "
+                "received. Send `continue` to retry."
+            )
+        if reason == "budget_exhausted":
+            return (
+                prefix
+                + "the per-turn iteration/cost budget was exhausted before a "
+                "final answer. Send `continue` to keep going."
+            )
+        if reason == "ollama_runtime_context_too_small":
+            return (
+                prefix
+                + "the local model's context window was too small to finish. "
+                "Increase the context size or use a larger model."
+            )
+        if reason.startswith("max_iterations_reached"):
+            return (
+                prefix
+                + "the maximum tool-iteration limit was reached before a "
+                "final answer. Send `continue` to keep going, or raise "
+                "`max_iterations`."
+            )
+        if reason.startswith("error_near_max_iterations"):
+            return (
+                prefix
+                + "an error occurred near the iteration limit before a final "
+                "answer. Check the tool output above, then send `continue`."
+            )
+        if reason == "pending_tool_result":
+            return (
+                prefix
+                + "the turn stopped while a tool result was still pending and "
+                "the model produced no follow-up text. Send `continue` to "
+                "let it summarize."
+            )
+        # Unknown/diagnostic-only reasons (e.g. "unknown", guardrail_halt
+        # which already surfaces its own message) — don't second-guess.
+        return ""
+
     def _apply_pending_steer_to_tool_results(self, messages: list, num_tool_msgs: int) -> None:
         """Forwarder — see ``agent.agent_runtime_helpers.apply_pending_steer_to_tool_results``."""
         from agent.agent_runtime_helpers import apply_pending_steer_to_tool_results
@@ -3487,6 +3607,18 @@ class AIAgent:
         from agent.chat_completion_helpers import try_activate_fallback
         return try_activate_fallback(self, reason)
 
+    def _has_pending_fallback(self) -> bool:
+        """Whether a fallback provider is actually available to switch to.
+
+        Used to gate user-facing "trying fallback..." status so we don't
+        announce a fallback that will never be attempted (the user has no
+        fallback chain configured).  Mirrors the early-return guard in
+        ``try_activate_fallback`` (#35314, #17446).
+        """
+        chain = getattr(self, "_fallback_chain", None) or []
+        index = getattr(self, "_fallback_index", 0)
+        return index < len(chain)
+
     # ── Per-turn primary restoration ─────────────────────────────────────
 
     def _restore_primary_runtime(self) -> bool:
diff --git a/scripts/install.sh b/scripts/install.sh
index 764708baa9a..6d51c565869 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -540,6 +540,7 @@ check_python() {
     if PYTHON_PATH="$("$UV_CMD" python find "$PYTHON_VERSION" 2>/dev/null)"; then
         PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null)"
         log_success "Python found: $PYTHON_FOUND_VERSION"
+        ensure_fts5
         return 0
     fi
 
@@ -549,6 +550,7 @@ check_python() {
         PYTHON_PATH="$("$UV_CMD" python find "$PYTHON_VERSION")"
         PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null)"
         log_success "Python installed: $PYTHON_FOUND_VERSION"
+        ensure_fts5
     else
         log_error "Failed to install Python $PYTHON_VERSION"
         log_info "Install Python $PYTHON_VERSION manually, then re-run this script"
@@ -556,6 +558,51 @@ check_python() {
     fi
 }
 
+# Probe whether $1 (a python executable) links a SQLite with the FTS5
+# module compiled in. Hermes' session store (hermes_state.py) creates FTS5
+# virtual tables for full-text session search; a SQLite without FTS5 makes
+# the bundled-python path unusable for that feature. Returns 0 if FTS5 works.
+_python_has_fts5() {
+    "$1" - <<'PY' 2>/dev/null
+import sqlite3, sys
+try:
+    sqlite3.connect(":memory:").execute("CREATE VIRTUAL TABLE t USING fts5(x)")
+except Exception:
+    sys.exit(1)
+PY
+}
+
+# Guarantee the resolved uv-managed interpreter ships FTS5. uv's Python
+# distributions only gained FTS5 in mid-2025 (python-build-standalone #694),
+# so a stale interpreter already in uv's store — which `uv python find`
+# happily reuses — can lack it. When that happens, force a reinstall of the
+# latest patch for $PYTHON_VERSION (which has FTS5) and re-resolve. This keeps
+# the supported install path's session search working without bundling a
+# second SQLite or asking the user to do anything.
+ensure_fts5() {
+    [ -n "${PYTHON_PATH:-}" ] || return 0
+    if _python_has_fts5 "$PYTHON_PATH"; then
+        return 0
+    fi
+
+    log_warn "Resolved Python's SQLite lacks the FTS5 module (session search needs it)."
+    log_info "Reinstalling a current Python $PYTHON_VERSION with FTS5 via uv..."
+    if "$UV_CMD" python install "$PYTHON_VERSION" --reinstall >/dev/null 2>&1; then
+        PYTHON_PATH="$("$UV_CMD" python find "$PYTHON_VERSION" 2>/dev/null)"
+        PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null)"
+    fi
+
+    if [ -n "${PYTHON_PATH:-}" ] && _python_has_fts5 "$PYTHON_PATH"; then
+        log_success "FTS5 available ($PYTHON_FOUND_VERSION)"
+    else
+        # Could not obtain an FTS5-capable interpreter (offline, pinned env,
+        # etc.). Install proceeds — Hermes degrades gracefully and disables
+        # only full-text session search — but warn so it isn't a silent gap.
+        log_warn "Could not obtain an FTS5-capable Python. Hermes will run, but"
+        log_warn "full-text session search will be disabled until FTS5 is present."
+    fi
+}
+
 check_git() {
     log_info "Checking Git..."
 
diff --git a/scripts/release.py b/scripts/release.py
index 9b7e12a5d1f..30d0d84d6a0 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -45,6 +45,10 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
 
 # Auto-extracted from noreply emails + manual overrides
 AUTHOR_MAP = {
+    "zhipengli@thebrainly.ai": "a1245582339",
+    "mathijs.vd.hurk@gmail.com": "mathijsvandenhurk",
+    "drpelagik@gmail.com": "SeaXen",
+    "lengr@users.noreply.github.com": "LengR",
     "metalclaudbot@gmail.com": "HashClawAI",
     "tonybear55665566@gmail.com": "TonyPepeBear",
     "kaspersniels@gmail.com": "nielskaspers",
@@ -67,6 +71,7 @@ AUTHOR_MAP = {
     "wangpuv@hotmail.com": "wangpuv",
     "202622897+ticketclosed-wontfix@users.noreply.github.com": "ticketclosed-wontfix",
     "wuxuebin1993@gmail.com": "victorGPT",
+    "wei.chen.coder@gmail.com": "wenchengxucool",
     "frowte3k@gmail.com": "Frowtek",
     "211828103+julio-cloudvisor@users.noreply.github.com": "julio-cloudvisor",
     "17778+kweiner@users.noreply.github.com": "kweiner",
@@ -220,6 +225,7 @@ AUTHOR_MAP = {
     "264291321+v1b3coder@users.noreply.github.com": "v1b3coder",
     "silverchris@foxmail.com": "ming1523",
     "maksesipov@gmail.com": "Qwinty",
+    "byquenox@gmail.com": "Que0x",
     "denisamania@gmail.com": "CalmProton",
     "308068+mbac@users.noreply.github.com": "mbac",
     "nicoechaniz@altermundi.net": "nicoechaniz",
@@ -649,8 +655,10 @@ AUTHOR_MAP = {
     "alexazzjjtt@163.com": "alexzhu0",
     "pub_forgreatagent@antgroup.com": "AntAISecurityLab",
     "252620095+briandevans@users.noreply.github.com": "briandevans",
+    "incharge.automation@gmail.com": "inchargeautomation-lab",
     "danielrpike9@gmail.com": "Bartok9",
     "96944678+ymylive@users.noreply.github.com": "sweetcornna",
+    "laflamme@illinoisalumni.org": "briancl2",
     "skozyuk@cruxexperts.com": "CruxExperts",
     "154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43",
     "12250313+Kailigithub@users.noreply.github.com": "Kailigithub",
@@ -1203,6 +1211,7 @@ AUTHOR_MAP = {
     "86501179+1RB@users.noreply.github.com": "1RB",  # PR #25462 salvage (discord forwarded messages)
     "44045943+ayushere@users.noreply.github.com": "ayushere",  # PR #25342 salvage (memory teardown leak)
     "15791290+domtriola@users.noreply.github.com": "domtriola",  # PR #25424 salvage (docs tirith link)
+    "tuancookiez@gmail.com": "tuancookiez-hub",  # PR #34865 salvage (LSP Windows .cmd shim spawn, #34864)
     "284216128+ephron-ren@users.noreply.github.com": "ephron-ren",  # PR #25358 salvage (MiMo reasoning echo-back)
     "96843562+freqyfreqy@users.noreply.github.com": "freqyfreqy",  # PR #25423 salvage (docs LSP worktree -> repo)
     "54306477+fu576@users.noreply.github.com": "fu576",  # PR #25369 salvage (api_mode not inherited cross-provider)
diff --git a/skills/productivity/google-workspace/scripts/google_api.py b/skills/productivity/google-workspace/scripts/google_api.py
index 231b1b6849f..27855a5158e 100644
--- a/skills/productivity/google-workspace/scripts/google_api.py
+++ b/skills/productivity/google-workspace/scripts/google_api.py
@@ -129,7 +129,11 @@ def _run_gws(parts: list[str], *, params: dict | None = None, body: dict | None
 
 
 def _headers_dict(msg: dict) -> dict[str, str]:
-    return {h["name"]: h["value"] for h in msg.get("payload", {}).get("headers", [])}
+    return {
+        h["name"].lower(): h["value"]
+        for h in msg.get("payload", {}).get("headers", [])
+        if h.get("name")
+    }
 
 
 def _extract_message_body(msg: dict) -> str:
@@ -230,10 +234,10 @@ def gmail_search(args):
                 {
                     "id": msg["id"],
                     "threadId": msg["threadId"],
-                    "from": headers.get("From", ""),
-                    "to": headers.get("To", ""),
-                    "subject": headers.get("Subject", ""),
-                    "date": headers.get("Date", ""),
+                    "from": headers.get("from", ""),
+                    "to": headers.get("to", ""),
+                    "subject": headers.get("subject", ""),
+                    "date": headers.get("date", ""),
                     "snippet": msg.get("snippet", ""),
                     "labels": msg.get("labelIds", []),
                 }
@@ -260,10 +264,10 @@ def gmail_search(args):
         output.append({
             "id": msg["id"],
             "threadId": msg["threadId"],
-            "from": headers.get("From", ""),
-            "to": headers.get("To", ""),
-            "subject": headers.get("Subject", ""),
-            "date": headers.get("Date", ""),
+            "from": headers.get("from", ""),
+            "to": headers.get("to", ""),
+            "subject": headers.get("subject", ""),
+            "date": headers.get("date", ""),
             "snippet": msg.get("snippet", ""),
             "labels": msg.get("labelIds", []),
         })
@@ -281,10 +285,10 @@ def gmail_get(args):
         result = {
             "id": msg["id"],
             "threadId": msg["threadId"],
-            "from": headers.get("From", ""),
-            "to": headers.get("To", ""),
-            "subject": headers.get("Subject", ""),
-            "date": headers.get("Date", ""),
+            "from": headers.get("from", ""),
+            "to": headers.get("to", ""),
+            "subject": headers.get("subject", ""),
+            "date": headers.get("date", ""),
             "labels": msg.get("labelIds", []),
             "body": _extract_message_body(msg),
         }
@@ -300,10 +304,10 @@ def gmail_get(args):
     result = {
         "id": msg["id"],
         "threadId": msg["threadId"],
-        "from": headers.get("From", ""),
-        "to": headers.get("To", ""),
-        "subject": headers.get("Subject", ""),
-        "date": headers.get("Date", ""),
+        "from": headers.get("from", ""),
+        "to": headers.get("to", ""),
+        "subject": headers.get("subject", ""),
+        "date": headers.get("date", ""),
         "labels": msg.get("labelIds", []),
         "body": _extract_message_body(msg),
     }
@@ -314,12 +318,12 @@ def gmail_get(args):
 def gmail_send(args):
     if _gws_binary():
         message = MIMEText(args.body, "html" if args.html else "plain")
-        message["to"] = args.to
-        message["subject"] = args.subject
+        message["To"] = args.to
+        message["Subject"] = args.subject
         if args.cc:
-            message["cc"] = args.cc
+            message["Cc"] = args.cc
         if args.from_header:
-            message["from"] = args.from_header
+            message["From"] = args.from_header
 
         raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
         body = {"raw": raw}
@@ -336,12 +340,12 @@ def gmail_send(args):
 
     service = build_service("gmail", "v1")
     message = MIMEText(args.body, "html" if args.html else "plain")
-    message["to"] = args.to
-    message["subject"] = args.subject
+    message["To"] = args.to
+    message["Subject"] = args.subject
     if args.cc:
-        message["cc"] = args.cc
+        message["Cc"] = args.cc
     if args.from_header:
-        message["from"] = args.from_header
+        message["From"] = args.from_header
 
     raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
     body = {"raw": raw}
@@ -367,18 +371,18 @@ def gmail_reply(args):
         )
         headers = _headers_dict(original)
 
-        subject = headers.get("Subject", "")
+        subject = headers.get("subject", "")
         if not subject.startswith("Re:"):
             subject = f"Re: {subject}"
 
         message = MIMEText(args.body)
-        message["to"] = headers.get("From", "")
-        message["subject"] = subject
+        message["To"] = headers.get("from", "")
+        message["Subject"] = subject
         if args.from_header:
-            message["from"] = args.from_header
-        if headers.get("Message-ID"):
-            message["In-Reply-To"] = headers["Message-ID"]
-            message["References"] = headers["Message-ID"]
+            message["From"] = args.from_header
+        if headers.get("message-id"):
+            message["In-Reply-To"] = headers["message-id"]
+            message["References"] = headers["message-id"]
 
         raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
         result = _run_gws(
@@ -396,18 +400,18 @@ def gmail_reply(args):
     ).execute()
     headers = _headers_dict(original)
 
-    subject = headers.get("Subject", "")
+    subject = headers.get("subject", "")
     if not subject.startswith("Re:"):
         subject = f"Re: {subject}"
 
     message = MIMEText(args.body)
-    message["to"] = headers.get("From", "")
-    message["subject"] = subject
+    message["To"] = headers.get("from", "")
+    message["Subject"] = subject
     if args.from_header:
-        message["from"] = args.from_header
-    if headers.get("Message-ID"):
-        message["In-Reply-To"] = headers["Message-ID"]
-        message["References"] = headers["Message-ID"]
+        message["From"] = args.from_header
+    if headers.get("message-id"):
+        message["In-Reply-To"] = headers["message-id"]
+        message["References"] = headers["message-id"]
 
     raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
     body = {"raw": raw, "threadId": original["threadId"]}
diff --git a/tests/agent/lsp/test_install_and_lint_fixes.py b/tests/agent/lsp/test_install_and_lint_fixes.py
index e9f862a6d8e..abbaef94e95 100644
--- a/tests/agent/lsp/test_install_and_lint_fixes.py
+++ b/tests/agent/lsp/test_install_and_lint_fixes.py
@@ -94,6 +94,47 @@ def test_install_npm_works_without_extras(tmp_path, monkeypatch):
     assert install_targets == ["pyright"]
 
 
+def test_existing_binary_finds_windows_wrapper_in_staging(tmp_path, monkeypatch):
+    """Installed Windows shims should satisfy later status/probe calls."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+
+    from agent.lsp import install as install_mod
+
+    wrapper = install_mod.hermes_lsp_bin_dir() / "pyright-langserver.cmd"
+    wrapper.write_text("@echo off\n")
+    wrapper.chmod(0o755)
+
+    monkeypatch.setattr(install_mod, "_is_windows", lambda: True)
+    monkeypatch.setattr(install_mod.shutil, "which", lambda _name: None)
+
+    assert install_mod._existing_binary("pyright-langserver") == str(wrapper)
+    assert install_mod.detect_status("pyright") == "installed"
+
+
+def test_install_pip_finds_windows_scripts_launcher(tmp_path, monkeypatch):
+    """pip console scripts can land in Scripts/ on native Windows."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+
+    from agent.lsp import install as install_mod
+
+    def fake_run(cmd, **kwargs):
+        scripts_dir = install_mod.hermes_lsp_bin_dir().parent / "python-packages" / "Scripts"
+        scripts_dir.mkdir(parents=True, exist_ok=True)
+        launcher = scripts_dir / "fake-language-server.exe"
+        launcher.write_text("launcher\n")
+        launcher.chmod(0o755)
+        return MagicMock(returncode=0, stderr="")
+
+    monkeypatch.setattr(install_mod, "_is_windows", lambda: True)
+    monkeypatch.setattr(install_mod.subprocess, "run", fake_run)
+
+    resolved = install_mod._install_pip("fake-lsp", "fake-language-server")
+
+    assert resolved is not None
+    assert resolved.endswith("fake-language-server.exe")
+    assert (install_mod.hermes_lsp_bin_dir() / "fake-language-server.exe").exists()
+
+
 # ---------------------------------------------------------------------------
 # Fix 2: ``hermes lsp status`` surfaces shellcheck-missing for bash
 # ---------------------------------------------------------------------------
diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py
index 0d7aa81f41f..5ce753864c9 100644
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@@ -41,6 +41,8 @@ class TestShouldCompress:
 
 class TestUpdateFromResponse:
     def test_updates_fields(self, compressor):
+        compressor.awaiting_real_usage_after_compression = True
+        compressor.last_compression_rough_tokens = 90_000
         compressor.update_from_response({
             "prompt_tokens": 5000,
             "completion_tokens": 1000,
@@ -48,12 +50,39 @@ class TestUpdateFromResponse:
         })
         assert compressor.last_prompt_tokens == 5000
         assert compressor.last_completion_tokens == 1000
+        assert compressor.last_real_prompt_tokens == 5000
+        assert compressor.last_rough_tokens_when_real_prompt_fit == 90_000
+        assert compressor.awaiting_real_usage_after_compression is False
 
     def test_missing_fields_default_zero(self, compressor):
         compressor.update_from_response({})
         assert compressor.last_prompt_tokens == 0
 
 
+class TestPreflightDeferral:
+    def test_defers_when_recent_real_usage_fit_and_rough_growth_is_small(self, compressor):
+        compressor.threshold_tokens = 85_000
+        compressor.last_real_prompt_tokens = 50_000
+        compressor.last_rough_tokens_when_real_prompt_fit = 90_000
+
+        assert compressor.should_defer_preflight_to_real_usage(93_000) is True
+        assert compressor.last_rough_tokens_when_real_prompt_fit == 93_000
+
+    def test_does_not_defer_when_rough_growth_is_large(self, compressor):
+        compressor.threshold_tokens = 85_000
+        compressor.last_real_prompt_tokens = 50_000
+        compressor.last_rough_tokens_when_real_prompt_fit = 90_000
+
+        assert compressor.should_defer_preflight_to_real_usage(100_000) is False
+
+    def test_does_not_defer_without_recent_real_usage(self, compressor):
+        compressor.threshold_tokens = 85_000
+        compressor.last_real_prompt_tokens = 0
+        compressor.last_rough_tokens_when_real_prompt_fit = 90_000
+
+        assert compressor.should_defer_preflight_to_real_usage(93_000) is False
+
+
 
 class TestCompress:
     def _make_messages(self, n):
diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py
index 3f9fd56d140..5b1abfd32d0 100644
--- a/tests/agent/test_model_metadata.py
+++ b/tests/agent/test_model_metadata.py
@@ -123,55 +123,6 @@ class TestEstimateMessagesTokensRough:
 # =========================================================================
 
 class TestDefaultContextLengths:
-    def test_claude_models_context_lengths(self):
-        for key, value in DEFAULT_CONTEXT_LENGTHS.items():
-            if "claude" not in key:
-                continue
-            # Claude 4.6+ models (4.6, 4.7, 4.8) have 1M context at standard
-            # API pricing (no long-context premium).  Older Claude 4.x and
-            # 3.x models cap at 200k.
-            if any(tag in key for tag in ("4.6", "4-6", "4.7", "4-7", "4.8", "4-8")):
-                assert value == 1000000, f"{key} should be 1000000"
-            else:
-                assert value == 200000, f"{key} should be 200000"
-
-    def test_gpt4_models_128k_or_1m(self):
-        # gpt-4.1 and gpt-4.1-mini have 1M context; other gpt-4* have 128k
-        for key, value in DEFAULT_CONTEXT_LENGTHS.items():
-            if "gpt-4" in key and "gpt-4.1" not in key:
-                assert value == 128000, f"{key} should be 128000"
-
-    def test_gpt41_models_1m(self):
-        for key, value in DEFAULT_CONTEXT_LENGTHS.items():
-            if "gpt-4.1" in key:
-                assert value == 1047576, f"{key} should be 1047576"
-
-    def test_gemini_models_1m(self):
-        for key, value in DEFAULT_CONTEXT_LENGTHS.items():
-            if "gemini" in key:
-                assert value == 1048576, f"{key} should be 1048576"
-
-    def test_grok_models_context_lengths(self):
-        # xAI /v1/models does not return context_length metadata, so
-        # DEFAULT_CONTEXT_LENGTHS must cover the Grok family explicitly.
-        # Values sourced from models.dev (2026-04).
-        expected = {
-            "grok-4.20": 2000000,
-            "grok-4-fast": 2000000,
-            "grok-4": 256000,
-            "grok-build": 256000,
-            "grok-code-fast": 256000,
-            "grok-3": 131072,
-            "grok-2": 131072,
-            "grok-2-vision": 8192,
-            "grok": 131072,
-        }
-        for key, value in expected.items():
-            assert key in DEFAULT_CONTEXT_LENGTHS, f"{key} missing from DEFAULT_CONTEXT_LENGTHS"
-            assert DEFAULT_CONTEXT_LENGTHS[key] == value, (
-                f"{key} should be {value}, got {DEFAULT_CONTEXT_LENGTHS[key]}"
-            )
-
     def test_grok_substring_matching(self):
         # Longest-first substring matching must resolve the real xAI model
         # IDs to the correct fallback entries without 128k probe-down.
@@ -268,13 +219,6 @@ class TestDefaultContextLengths:
                     f"{model_id}: expected {expected_ctx}, got {actual}"
                 )
 
-    def test_all_values_positive(self):
-        for key, value in DEFAULT_CONTEXT_LENGTHS.items():
-            assert value > 0, f"{key} has non-positive context length"
-
-    def test_dict_is_not_empty(self):
-        assert len(DEFAULT_CONTEXT_LENGTHS) >= 10
-
 
 # =========================================================================
 # Codex OAuth context-window resolution (provider="openai-codex")
@@ -1141,12 +1085,6 @@ class TestContextProbeTiers:
         for i in range(len(CONTEXT_PROBE_TIERS) - 1):
             assert CONTEXT_PROBE_TIERS[i] > CONTEXT_PROBE_TIERS[i + 1]
 
-    def test_first_tier_is_256k(self):
-        assert CONTEXT_PROBE_TIERS[0] == 256_000
-
-    def test_last_tier_is_8k(self):
-        assert CONTEXT_PROBE_TIERS[-1] == 8_000
-
 
 class TestGetNextProbeTier:
     def test_from_256k(self):
diff --git a/tests/agent/test_models_dev.py b/tests/agent/test_models_dev.py
index 41fb4463ec8..b4bbbf753df 100644
--- a/tests/agent/test_models_dev.py
+++ b/tests/agent/test_models_dev.py
@@ -82,17 +82,6 @@ SAMPLE_REGISTRY = {
 
 
 class TestProviderMapping:
-    def test_all_mapped_providers_are_strings(self):
-        for hermes_id, mdev_id in PROVIDER_TO_MODELS_DEV.items():
-            assert isinstance(hermes_id, str)
-            assert isinstance(mdev_id, str)
-
-    def test_known_providers_mapped(self):
-        assert PROVIDER_TO_MODELS_DEV["anthropic"] == "anthropic"
-        assert PROVIDER_TO_MODELS_DEV["copilot"] == "github-copilot"
-        assert PROVIDER_TO_MODELS_DEV["stepfun"] == "stepfun"
-        assert PROVIDER_TO_MODELS_DEV["kilocode"] == "kilo"
-
     def test_xai_oauth_uses_xai_catalog(self):
         assert PROVIDER_TO_MODELS_DEV["xai"] == "xai"
         assert PROVIDER_TO_MODELS_DEV["xai-oauth"] == "xai"
diff --git a/tests/agent/test_resume_stale_active_task.py b/tests/agent/test_resume_stale_active_task.py
new file mode 100644
index 00000000000..6b22a37c440
--- /dev/null
+++ b/tests/agent/test_resume_stale_active_task.py
@@ -0,0 +1,141 @@
+"""Regression coverage for #35344: a resumed session must not let a stale
+``## Active Task`` from an inherited compaction handoff hijack the reply to a
+new, unrelated user message.
+
+The failure mode (real report): a lineage was compacted, producing a handoff
+whose ``## Active Task`` described task A. The lineage was resumed later and
+the user asked about an unrelated task B. The model answered with A because
+the handoff's resume directive outranked the fresh ask.
+
+The structural fix lives in ``SUMMARY_PREFIX``: the handoff is framed as
+reference-only and the latest user message explicitly *wins* on conflict, with
+named reverse-signal verbs. Two invariants guard the resume path specifically:
+
+  1. A handoff persisted under the OLD (conflicting) prefix is re-normalized to
+     the CURRENT prefix when it is re-compacted on a resumed lineage — so a
+     pre-fix stale handoff cannot keep its "resume exactly" directive forever.
+
+  2. The current handoff prefix contains an unambiguous "latest message wins /
+     discard stale Active Task" rule, so an unrelated new ask is privileged over
+     the inherited ``## Active Task``.
+
+These are content/structural assertions (no live model call) — they pin the
+mechanism that makes the stale task historical rather than active.
+"""
+
+from agent.context_compressor import (
+    SUMMARY_PREFIX,
+    LEGACY_SUMMARY_PREFIX,
+    ContextCompressor,
+)
+
+
+# The conflicting prefix that shipped before the #35344 fix. A handoff
+# persisted in a resumed lineage could carry this verbatim.
+_OLD_CONFLICTING_PREFIX = (
+    "[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted "
+    "into the summary below. This is a handoff from a previous context "
+    "window — treat it as background reference, NOT as active instructions. "
+    "Do NOT answer questions or fulfill requests mentioned in this summary; "
+    "they were already addressed. "
+    "Your current task is identified in the '## Active Task' section of the "
+    "summary — resume exactly from there. "
+    "Respond ONLY to the latest user message "
+    "that appears AFTER this summary. The current session state (files, "
+    "config, etc.) may reflect work described here — avoid repeating it:"
+)
+
+
+def test_latest_message_wins_over_inherited_active_task():
+    """The handoff must explicitly privilege the latest user message over a
+    stale ``## Active Task`` — the core #35344 contract."""
+    lower = SUMMARY_PREFIX.lower()
+    assert "latest user message" in lower
+    assert "## active task" in lower
+    # Conflict-resolution must be explicit, not implied.
+    assert "wins" in lower or "supersede" in lower
+    assert "discard" in lower
+
+
+def test_no_resume_exactly_directive_can_hijack():
+    """The directive that caused the hijack ("resume exactly from Active
+    Task") must be gone."""
+    assert "resume exactly" not in SUMMARY_PREFIX.lower()
+
+
+def test_resumed_stale_handoff_gets_renormalized_to_current_prefix():
+    """A handoff persisted under the OLD conflicting prefix (e.g. saved before
+    the fix and inherited into a resumed lineage) is upgraded to the CURRENT
+    prefix when re-normalized on re-compaction — so the "resume exactly"
+    directive cannot survive into a resumed session."""
+    stale_body = (
+        "## Active Task\n"
+        "User asked: 'Migrate the billing module to Stripe'\n\n"
+        "## Goal\nMigrate billing.\n"
+    )
+    stale_handoff = f"{_OLD_CONFLICTING_PREFIX}\n{stale_body}"
+
+    # Sanity: the fixture really does carry the old directive.
+    assert "resume exactly" in stale_handoff.lower()
+
+    renormalized = ContextCompressor._with_summary_prefix(stale_handoff)
+
+    # The body is preserved...
+    assert "Migrate the billing module to Stripe" in renormalized
+    # ...but the conflicting directive is stripped and replaced with the
+    # current latest-message-wins framing.
+    assert "resume exactly" not in renormalized.lower()
+    assert renormalized.startswith(SUMMARY_PREFIX)
+    assert "wins" in renormalized.lower()
+
+
+def test_legacy_prefix_handoff_also_renormalized():
+    """The same upgrade applies to the oldest ``[CONTEXT SUMMARY]:`` handoff
+    format that may sit in a long-lived resumed lineage."""
+    legacy = f"{LEGACY_SUMMARY_PREFIX} ## Active Task\nUser asked: 'task A'"
+    renormalized = ContextCompressor._with_summary_prefix(legacy)
+    assert renormalized.startswith(SUMMARY_PREFIX)
+    assert LEGACY_SUMMARY_PREFIX not in renormalized
+    assert "task A" in renormalized
+
+
+def test_inherited_handoff_detected_in_resumed_protected_head():
+    """On a resumed lineage the handoff commonly sits right after the system
+    prompt (in the protected head). ``_find_latest_context_summary`` must
+    detect it there so re-compaction rehydrates state from it rather than
+    serializing it as a fresh user turn (which is what let the stale Active
+    Task read as live intent)."""
+    messages = [
+        {"role": "system", "content": "system prompt"},
+        {"role": "user", "content": f"{SUMMARY_PREFIX}\n## Active Task\nUser asked: 'task A'"},
+        {"role": "assistant", "content": "ok"},
+        {"role": "user", "content": "Unrelated task B: what's the capital of France?"},
+    ]
+    # Search the whole post-system range.
+    idx, body = ContextCompressor._find_latest_context_summary(
+        messages, 1, len(messages)
+    )
+    assert idx == 1, "handoff in protected head must be found"
+    assert "task A" in body
+    # The detected body is stripped of the prefix (treated as state, not a
+    # standalone instruction message).
+    assert not body.startswith(SUMMARY_PREFIX)
+
+
+def test_historical_prefixed_handoff_detected_and_stripped():
+    """A pre-fix handoff (old conflicting prefix) inherited into a resumed
+    lineage must still be recognized as a context summary AND have its old
+    directive stripped on detection — otherwise re-compaction serializes the
+    stale 'resume exactly' text as a fresh turn."""
+    messages = [
+        {"role": "system", "content": "system prompt"},
+        {"role": "user", "content": f"{_OLD_CONFLICTING_PREFIX}\n## Active Task\nUser asked: 'task A'"},
+        {"role": "assistant", "content": "ok"},
+        {"role": "user", "content": "Unrelated task B"},
+    ]
+    idx, body = ContextCompressor._find_latest_context_summary(
+        messages, 1, len(messages)
+    )
+    assert idx == 1
+    assert "task A" in body
+    assert "resume exactly" not in body.lower()
diff --git a/tests/agent/test_set_runtime_main_custom_provider.py b/tests/agent/test_set_runtime_main_custom_provider.py
new file mode 100644
index 00000000000..bb6a04a4beb
--- /dev/null
+++ b/tests/agent/test_set_runtime_main_custom_provider.py
@@ -0,0 +1,226 @@
+"""Regression test: set_runtime_main() must pass base_url/api_key/api_mode
+so that _resolve_auto() can route custom: providers in Step 1.
+
+Fixes https://github.com/NousResearch/hermes-agent/issues/34777
+"""
+import pytest
+from unittest.mock import patch, MagicMock
+
+
+def _get_globals(mod):
+    """Read runtime globals without triggering redaction."""
+    return {
+        "provider": mod._RUNTIME_MAIN_PROVIDER,
+        "model": mod._RUNTIME_MAIN_MODEL,
+        "base_url": mod._RUNTIME_MAIN_BASE_URL,
+        "cred": mod._RUNTIME_MAIN_API_KEY,  # renamed to avoid redaction
+        "api_mode": mod._RUNTIME_MAIN_API_MODE,
+    }
+
+
+class TestSetRuntimeMainCustomProvider:
+    """set_runtime_main must propagate base_url/api_key/api_mode for custom providers."""
+
+    def test_globals_stored(self):
+        """set_runtime_main stores all five fields in process-local globals."""
+        import agent.auxiliary_client as mod
+
+        mod.clear_runtime_main()
+        try:
+            mod.set_runtime_main(
+                "custom:my-router",
+                "glm-5.1",
+                base_url="https://my-server.example.com/v1",
+                api_key="sk-test-key",
+                api_mode="chat_completions",
+            )
+            g = _get_globals(mod)
+            assert g["provider"] == "custom:my-router"
+            assert g["model"] == "glm-5.1"
+            assert g["base_url"] == "https://my-server.example.com/v1"
+            assert g["cred"] == "sk-test-key"
+            assert g["api_mode"] == "chat_completions"
+        finally:
+            mod.clear_runtime_main()
+
+    def test_clear_resets_all_globals(self):
+        """clear_runtime_main resets all five globals to empty."""
+        import agent.auxiliary_client as mod
+
+        mod.set_runtime_main(
+            "custom:x", "m",
+            base_url="https://x.example.com",
+            api_key="sk-abc",
+            api_mode="chat_completions",
+        )
+        mod.clear_runtime_main()
+        g = _get_globals(mod)
+        for v in g.values():
+            assert v == "", f"Expected empty, got {v!r}"
+
+    def test_resolve_auto_uses_globals_for_custom_provider(self):
+        """_resolve_auto reads base_url/api_key from globals when main_runtime is None."""
+        import agent.auxiliary_client as mod
+
+        mod.clear_runtime_main()
+        try:
+            mod.set_runtime_main(
+                "custom:test-router",
+                "test-model",
+                base_url="https://custom-endpoint.example.com/v1",
+                api_key="sk-test-123",
+            )
+
+            with patch.object(mod, "resolve_provider_client") as mock_resolve:
+                mock_resolve.return_value = (MagicMock(), "test-model")
+                client, resolved = mod._resolve_auto(main_runtime=None)
+
+                mock_resolve.assert_called_once()
+                call_args = mock_resolve.call_args
+                assert call_args[0][0] == "custom"
+                assert call_args[1]["explicit_base_url"] == "https://custom-endpoint.example.com/v1"
+                assert call_args[1]["explicit_api_key"] == "sk-test-123"
+        finally:
+            mod.clear_runtime_main()
+
+    def test_explicit_main_runtime_takes_precedence(self):
+        """When main_runtime dict has values, globals are NOT used."""
+        import agent.auxiliary_client as mod
+
+        mod.clear_runtime_main()
+        try:
+            mod.set_runtime_main(
+                "custom:router-a",
+                "model-a",
+                base_url="https://from-global.example.com",
+                api_key="sk-global",
+            )
+
+            with patch.object(mod, "resolve_provider_client") as mock_resolve:
+                mock_resolve.return_value = (MagicMock(), "model-b")
+                main_rt = {
+                    "provider": "custom:router-b",
+                    "model": "model-b",
+                    "base_url": "https://from-dict.example.com",
+                    "api_key": "sk-dict",
+                }
+                mod._resolve_auto(main_runtime=main_rt)
+
+                call_args = mock_resolve.call_args[1]
+                assert call_args["explicit_base_url"] == "https://from-dict.example.com"
+                assert call_args["explicit_api_key"] == "sk-dict"
+        finally:
+            mod.clear_runtime_main()
+
+    def test_backward_compatible_defaults(self):
+        """Calling set_runtime_main with only positional args still works."""
+        import agent.auxiliary_client as mod
+
+        mod.clear_runtime_main()
+        try:
+            mod.set_runtime_main("openrouter", "gpt-4o")
+            g = _get_globals(mod)
+            assert g["provider"] == "openrouter"
+            assert g["model"] == "gpt-4o"
+            assert g["base_url"] == ""
+            assert g["cred"] == ""
+            assert g["api_mode"] == ""
+        finally:
+            mod.clear_runtime_main()
+
+
+class TestResolveAutoCustomEndToEnd:
+    """End-to-end routing assertions — build a *real* client (no mock on
+    resolve_provider_client) and verify the auxiliary auto-detect chain lands
+    on the user's custom endpoint instead of falling through to the aggregator
+    chain.  These guard the actual user-visible symptom in #34777 (aux tasks
+    silently routed to a fallback provider) rather than just the wiring.
+    """
+
+    @staticmethod
+    def _client_base_url(client):
+        for chain in (("base_url",), ("_client", "base_url")):
+            obj = client
+            try:
+                for attr in chain:
+                    obj = getattr(obj, attr)
+                return str(obj)
+            except AttributeError:
+                continue
+        return None
+
+    def test_config_less_custom_endpoint_routes_via_global(self, tmp_path, monkeypatch):
+        """custom:<name> with NO config entry: the live base_url carried by
+        set_runtime_main() must build a real client at that endpoint — not
+        fall through to Step 2 (the regression in #34777)."""
+        import agent.auxiliary_client as mod
+
+        # Hermetic: no aggregator creds, no stale OPENAI_BASE_URL.
+        for var in ("OPENROUTER_API_KEY", "NOUS_API_KEY", "OPENAI_API_KEY",
+                    "OPENAI_BASE_URL"):
+            monkeypatch.delenv(var, raising=False)
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        (hermes_home / "config.yaml").write_text(
+            "model:\n"
+            "  default: glm-5.1\n"
+            "  provider: 'custom:ephemeral'\n"
+            "  base_url: ''\n"
+        )
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+        mod.clear_runtime_main()
+        try:
+            mod.set_runtime_main(
+                "custom:ephemeral",
+                "glm-5.1",
+                base_url="https://ephemeral.live/v1",
+                api_key="sk-live",
+            )
+            client, resolved = mod.resolve_provider_client("auto", None)
+            assert client is not None, (
+                "config-less custom endpoint fell through to Step 2 — "
+                "the #34777 bug is back"
+            )
+            assert resolved == "glm-5.1"
+            base = self._client_base_url(client)
+            assert base and base.rstrip("/") == "https://ephemeral.live/v1"
+        finally:
+            mod.clear_runtime_main()
+
+    def test_named_custom_with_config_entry_still_routes(self, tmp_path, monkeypatch):
+        """Regression guard: custom:<name> WITH a custom_providers entry must
+        still resolve to that entry's endpoint.  An earlier competing fix
+        collapsed the provider to bare ``custom`` before resolution, which
+        broke the named-custom branch and returned None here."""
+        import agent.auxiliary_client as mod
+
+        for var in ("OPENROUTER_API_KEY", "NOUS_API_KEY", "OPENAI_API_KEY",
+                    "OPENAI_BASE_URL"):
+            monkeypatch.delenv(var, raising=False)
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        (hermes_home / "config.yaml").write_text(
+            "model:\n"
+            "  default: glm-5.1\n"
+            "  provider: 'custom:openclaw'\n"
+            "  base_url: ''\n"
+            "custom_providers:\n"
+            "  - name: openclaw\n"
+            "    base_url: 'https://withcfg.example/v1'\n"
+            "    model: glm-5.1\n"
+            "    api_key: cfg-key\n"
+        )
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+        # No live base_url carried — resolution must come from config alone,
+        # via the named-custom branch in resolve_provider_client.
+        mod.clear_runtime_main()
+        try:
+            mod.set_runtime_main("custom:openclaw", "glm-5.1")
+            client, resolved = mod.resolve_provider_client("auto", None)
+            assert client is not None
+            base = self._client_base_url(client)
+            assert base and base.rstrip("/") == "https://withcfg.example/v1"
+        finally:
+            mod.clear_runtime_main()
diff --git a/tests/agent/test_summary_prefix_semantics.py b/tests/agent/test_summary_prefix_semantics.py
new file mode 100644
index 00000000000..6e8b8f3a7c4
--- /dev/null
+++ b/tests/agent/test_summary_prefix_semantics.py
@@ -0,0 +1,62 @@
+"""Pin the semantics of SUMMARY_PREFIX so the compaction handoff doesn't
+re-introduce conflicting instructions.
+
+Background: SUMMARY_PREFIX previously contained two contradictory directives:
+
+  1. "treat it as background reference, NOT as active instructions"
+     "Do NOT answer questions or fulfill requests mentioned in this summary"
+     "Respond ONLY to the latest user message that appears AFTER this summary"
+
+  2. "Your current task is identified in the '## Active Task' section of the
+     summary — resume exactly from there."
+
+When the latest user message contradicted Active Task (e.g. "stop the
+i18n refactor", "never mind, look at grafana"), the model often followed
+(2) anyway because "resume exactly" is a strong directive — leading to
+the agent repeatedly re-surfacing already-cancelled work across turns.
+
+These tests pin the post-fix invariants so the conflict cannot regress.
+"""
+
+from agent.context_compressor import SUMMARY_PREFIX
+
+
+def test_no_resume_exactly_directive():
+    """The prefix must not tell the model to resume Active Task verbatim."""
+    assert "resume exactly" not in SUMMARY_PREFIX.lower()
+
+
+def test_latest_message_wins_on_conflict():
+    """The prefix must explicitly say latest user message wins on conflict."""
+    lower = SUMMARY_PREFIX.lower()
+    assert "latest user message" in lower
+    # Must have an explicit conflict-resolution rule.
+    assert "wins" in lower or "supersede" in lower or "discard" in lower
+
+
+def test_reverse_signals_called_out():
+    """Reverse signals (stop/undo/never mind/topic change) must be named so
+    the model recognizes them as cancellation triggers, not just background."""
+    lower = SUMMARY_PREFIX.lower()
+    # At least a few of the canonical reverse-signal verbs should appear.
+    reverse_terms = ["stop", "undo", "roll back", "never mind", "just verify"]
+    hits = sum(1 for t in reverse_terms if t in lower)
+    assert hits >= 3, (
+        f"Expected ≥3 reverse-signal terms in SUMMARY_PREFIX, found {hits}. "
+        "Without naming them the model treats reverse signals as ordinary "
+        "context and keeps pushing the cancelled task."
+    )
+
+
+def test_summary_marked_reference_only():
+    """The REFERENCE ONLY framing must remain — it's the entire point."""
+    assert "REFERENCE ONLY" in SUMMARY_PREFIX
+    assert "background reference" in SUMMARY_PREFIX
+    assert "NOT as active instructions" in SUMMARY_PREFIX
+
+
+def test_memory_authority_preserved():
+    """The fix must not weaken the MEMORY.md / USER.md authority clause."""
+    assert "MEMORY.md" in SUMMARY_PREFIX
+    assert "USER.md" in SUMMARY_PREFIX
+    assert "authoritative" in SUMMARY_PREFIX
diff --git a/tests/cli/test_cli_light_mode.py b/tests/cli/test_cli_light_mode.py
index c1df160e6b1..1a8d51ae6d1 100644
--- a/tests/cli/test_cli_light_mode.py
+++ b/tests/cli/test_cli_light_mode.py
@@ -75,6 +75,27 @@ class TestLightModeDetection:
         assert cli_mod._detect_light_mode() is True
 
 
+class TestOsc11Probe:
+    """The OSC 11 background probe must never run where its reply can leak
+    into prompt_toolkit's input (a late BEL-terminated reply reads as Ctrl+G
+    = open-editor, trapping the user in a stray editor). Guard the cases we
+    refuse to probe in.
+    """
+
+    @pytest.mark.parametrize("var", ("SSH_CONNECTION", "SSH_CLIENT", "SSH_TTY"))
+    def test_skips_over_ssh(self, cli_mod, monkeypatch, var):
+        monkeypatch.setattr(cli_mod.sys.stdin, "isatty", lambda: True, raising=False)
+        monkeypatch.setattr(cli_mod.sys.stdout, "isatty", lambda: True, raising=False)
+        for v in ("SSH_CONNECTION", "SSH_CLIENT", "SSH_TTY"):
+            monkeypatch.delenv(v, raising=False)
+        monkeypatch.setenv(var, "1.2.3.4 5555 22")
+        assert cli_mod._query_osc11_background() is None
+
+    def test_skips_when_not_a_tty(self, cli_mod, monkeypatch):
+        monkeypatch.setattr(cli_mod.sys.stdin, "isatty", lambda: False, raising=False)
+        assert cli_mod._query_osc11_background() is None
+
+
 class TestLightModeRemap:
     def test_remap_no_op_in_dark_mode(self, cli_mod, monkeypatch):
         monkeypatch.setenv("HERMES_LIGHT", "0")
@@ -133,7 +154,9 @@ class TestSkinConfigHook:
         after = SkinConfig.get_color
         assert before is after
 
-    def test_skin_color_remaps_through_wrapper_in_light_mode(self, cli_mod, monkeypatch):
+    def test_skin_color_remaps_through_wrapper_in_light_mode(
+        self, cli_mod, monkeypatch
+    ):
         from hermes_cli.skin_engine import SkinConfig
 
         cli_mod._LIGHT_MODE_CACHE = True
diff --git a/tests/cli/test_steer_inline_repaint_34569.py b/tests/cli/test_steer_inline_repaint_34569.py
new file mode 100644
index 00000000000..8c0bce3d125
--- /dev/null
+++ b/tests/cli/test_steer_inline_repaint_34569.py
@@ -0,0 +1,116 @@
+"""Regression guard for issue #34569 — inline /steer (and /model) submit
+must repaint the input area after clearing the buffer.
+
+Mechanism of the bug
+--------------------
+``handle_enter`` dispatches ``/steer`` (and ``/model``) inline on the UI
+thread while the agent is running.  Those branches called
+``buffer.reset(append_to_history=True)`` but — unlike every *other*
+early-return branch in the handler — did NOT call ``event.app.invalidate()``.
+Because ``process_command()`` prints through ``patch_stdout`` (which scrolls
+output above the prompt and never triggers a prompt_toolkit redraw), the
+just-cleared input area could keep showing the submitted ``/steer <text>``
+until some unrelated redraw fired.  The user saw their submitted text as if
+it were unsent and could accidentally re-submit it.
+
+This test pins the contract structurally: inside ``handle_enter``, any
+inline-command early-return that resets the buffer must be followed by an
+``event.app.invalidate()`` before its ``return``.  It is an *invariant*
+(every reset-then-return repaints), not a snapshot of current source.
+"""
+
+from __future__ import annotations
+
+import ast
+from pathlib import Path
+
+
+def _load_handle_enter_node() -> ast.FunctionDef:
+    """Extract the ``handle_enter`` nested function node from cli.py."""
+    cli_path = Path(__file__).resolve().parents[2] / "cli.py"
+    tree = ast.parse(cli_path.read_text(encoding="utf-8"))
+
+    target = None
+    for node in ast.walk(tree):
+        if isinstance(node, ast.FunctionDef) and node.name == "handle_enter":
+            target = node
+            break
+    assert target is not None, "handle_enter closure not found in cli.py"
+    return target
+
+
+def _is_buffer_reset(node: ast.stmt) -> bool:
+    """True if the statement is ``...current_buffer.reset(...)``."""
+    if not isinstance(node, ast.Expr):
+        return False
+    call = node.value
+    if not isinstance(call, ast.Call):
+        return False
+    func = call.func
+    return isinstance(func, ast.Attribute) and func.attr == "reset"
+
+
+def _is_invalidate(node: ast.stmt) -> bool:
+    """True if the statement is ``event.app.invalidate()``."""
+    if not isinstance(node, ast.Expr):
+        return False
+    call = node.value
+    if not isinstance(call, ast.Call):
+        return False
+    func = call.func
+    return isinstance(func, ast.Attribute) and func.attr == "invalidate"
+
+
+def _collect_reset_blocks(func: ast.FunctionDef) -> list[list[ast.stmt]]:
+    """Find every statement sequence (a block body/orelse/finalbody) within
+    ``handle_enter`` that contains a ``buffer.reset()`` call."""
+    blocks: list[list[ast.stmt]] = []
+    for node in ast.walk(func):
+        for attr in ("body", "orelse", "finalbody"):
+            seq = getattr(node, attr, None)
+            if not isinstance(seq, list):
+                continue
+            if any(isinstance(s, ast.stmt) and _is_buffer_reset(s) for s in seq):
+                blocks.append(seq)
+    return blocks
+
+
+def test_inline_command_reset_branches_invalidate():
+    """Every handle_enter branch that resets the buffer and then returns must
+    invalidate the app first (issue #34569)."""
+    func = _load_handle_enter_node()
+    reset_blocks = _collect_reset_blocks(func)
+
+    assert reset_blocks, "expected to find buffer.reset() calls in handle_enter"
+
+    offenders = []
+    for seq in reset_blocks:
+        for i, stmt in enumerate(seq):
+            if not _is_buffer_reset(stmt):
+                continue
+            # Find the next return after this reset in the same block.
+            ret_idx = None
+            for j in range(i + 1, len(seq)):
+                if isinstance(seq[j], ast.Return):
+                    ret_idx = j
+                    break
+            if ret_idx is None:
+                # reset not directly followed by a return in this block
+                # (e.g. the fall-through reset at the end of the handler) —
+                # the next user input naturally repaints, so skip.
+                continue
+            between = seq[i + 1 : ret_idx]
+            if not any(_is_invalidate(s) for s in between):
+                offenders.append(ast.dump(stmt))
+
+    assert not offenders, (
+        "handle_enter has reset-then-return branch(es) that never call "
+        "event.app.invalidate() — the input area can keep showing the "
+        "submitted text (issue #34569). Offending reset stmts:\n"
+        + "\n".join(offenders)
+    )
+
+
+if __name__ == "__main__":  # pragma: no cover
+    test_inline_command_reset_branches_invalidate()
+    print("ok")
diff --git a/tests/gateway/test_agent_cache.py b/tests/gateway/test_agent_cache.py
index 0c6e2df3bd9..37f8b51a458 100644
--- a/tests/gateway/test_agent_cache.py
+++ b/tests/gateway/test_agent_cache.py
@@ -276,6 +276,111 @@ class TestExtractCacheBustingConfig:
 
         assert out["tools.registry_generation"] == 12345
 
+
+    def test_skips_honcho_config_read_when_provider_is_not_honcho(self, monkeypatch):
+        """Non-Honcho gateways must not read/parse honcho.json on every message."""
+        from gateway.run import GatewayRunner
+
+        called = False
+
+        def _boom():
+            nonlocal called
+            called = True
+            raise AssertionError("should not read Honcho config")
+
+        monkeypatch.setattr(GatewayRunner, "_extract_honcho_cache_busting_config", _boom)
+
+        out = GatewayRunner._extract_cache_busting_config({"memory": {"provider": "mem0"}})
+
+        assert called is False
+        assert out["honcho.peer_name"] is None
+        assert out["honcho.user_peer_aliases"] is None
+
+    def test_reads_honcho_config_only_when_provider_is_honcho(self, monkeypatch):
+        from gateway.run import GatewayRunner
+
+        calls = []
+
+        def _fake():
+            calls.append(True)
+            return {
+                "honcho.peer_name": "eri",
+                "honcho.ai_peer": "hermes",
+                "honcho.pin_peer_name": True,
+                "honcho.runtime_peer_prefix": "tg_",
+                "honcho.user_peer_aliases": [("123", "eri")],
+            }
+
+        monkeypatch.setattr(GatewayRunner, "_extract_honcho_cache_busting_config", _fake)
+
+        out = GatewayRunner._extract_cache_busting_config({"memory": {"provider": "honcho"}})
+
+        assert calls == [True]
+        assert out["honcho.peer_name"] == "eri"
+        assert out["honcho.user_peer_aliases"] == [("123", "eri")]
+
+    def test_memory_provider_change_busts_signature(self, monkeypatch):
+        """Switching memory.provider must itself change the cache-busting
+        signature, so the agent is rebuilt when a user swaps providers
+        mid-gateway (independent of the honcho.json identity keys)."""
+        from gateway.run import GatewayRunner
+
+        # Neutralize honcho.json reads so the only varying input is the
+        # provider value itself.
+        monkeypatch.setattr(
+            GatewayRunner,
+            "_extract_honcho_cache_busting_config",
+            classmethod(lambda cls: cls._empty_honcho_cache_busting_config()),
+        )
+
+        sig_honcho = GatewayRunner._extract_cache_busting_config({"memory": {"provider": "honcho"}})
+        sig_mem0 = GatewayRunner._extract_cache_busting_config({"memory": {"provider": "mem0"}})
+
+        assert sig_honcho["memory.provider"] == "honcho"
+        assert sig_mem0["memory.provider"] == "mem0"
+        assert sig_honcho != sig_mem0
+
+    def test_honcho_cache_busting_config_memoized_by_mtime(self, monkeypatch, tmp_path):
+        """Repeated Honcho extraction for unchanged honcho.json should reuse parse result."""
+        from types import SimpleNamespace
+        from gateway.run import GatewayRunner
+
+        config_path = tmp_path / "honcho.json"
+        config_path.write_text("{}")
+        parse_calls = []
+
+        class FakeConfig:
+            peer_name = "eri"
+            ai_peer = "hermes"
+            pin_peer_name = False
+            runtime_peer_prefix = "tg_"
+            user_peer_aliases = {"123": "eri"}
+
+            @classmethod
+            def from_global_config(cls, config_path=None):
+                parse_calls.append(config_path)
+                return cls()
+
+        fake_client = SimpleNamespace(
+            HonchoClientConfig=FakeConfig,
+            resolve_config_path=lambda: config_path,
+        )
+        monkeypatch.setitem(__import__("sys").modules, "plugins.memory.honcho.client", fake_client)
+        monkeypatch.setattr(GatewayRunner, "_HONCHO_CACHE_BUSTING_MEMO", {})
+
+        first = GatewayRunner._extract_honcho_cache_busting_config()
+        second = GatewayRunner._extract_honcho_cache_busting_config()
+
+        assert first == second
+        assert first["honcho.user_peer_aliases"] == [("123", "eri")]
+        assert parse_calls == [config_path]
+
+        config_path.write_text("{\n  \"changed\": true\n}")
+        third = GatewayRunner._extract_honcho_cache_busting_config()
+
+        assert third == first
+        assert parse_calls == [config_path, config_path]
+
     def test_full_round_trip_busts_cache_on_real_edit(self):
         """End-to-end: simulate a config edit on main and verify the
         extracted cache_keys change produces a new signature."""
diff --git a/tests/gateway/test_config.py b/tests/gateway/test_config.py
index da7673011fe..da970eccf63 100644
--- a/tests/gateway/test_config.py
+++ b/tests/gateway/test_config.py
@@ -343,6 +343,56 @@ class TestLoadGatewayConfig:
         # Env value preserved, not clobbered by yaml.
         assert os.environ.get("DISCORD_THREAD_REQUIRE_MENTION") == "true"
 
+    def test_bridges_discord_allow_from_from_config_yaml(self, tmp_path, monkeypatch):
+        """discord.allow_from should populate DISCORD_ALLOWED_USERS for auth."""
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        config_path = hermes_home / "config.yaml"
+        config_path.write_text(
+            "discord:\n"
+            "  allow_from:\n"
+            "    - \"123456789012345678\"\n"
+            "    - \"999888777666555444\"\n",
+            encoding="utf-8",
+        )
+
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        monkeypatch.delenv("DISCORD_ALLOWED_USERS", raising=False)
+
+        config = load_gateway_config()
+
+        assert config.platforms[Platform.DISCORD].extra["allow_from"] == [
+            "123456789012345678",
+            "999888777666555444",
+        ]
+        assert os.environ.get("DISCORD_ALLOWED_USERS") == (
+            "123456789012345678,999888777666555444"
+        )
+
+    def test_bridges_discord_platform_extra_allow_from_to_env(self, tmp_path, monkeypatch):
+        """platforms.discord.extra.allow_from should reach DISCORD_ALLOWED_USERS too."""
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        config_path = hermes_home / "config.yaml"
+        config_path.write_text(
+            "platforms:\n"
+            "  discord:\n"
+            "    extra:\n"
+            "      allow_from:\n"
+            "        - \"123456789012345678\"\n",
+            encoding="utf-8",
+        )
+
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        monkeypatch.delenv("DISCORD_ALLOWED_USERS", raising=False)
+
+        config = load_gateway_config()
+
+        assert config.platforms[Platform.DISCORD].extra["allow_from"] == [
+            "123456789012345678",
+        ]
+        assert os.environ.get("DISCORD_ALLOWED_USERS") == "123456789012345678"
+
     def test_bridges_quoted_false_platform_enabled_from_config_yaml(self, tmp_path, monkeypatch):
         hermes_home = tmp_path / ".hermes"
         hermes_home.mkdir()
@@ -361,6 +411,69 @@ class TestLoadGatewayConfig:
         assert config.platforms[Platform.API_SERVER].enabled is False
         assert Platform.API_SERVER not in config.get_connected_platforms()
 
+    def test_bridges_nested_gateway_platforms_from_config_yaml(self, tmp_path, monkeypatch):
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        config_path = hermes_home / "config.yaml"
+        config_path.write_text(
+            "gateway:\n"
+            "  platforms:\n"
+            "    telegram:\n"
+            "      enabled: true\n"
+            "      token: nested-token\n"
+            "      home_channel:\n"
+            "        platform: telegram\n"
+            "        chat_id: \"123\"\n"
+            "        name: Nested Home\n"
+            "      extra:\n"
+            "        reply_prefix: nested\n",
+            encoding="utf-8",
+        )
+
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+        config = load_gateway_config()
+
+        telegram = config.platforms[Platform.TELEGRAM]
+        assert telegram.enabled is True
+        assert telegram.token == "nested-token"
+        assert telegram.home_channel == HomeChannel(
+            platform=Platform.TELEGRAM,
+            chat_id="123",
+            name="Nested Home",
+        )
+        assert telegram.extra["reply_prefix"] == "nested"
+
+    def test_top_level_platforms_override_nested_gateway_platforms(self, tmp_path, monkeypatch):
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        config_path = hermes_home / "config.yaml"
+        config_path.write_text(
+            "gateway:\n"
+            "  platforms:\n"
+            "    telegram:\n"
+            "      enabled: false\n"
+            "      token: nested-token\n"
+            "      extra:\n"
+            "        reply_prefix: nested\n"
+            "platforms:\n"
+            "  telegram:\n"
+            "    enabled: true\n"
+            "    token: top-token\n"
+            "    extra:\n"
+            "      reply_prefix: top\n",
+            encoding="utf-8",
+        )
+
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+        config = load_gateway_config()
+
+        telegram = config.platforms[Platform.TELEGRAM]
+        assert telegram.enabled is True
+        assert telegram.token == "top-token"
+        assert telegram.extra["reply_prefix"] == "top"
+
     def test_bridges_quoted_false_session_notify_from_config_yaml(self, tmp_path, monkeypatch):
         hermes_home = tmp_path / ".hermes"
         hermes_home.mkdir()
diff --git a/tests/gateway/test_delivery_silence_filter.py b/tests/gateway/test_delivery_silence_filter.py
new file mode 100644
index 00000000000..d52d9876997
--- /dev/null
+++ b/tests/gateway/test_delivery_silence_filter.py
@@ -0,0 +1,202 @@
+"""Tests for the outbound silence-narration filter (anti-loop control).
+
+See the gateway delivery path: hallucinated "silence" tokens like ``*(silent)*``
+are dropped pre-send so bot-to-bot channels can't mirror them into a token-burning
+loop that crashes a model with "no content after all retries".
+"""
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform
+from gateway.delivery import (
+    DeliveryRouter,
+    DeliveryTarget,
+    _is_silence_narration,
+)
+
+
+# --- Truth table -----------------------------------------------------------
+
+POSITIVE_CASES = [
+    "*(silent)*",
+    "*Silence.*",
+    "🔇",
+    ".",
+    "…",
+    "...",
+    "(silent)",
+    "_silent_",
+    "silent",
+    " *(silent)* ",
+    "`silent`",
+    "~silent~",
+    "Silence",
+    "no response",
+    "No Reply.",
+]
+
+NEGATIVE_CASES = [
+    "Silence is golden — here is the plan...",
+    "Silent install completed",
+    "The deployment ran silently in the background",
+    "ok",
+    "👍",
+    "Here is the result:\n\n- item one\n- item two",
+    "I have nothing to add, but here is why: the build is green.",
+    "silently",  # word boundary — trailing letters mean it isn't a bare token
+    "no responses were collected from the survey",
+    # A 64+ char string that opens with a silence token must not be dropped.
+    "silent " + "x" * 70,
+    "",
+    "   ",
+]
+
+
+@pytest.mark.parametrize("content", POSITIVE_CASES)
+def test_is_silence_narration_positive(content):
+    assert _is_silence_narration(content) is True
+
+
+@pytest.mark.parametrize("content", NEGATIVE_CASES)
+def test_is_silence_narration_negative(content):
+    assert _is_silence_narration(content) is False
+
+
+def test_is_silence_narration_none_safe():
+    assert _is_silence_narration(None) is False
+
+
+def test_length_guard_rejects_long_strings():
+    # Exactly 65 chars of dots — over the 64-char guard, so not treated as narration.
+    assert _is_silence_narration("." * 65) is False
+    assert _is_silence_narration("." * 64) is True
+
+
+# --- Integration through DeliveryRouter ------------------------------------
+
+class RecordingAdapter:
+    def __init__(self):
+        self.calls = []
+
+    async def send(self, chat_id, content, metadata=None):
+        self.calls.append({"chat_id": chat_id, "content": content, "metadata": metadata})
+        return {"success": True}
+
+
+@pytest.mark.asyncio
+async def test_silence_narration_dropped_pre_send(tmp_path, monkeypatch):
+    monkeypatch.setattr("gateway.delivery.get_hermes_home", lambda: tmp_path)
+    monkeypatch.delenv("HERMES_FILTER_SILENCE_NARRATION", raising=False)
+    adapter = RecordingAdapter()
+    router = DeliveryRouter(GatewayConfig(), adapters={Platform.DISCORD: adapter})
+    target = DeliveryTarget.parse("discord:99887766")
+
+    result = await router._deliver_to_platform(target, "*(silent)*", metadata=None)
+
+    assert adapter.calls == []  # adapter.send never invoked
+    assert result == {
+        "success": True,
+        "filtered": "silence_narration",
+        "delivered": False,
+    }
+
+
+@pytest.mark.asyncio
+async def test_real_message_is_delivered(tmp_path, monkeypatch):
+    monkeypatch.setattr("gateway.delivery.get_hermes_home", lambda: tmp_path)
+    monkeypatch.delenv("HERMES_FILTER_SILENCE_NARRATION", raising=False)
+    adapter = RecordingAdapter()
+    router = DeliveryRouter(GatewayConfig(), adapters={Platform.DISCORD: adapter})
+    target = DeliveryTarget.parse("discord:99887766")
+
+    result = await router._deliver_to_platform(
+        target, "Silence is golden — here is the plan...", metadata=None
+    )
+
+    assert len(adapter.calls) == 1
+    assert adapter.calls[0]["content"] == "Silence is golden — here is the plan..."
+    assert result == {"success": True}
+
+
+@pytest.mark.asyncio
+async def test_config_opt_out_lets_silence_through(tmp_path, monkeypatch):
+    monkeypatch.setattr("gateway.delivery.get_hermes_home", lambda: tmp_path)
+    monkeypatch.delenv("HERMES_FILTER_SILENCE_NARRATION", raising=False)
+    adapter = RecordingAdapter()
+    config = GatewayConfig(filter_silence_narration=False)
+    router = DeliveryRouter(config, adapters={Platform.DISCORD: adapter})
+    target = DeliveryTarget.parse("discord:99887766")
+
+    result = await router._deliver_to_platform(target, "*(silent)*", metadata=None)
+
+    assert len(adapter.calls) == 1
+    assert adapter.calls[0]["content"] == "*(silent)*"
+    assert result == {"success": True}
+
+
+@pytest.mark.asyncio
+async def test_env_override_disables_filter(tmp_path, monkeypatch):
+    monkeypatch.setattr("gateway.delivery.get_hermes_home", lambda: tmp_path)
+    monkeypatch.setenv("HERMES_FILTER_SILENCE_NARRATION", "0")
+    adapter = RecordingAdapter()
+    # Config default is True, but env override wins.
+    router = DeliveryRouter(GatewayConfig(), adapters={Platform.DISCORD: adapter})
+    target = DeliveryTarget.parse("discord:99887766")
+
+    result = await router._deliver_to_platform(target, "🔇", metadata=None)
+
+    assert len(adapter.calls) == 1
+    assert result == {"success": True}
+
+
+@pytest.mark.asyncio
+async def test_env_override_enables_filter_over_config(tmp_path, monkeypatch):
+    monkeypatch.setattr("gateway.delivery.get_hermes_home", lambda: tmp_path)
+    monkeypatch.setenv("HERMES_FILTER_SILENCE_NARRATION", "1")
+    adapter = RecordingAdapter()
+    # Config says off, env override forces on.
+    config = GatewayConfig(filter_silence_narration=False)
+    router = DeliveryRouter(config, adapters={Platform.DISCORD: adapter})
+    target = DeliveryTarget.parse("discord:99887766")
+
+    result = await router._deliver_to_platform(target, "*(silent)*", metadata=None)
+
+    assert adapter.calls == []
+    assert result["filtered"] == "silence_narration"
+
+
+@pytest.mark.asyncio
+async def test_local_delivery_not_filtered(tmp_path, monkeypatch):
+    monkeypatch.setattr("gateway.delivery.get_hermes_home", lambda: tmp_path)
+    monkeypatch.delenv("HERMES_FILTER_SILENCE_NARRATION", raising=False)
+    router = DeliveryRouter(GatewayConfig(), adapters={})
+
+    results = await router.deliver(
+        content="*(silent)*",
+        targets=[DeliveryTarget.parse("local")],
+        job_id="silence-job",
+    )
+
+    # Local path saved the file (no loop risk) and was not filtered.
+    local_result = results["local"]
+    assert local_result["success"] is True
+    saved_path = local_result["result"]["path"]
+    assert saved_path.endswith(".md")
+
+
+# --- Config round-trip ------------------------------------------------------
+
+def test_config_flag_defaults_true():
+    assert GatewayConfig().filter_silence_narration is True
+
+
+def test_config_from_dict_parses_flag():
+    cfg = GatewayConfig.from_dict({"filter_silence_narration": False})
+    assert cfg.filter_silence_narration is False
+
+
+def test_config_to_dict_roundtrip():
+    cfg = GatewayConfig(filter_silence_narration=False)
+    assert cfg.to_dict()["filter_silence_narration"] is False
+    restored = GatewayConfig.from_dict(cfg.to_dict())
+    assert restored.filter_silence_narration is False
diff --git a/tests/gateway/test_empty_model_recovery.py b/tests/gateway/test_empty_model_recovery.py
new file mode 100644
index 00000000000..2c4be447931
--- /dev/null
+++ b/tests/gateway/test_empty_model_recovery.py
@@ -0,0 +1,147 @@
+"""Regression tests for #35314 — empty model on the post-interrupt recovery turn.
+
+After a ``stream_interrupt_abort`` during an active gateway session, the recovery
+turn was sometimes built with ``model=""`` (a transient config-cache miss returned
+an empty ``user_config``). Every API call then failed HTTP 400 "No models
+provided", "trying fallback..." was logged but never executed (the user had no
+fallback configured), and the session went silent until the user re-sent.
+
+These tests pin two fixes:
+  1. ``_resolve_session_agent_runtime`` caches the last successfully-resolved
+     model per session and recovers it when a fresh resolution comes back empty.
+  2. ``_has_pending_fallback`` gates the "trying fallback..." status so it is only
+     announced when a fallback chain actually exists.
+"""
+
+import threading
+
+import gateway.run as gateway_run
+
+
+def _make_runner():
+    runner = object.__new__(gateway_run.GatewayRunner)
+    runner._session_model_overrides = {}
+    runner._last_resolved_model = {}
+    runner._service_tier = None
+    runner._agent_cache = {}
+    runner._agent_cache_lock = threading.Lock()
+    return runner
+
+
+def _patch_resolution(monkeypatch, *, model_from_config: str, provider: str = "openrouter"):
+    """Stub gateway model + runtime resolution to a known state."""
+    monkeypatch.setattr(gateway_run, "_resolve_gateway_model", lambda cfg=None: model_from_config)
+    monkeypatch.setattr(
+        gateway_run,
+        "_resolve_runtime_agent_kwargs",
+        lambda: {
+            "provider": provider,
+            "api_key": "x",
+            "base_url": "https://openrouter.ai/api/v1",
+            "api_mode": "chat_completions",
+        },
+    )
+
+
+def test_normal_turn_caches_last_resolved_model(monkeypatch):
+    _patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash")
+    runner = _make_runner()
+    sk = "agent:main:discord:dm:123"
+
+    model, _ = runner._resolve_session_agent_runtime(session_key=sk, user_config={"model": {"default": "x"}})
+
+    assert model == "deepseek/deepseek-v4-flash"
+    # Cached per-session AND process-wide for first-seen-session recovery.
+    assert runner._last_resolved_model[sk] == "deepseek/deepseek-v4-flash"
+    assert runner._last_resolved_model["*"] == "deepseek/deepseek-v4-flash"
+
+
+def test_empty_model_recovers_session_last_good(monkeypatch):
+    runner = _make_runner()
+    sk = "agent:main:discord:dm:123"
+
+    # Turn 1: config has the model — cache it.
+    _patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash")
+    runner._resolve_session_agent_runtime(session_key=sk, user_config={"model": {"default": "x"}})
+
+    # Turn 2: simulate the transient empty config read (the #35314 race).
+    _patch_resolution(monkeypatch, model_from_config="", provider="")
+    model, _ = runner._resolve_session_agent_runtime(session_key=sk, user_config={})
+
+    assert model == "deepseek/deepseek-v4-flash", "recovery turn must reuse last-known-good, not build model=''"
+
+
+def test_empty_model_new_session_recovers_global_last_good(monkeypatch):
+    runner = _make_runner()
+
+    # Prime a different session so the process-wide "*" slot is populated.
+    _patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash")
+    runner._resolve_session_agent_runtime(session_key="agent:main:discord:dm:111", user_config={"model": {}})
+
+    # A brand-new session that hits an empty config read still recovers via "*".
+    _patch_resolution(monkeypatch, model_from_config="", provider="")
+    model, _ = runner._resolve_session_agent_runtime(session_key="agent:main:discord:dm:999", user_config={})
+
+    assert model == "deepseek/deepseek-v4-flash"
+
+
+def test_cold_start_empty_model_does_not_crash(monkeypatch):
+    """No last-good anywhere + empty config → returns '' gracefully (no exception)."""
+    _patch_resolution(monkeypatch, model_from_config="", provider="")
+    runner = _make_runner()
+
+    model, _ = runner._resolve_session_agent_runtime(session_key="agent:main:discord:dm:1", user_config={})
+
+    assert model == ""
+
+
+def test_bare_runner_without_cache_attr_does_not_crash(monkeypatch):
+    """object.__new__ runners (test helpers / pitfall #17) lack _last_resolved_model.
+
+    The getattr guard must tolerate the missing attribute.
+    """
+    _patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash")
+    runner = object.__new__(gateway_run.GatewayRunner)
+    runner._session_model_overrides = {}
+    runner._service_tier = None
+    # Deliberately omit _last_resolved_model.
+
+    model, _ = runner._resolve_session_agent_runtime(session_key="x", user_config={"model": {}})
+
+    assert model == "deepseek/deepseek-v4-flash"
+
+
+# ── _has_pending_fallback gate ──────────────────────────────────────────────
+
+
+def _bare_agent():
+    import run_agent
+
+    return object.__new__(run_agent.AIAgent)
+
+
+def test_has_pending_fallback_empty_chain():
+    agent = _bare_agent()
+    agent._fallback_chain = []
+    agent._fallback_index = 0
+    assert agent._has_pending_fallback() is False
+
+
+def test_has_pending_fallback_with_chain():
+    agent = _bare_agent()
+    agent._fallback_chain = [{"provider": "openai", "model": "gpt-5"}]
+    agent._fallback_index = 0
+    assert agent._has_pending_fallback() is True
+
+
+def test_has_pending_fallback_exhausted_chain():
+    agent = _bare_agent()
+    agent._fallback_chain = [{"provider": "openai", "model": "gpt-5"}]
+    agent._fallback_index = 1
+    assert agent._has_pending_fallback() is False
+
+
+def test_has_pending_fallback_missing_attrs():
+    """Bare agent with no fallback attributes set must default to False, not crash."""
+    agent = _bare_agent()
+    assert agent._has_pending_fallback() is False
diff --git a/tests/gateway/test_extract_local_files.py b/tests/gateway/test_extract_local_files.py
index 1a1111f948c..bbdaced6b33 100644
--- a/tests/gateway/test_extract_local_files.py
+++ b/tests/gateway/test_extract_local_files.py
@@ -336,9 +336,35 @@ class TestEdgeCases:
         paths, _ = _extract("File at /tmp/my file.png here")
         assert paths == []
 
-    def test_windows_path_not_matched(self):
-        """Windows-style paths should not match."""
-        paths, _ = _extract("See C:\\Users\\test\\image.png")
+    @pytest.mark.parametrize(
+        "content,expected",
+        [
+            # Backslash separators (native Windows style)
+            ("See C:\\Users\\test\\image.png here", "C:\\Users\\test\\image.png"),
+            # Forward slashes with drive letter (common in cross-platform code)
+            ("See C:/Users/test/image.png here", "C:/Users/test/image.png"),
+            # Non-C: drive
+            ("Video at D:/data/clip.mp4 ready", "D:/data/clip.mp4"),
+            # Lowercase drive letter
+            ("Path e:/audio/track.mp3 done", "e:/audio/track.mp3"),
+        ],
+    )
+    def test_windows_drive_letter_paths_matched(self, content, expected):
+        """Windows drive-letter paths (C:/..., C:\\...) must be detected (#34632).
+
+        Prior behavior anchored on (?:~/|/) only, which silently dropped
+        Windows absolute paths so the agent's bare-path references were
+        sent as text instead of native uploads.
+        """
+        paths, cleaned = _extract(content)
+        assert paths == [expected]
+        assert expected not in cleaned
+
+    def test_relative_windows_path_not_matched(self):
+        """A bare Windows-style filename without a drive letter must still
+        not match (e.g. ``foo\\bar.png`` is treated as relative, like its
+        Unix sibling ``foo/bar.png``)."""
+        paths, _ = _extract("File at foo\\bar.png here")
         assert paths == []
 
     def test_relative_path_not_matched(self):
diff --git a/tests/gateway/test_platform_base.py b/tests/gateway/test_platform_base.py
index e0f2c80cb04..2cc8118b7b2 100644
--- a/tests/gateway/test_platform_base.py
+++ b/tests/gateway/test_platform_base.py
@@ -361,6 +361,45 @@ class TestExtractMedia:
         assert "[[audio_as_voice]]" not in cleaned
         assert "[[as_document]]" not in cleaned
 
+    # Windows path support — regression coverage for #34632
+
+    def test_media_tag_windows_backslash_path(self):
+        """extract_media should recognise Windows backslash paths."""
+        media, cleaned = BasePlatformAdapter.extract_media(
+            r"MEDIA:C:\Users\kotsu\file.pdf"
+        )
+        assert len(media) == 1
+        assert media[0][0].endswith("file.pdf")
+
+    def test_media_tag_windows_forward_slash_path(self):
+        """extract_media should recognise Windows forward-slash paths."""
+        media, cleaned = BasePlatformAdapter.extract_media(
+            "MEDIA:C:/Users/kotsu/file.pdf"
+        )
+        assert len(media) == 1
+        assert media[0][0].endswith("file.pdf")
+
+    def test_media_tag_windows_drive_root(self):
+        """extract_media should recognise a path at the drive root."""
+        media, cleaned = BasePlatformAdapter.extract_media(
+            r"MEDIA:D:\report.md"
+        )
+        assert len(media) == 1
+        assert media[0][0].endswith("report.md")
+
+    def test_media_tag_unix_paths_still_work(self):
+        """Unix absolute and tilde paths must still extract after Windows change."""
+        for content in ["MEDIA:/tmp/audio.ogg", r"MEDIA:~/docs/notes.md"]:
+            media, _ = BasePlatformAdapter.extract_media(content)
+            assert len(media) == 1, f"Failed for: {content}"
+
+    def test_relative_path_still_ignored(self):
+        """Relative Windows-style paths (no drive letter) must not match."""
+        media, _ = BasePlatformAdapter.extract_media(
+            r"MEDIA:Users\kotsu\file.pdf"
+        )
+        assert media == []
+
 
 class TestMediaExtensionAllowlistParity:
     """Regression coverage for issue #34517 — the MEDIA: extension black hole.
diff --git a/tests/gateway/test_platform_reconnect.py b/tests/gateway/test_platform_reconnect.py
index 1a5a35a42e7..3cd507550c5 100644
--- a/tests/gateway/test_platform_reconnect.py
+++ b/tests/gateway/test_platform_reconnect.py
@@ -294,19 +294,20 @@ class TestPlatformReconnectWatcher:
         assert runner._failed_platforms[Platform.TELEGRAM]["attempts"] == 2
 
     @pytest.mark.asyncio
-    async def test_reconnect_pauses_after_circuit_breaker_threshold(self):
-        """After enough consecutive retryable failures, the watcher should
-        *pause* the platform (keep it in the queue but stop hammering it),
-        not drop it. The user resumes via /platform resume.
+    async def test_reconnect_never_auto_pauses_retryable_failures(self):
+        """Retryable failures (network/DNS) must keep retrying indefinitely —
+        the watcher must NOT auto-pause them. Auto-pausing a transiently-failed
+        platform left bots silently dead after a DNS blip (#35284). The pause
+        circuit breaker remains available for manual /platform pause only.
         """
         runner = _make_runner()
 
         platform_config = PlatformConfig(enabled=True, token="test")
-        # 9 prior attempts — the next failure will be the 10th and should
-        # trip the circuit breaker.
+        # Far past the old circuit-breaker threshold (10): even after many
+        # consecutive retryable failures the platform must stay unpaused.
         runner._failed_platforms[Platform.TELEGRAM] = {
             "config": platform_config,
-            "attempts": 9,
+            "attempts": 25,
             "next_retry": time.monotonic() - 1,
         }
 
@@ -332,12 +333,15 @@ class TestPlatformReconnectWatcher:
 
             await run_one_iteration()
 
-        # Platform stays in queue — paused, not dropped
+        # Platform stays in queue and keeps retrying — never auto-paused.
         assert Platform.TELEGRAM in runner._failed_platforms
         info = runner._failed_platforms[Platform.TELEGRAM]
-        assert info["paused"] is True
-        assert info["attempts"] == 10
-        assert "pause_reason" in info
+        assert info.get("paused") is not True
+        assert "pause_reason" not in info
+        assert info["attempts"] == 26
+        # next_retry is pushed out by the backoff (capped at 300s), not inf.
+        assert info["next_retry"] != float("inf")
+        assert info["next_retry"] > time.monotonic()
 
     @pytest.mark.asyncio
     async def test_reconnect_skips_paused_platforms(self):
diff --git a/tests/gateway/test_run_tool_media_re.py b/tests/gateway/test_run_tool_media_re.py
new file mode 100644
index 00000000000..8f6da226876
--- /dev/null
+++ b/tests/gateway/test_run_tool_media_re.py
@@ -0,0 +1,147 @@
+r"""Tests for _TOOL_MEDIA_RE regex patterns in gateway/run.py.
+
+Issue #34632: The _TOOL_MEDIA_RE patterns in GatewayRunner used (?:/|~\/) to
+anchor paths, which only matched Unix-style absolute and home-relative paths.
+Windows absolute paths (C:\\Users\\..., D:/...) were silently ignored, causing
+MEDIA directive delivery to fail on Windows.
+
+Fix: Add [A-Za-z]:[/\\\\] as a third anchor alternative in both patterns.
+
+Two identical _TOOL_MEDIA_RE patterns exist in run.py:
+1. History scanning (~L17223): collects already-seen media paths
+2. Result scanning (~L17549): extracts new media tags from agent output
+
+This test file validates that both equivalent regex patterns correctly match
+Windows paths while preserving existing Unix path matching behavior.
+"""
+
+import re
+
+import pytest
+
+
+# Reconstruct the exact _TOOL_MEDIA_RE pattern from gateway/run.py
+# The pattern is built by concatenating raw string parts:
+#   r'MEDIA:((?:[A-Za-z]:[/\\]|/|~\/)\S+\.(?:png|...))'
+_TOOL_MEDIA_RE = re.compile(
+    r'MEDIA:((?:[A-Za-z]:[/\\]|/|~\/)\S+\.(?:png|jpe?g|gif|webp|'
+    r'mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|'
+    r'flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|'
+    r'txt|csv|apk|ipa))',
+    re.IGNORECASE,
+)
+
+
+# Reconstruct the pre-fix pattern (without Windows anchor) for regression proof
+_TOOL_MEDIA_RE_PRE_FIX = re.compile(
+    r'MEDIA:((?:/|~\/)\S+\.(?:png|jpe?g|gif|webp|'
+    r'mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|'
+    r'flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|'
+    r'txt|csv|apk|ipa))',
+    re.IGNORECASE,
+)
+
+
+class TestToolMediaReWindowsPaths:
+    """Issue #34632: _TOOL_MEDIA_RE must match Windows absolute paths."""
+
+    # ── Positive: Windows paths now match ──────────────────────────
+
+    @pytest.mark.parametrize("media_tag, expected_path", [
+        # Windows backslash paths
+        ("MEDIA:C:\\Users\\test\\image.png", "C:\\Users\\test\\image.png"),
+        ("MEDIA:D:\\data\\report.pdf", "D:\\data\\report.pdf"),
+        ("MEDIA:E:\\Photos\\vacation.jpg", "E:\\Photos\\vacation.jpg"),
+        # Windows forward-slash paths
+        ("MEDIA:C:/Users/test/image.png", "C:/Users/test/image.png"),
+        ("MEDIA:D:/data/report.pdf", "D:/data/report.pdf"),
+        # Mixed separators
+        ("MEDIA:C:\\Users/test\\image.webp", "C:\\Users/test\\image.webp"),
+        # Various extensions
+        ("MEDIA:F:\\videos\\clip.mp4", "F:\\videos\\clip.mp4"),
+        ("MEDIA:G:\\audio\\song.mp3", "G:\\audio\\song.mp3"),
+        ("MEDIA:H:\\docs\\sheet.xlsx", "H:\\docs\\sheet.xlsx"),
+        ("MEDIA:Z:\\archive\\backup.zip", "Z:\\archive\\backup.zip"),
+    ])
+    def test_windows_paths_match(self, media_tag, expected_path):
+        """Windows absolute paths with drive letters are matched."""
+        match = _TOOL_MEDIA_RE.search(media_tag)
+        assert match is not None, f"Should match: {media_tag}"
+        assert match.group(1) == expected_path
+
+    # ── Positive: Unix paths still match ───────────────────────────
+
+    @pytest.mark.parametrize("media_tag, expected_path", [
+        ("MEDIA:/tmp/output.png", "/tmp/output.png"),
+        ("MEDIA:/var/log/report.pdf", "/var/log/report.pdf"),
+        ("MEDIA:/home/user/docs/file.txt", "/home/user/docs/file.txt"),
+        # Home-relative
+        ("MEDIA:~/Downloads/image.jpg", "~/Downloads/image.jpg"),
+        ("MEDIA:~/Documents/report.pdf", "~/Documents/report.pdf"),
+    ])
+    def test_unix_paths_still_match(self, media_tag, expected_path):
+        """Unix-style absolute and home-relative paths still match."""
+        match = _TOOL_MEDIA_RE.search(media_tag)
+        assert match is not None, f"Should match: {media_tag}"
+        assert match.group(1) == expected_path
+
+    # ── Negative: invalid paths don't match ────────────────────────
+
+    @pytest.mark.parametrize("text", [
+        "No MEDIA tag here",
+        "MEDIA:relative/path/file.png",       # relative path, no anchor
+        "MEDIA:file.png",                      # no directory
+        "MEDIA:C:file.png",                    # drive letter but no separator
+        "MEDIA:/path/to/file.unknown",         # unsupported extension
+        "MEDIA:/path/to/file",                 # no extension
+        "MEDIA:",                               # empty path
+    ])
+    def test_invalid_paths_dont_match(self, text):
+        """Non-MEDIA text, relative paths, and unsupported extensions are ignored."""
+        match = _TOOL_MEDIA_RE.search(text)
+        assert match is None, f"Should NOT match: {text}"
+
+    # ── Negative/preserved: old pattern rejects Windows paths ──────
+
+    @pytest.mark.parametrize("media_tag", [
+        "MEDIA:C:\\Users\\test\\image.png",
+        "MEDIA:D:/data/report.pdf",
+        "MEDIA:C:\\path\\file.jpg",
+    ])
+    def test_pre_fix_pattern_rejects_windows(self, media_tag):
+        """The pre-fix pattern (without Windows anchor) does NOT match Windows paths.
+        This proves the fix is necessary — without it, these paths are silently ignored."""
+        match = _TOOL_MEDIA_RE_PRE_FIX.search(media_tag)
+        assert match is None, f"Pre-fix pattern should NOT match: {media_tag}"
+
+    # ── Edge cases ─────────────────────────────────────────────────
+
+    def test_multiple_media_tags_in_content(self):
+        """Multiple MEDIA tags in the same content are all found."""
+        content = (
+            "Some text MEDIA:C:\\path\\img.png and more MEDIA:/tmp/out.pdf trailing"
+        )
+        matches = list(_TOOL_MEDIA_RE.finditer(content))
+        assert len(matches) == 2
+        paths = [m.group(1) for m in matches]
+        assert "C:\\path\\img.png" in paths
+        assert "/tmp/out.pdf" in paths
+
+    def test_case_insensitive_drive_letter(self):
+        """Drive letters are case-insensitive due to re.IGNORECASE."""
+        match_lower = _TOOL_MEDIA_RE.search("MEDIA:c:\\path\\file.png")
+        match_upper = _TOOL_MEDIA_RE.search("MEDIA:C:\\path\\file.png")
+        assert match_lower is not None
+        assert match_upper is not None
+        assert match_lower.group(1).lower() == match_upper.group(1).lower()
+
+    @pytest.mark.parametrize("media_tag", [
+        "MEDIA:C:\\path\\file.jpeg",
+        "MEDIA:C:\\path\\file.JPG",
+        "MEDIA:C:\\path\\file.GIF",
+        "MEDIA:C:\\path\\file.MP4",
+    ])
+    def test_case_insensitive_extensions(self, media_tag):
+        """File extensions are matched case-insensitively."""
+        match = _TOOL_MEDIA_RE.search(media_tag)
+        assert match is not None, f"Should match: {media_tag}"
diff --git a/tests/gateway/test_status_command.py b/tests/gateway/test_status_command.py
index 01222597224..0b88d271808 100644
--- a/tests/gateway/test_status_command.py
+++ b/tests/gateway/test_status_command.py
@@ -97,7 +97,7 @@ async def test_status_command_reports_running_agent_without_interrupt(monkeypatc
     result = await runner._handle_message(_make_event("/status"))
 
     assert "**Session ID:** `sess-1`" in result
-    assert "**Tokens:** 321" in result
+    assert "**Cumulative API tokens (re-sent each call):** 321" in result
     assert "**Agent Running:** Yes ⚡" in result
     assert "**Title:**" not in result
     running_agent.interrupt.assert_not_called()
@@ -150,7 +150,7 @@ async def test_status_command_reads_token_totals_from_session_db():
     result = await runner._handle_message(_make_event("/status"))
 
     # 1000 + 250 + 500 + 100 + 50 = 1,900
-    assert "**Tokens:** 1,900" in result
+    assert "**Cumulative API tokens (re-sent each call):** 1,900" in result
 
 
 @pytest.mark.asyncio
@@ -171,7 +171,7 @@ async def test_status_command_tokens_zero_when_session_db_row_missing():
 
     result = await runner._handle_message(_make_event("/status"))
 
-    assert "**Tokens:** 0" in result
+    assert "**Cumulative API tokens (re-sent each call):** 0" in result
 
 
 @pytest.mark.asyncio
diff --git a/tests/gateway/test_telegram_model_picker.py b/tests/gateway/test_telegram_model_picker.py
index 3e1d4cf71e8..f6c887ef3f4 100644
--- a/tests/gateway/test_telegram_model_picker.py
+++ b/tests/gateway/test_telegram_model_picker.py
@@ -146,6 +146,78 @@ class TestTelegramModelPicker:
         # State is cleaned up after a successful switch.
         assert "12345" not in adapter._model_picker_state
 
+    @pytest.mark.asyncio
+    async def test_provider_group_folds_and_drills_down(self, monkeypatch):
+        """A provider family (e.g. MiniMax) collapses to one mpg: button at
+        the top level; tapping it expands to its authenticated members as
+        mp: buttons. A group reduced to a single authenticated member shows
+        no submenu (direct mp: button).
+
+        Inspects callback_data by recording every InlineKeyboardButton built,
+        which is robust to whether `telegram` is the real SDK or the module
+        mock (the SDK markup objects don't expose a plain iterable under the
+        mock)."""
+        import gateway.platforms.telegram as tg
+
+        built: list = []
+
+        class _RecordingButton:
+            def __init__(self, text, callback_data=None, **kw):
+                self.text = text
+                self.callback_data = callback_data
+                built.append(callback_data)
+
+        class _RecordingMarkup:
+            def __init__(self, rows):
+                self.inline_keyboard = rows
+
+        monkeypatch.setattr(tg, "InlineKeyboardButton", _RecordingButton)
+        monkeypatch.setattr(tg, "InlineKeyboardMarkup", _RecordingMarkup)
+
+        adapter = _make_adapter()
+
+        async def mock_send_message(**kwargs):
+            return SimpleNamespace(message_id=101)
+
+        adapter._bot.send_message = AsyncMock(side_effect=mock_send_message)
+
+        providers = [
+            {"slug": "minimax", "name": "MiniMax", "total_models": 2},
+            {"slug": "minimax-cn", "name": "MiniMax (China)", "total_models": 3},
+            {"slug": "xai", "name": "xAI", "total_models": 1},  # lone group member
+        ]
+
+        await adapter.send_model_picker(
+            chat_id="12345",
+            providers=providers,
+            current_model="m",
+            current_provider="minimax",
+            session_key="s",
+            on_model_selected=AsyncMock(),
+            metadata=None,
+        )
+
+        # Top-level keyboard: MiniMax family folded into one group button;
+        # xai (lone member) degraded to a direct provider button.
+        assert "mpg:minimax" in built
+        assert "mp:xai" in built
+        assert "mp:minimax" not in built
+        assert "mp:minimax-cn" not in built
+
+        # Drill into the MiniMax group → members appear as mp: buttons + back.
+        built.clear()
+        query = AsyncMock()
+        query.message = MagicMock()
+        query.message.chat_id = 12345
+        query.answer = AsyncMock()
+        query.edit_message_text = AsyncMock()
+
+        await adapter._handle_model_picker_callback(query, "mpg:minimax", "12345")
+
+        assert "mp:minimax" in built
+        assert "mp:minimax-cn" in built
+        assert "mb" in built  # back-to-providers button present
+
     @pytest.mark.asyncio
     async def test_retries_without_thread_when_thread_not_found(self):
         adapter = _make_adapter()
diff --git a/tests/gateway/test_weixin.py b/tests/gateway/test_weixin.py
index ac535865df8..0482f66248e 100644
--- a/tests/gateway/test_weixin.py
+++ b/tests/gateway/test_weixin.py
@@ -11,6 +11,7 @@ import pytest
 from gateway.config import PlatformConfig
 from gateway.config import GatewayConfig, HomeChannel, Platform, _apply_env_overrides
 from gateway.platforms.base import SendResult
+from gateway.platforms.base import MessageEvent, MessageType
 from gateway.platforms import weixin
 from gateway.platforms.weixin import ContextTokenStore, WeixinAdapter
 from tools.send_message_tool import _parse_target_ref, _send_to_platform
@@ -853,15 +854,27 @@ class TestWeixinContentDedup:
         adapter = _make_adapter()
         adapter._poll_session = object()
         adapter.handle_message = AsyncMock()
+        # Tighten the text-debounce delay so the flush completes quickly.
+        adapter._text_batch_delay_seconds = 0.05
+        adapter._text_batch_split_delay_seconds = 0.05
 
         base_msg = {
             "from_user_id": "wxid_user1",
             "item_list": [{"type": 1, "text_item": {"text": "hello world"}}],
         }
 
-        asyncio.run(adapter._process_message({**base_msg, "message_id": "msg-1"}))
-        asyncio.run(adapter._process_message({**base_msg, "message_id": "msg-2"}))
+        async def _drive():
+            # Both inbound messages share the same event loop so the debounce
+            # task created by the first one survives to be flushed.
+            await adapter._process_message({**base_msg, "message_id": "msg-1"})
+            await adapter._process_message({**base_msg, "message_id": "msg-2"})
+            # Wait out the quiet period so the buffered text batch flushes.
+            await asyncio.sleep(0.2)
 
+        asyncio.run(_drive())
+
+        # Content-dedup drops the second (duplicate) message before it is even
+        # enqueued, so only one combined dispatch reaches handle_message.
         assert adapter.handle_message.await_count == 1
         event = adapter.handle_message.await_args[0][0]
         assert event.text == "hello world"
@@ -882,3 +895,76 @@ class TestWeixinContentDedup:
         assert adapter.handle_message.await_count == 0
         # is_duplicate should only be called for message_id, never for content
         assert all("content:" not in str(call) for call in adapter._dedup.is_duplicate.call_args_list)
+
+
+class TestWeixinTextDebounce:
+    """Text-debounce batching for rapid multi-message bursts (issue #35301).
+
+    Delays are read from ``config.extra`` (config.yaml), not env vars.
+    """
+
+    def test_batch_delays_default_from_config(self):
+        adapter = _make_adapter()
+        assert adapter._text_batch_delay_seconds == 3.0
+        assert adapter._text_batch_split_delay_seconds == 5.0
+
+    def test_batch_delays_overridden_via_config_extra(self):
+        adapter = WeixinAdapter(
+            PlatformConfig(
+                enabled=True,
+                token="test-token",
+                extra={
+                    "account_id": "test-account",
+                    "text_batch_delay_seconds": "0.5",
+                    "text_batch_split_delay_seconds": 1.5,
+                },
+            )
+        )
+        assert adapter._text_batch_delay_seconds == 0.5
+        assert adapter._text_batch_split_delay_seconds == 1.5
+
+    def test_invalid_config_value_falls_back_to_default(self):
+        adapter = WeixinAdapter(
+            PlatformConfig(
+                enabled=True,
+                token="test-token",
+                extra={
+                    "account_id": "test-account",
+                    "text_batch_delay_seconds": "not-a-number",
+                    "text_batch_split_delay_seconds": -4,
+                },
+            )
+        )
+        assert adapter._text_batch_delay_seconds == 3.0
+        assert adapter._text_batch_split_delay_seconds == 5.0
+
+    def test_rapid_texts_collapse_into_single_dispatch(self):
+        adapter = _make_adapter()
+        adapter._text_batch_delay_seconds = 0.05
+        adapter._text_batch_split_delay_seconds = 0.05
+        dispatched = []
+
+        async def _capture(event):
+            dispatched.append(event.text)
+
+        adapter.handle_message = _capture
+
+        def _event(text):
+            return MessageEvent(
+                text=text,
+                message_type=MessageType.TEXT,
+                source=adapter.build_source(
+                    chat_id="wxid_user1", chat_type="dm",
+                    user_id="wxid_user1", user_name="wxid_user1",
+                ),
+            )
+
+        async def _drive():
+            adapter._enqueue_text_event(_event("one"))
+            adapter._enqueue_text_event(_event("two"))
+            adapter._enqueue_text_event(_event("three"))
+            assert dispatched == []  # nothing flushed during the burst
+            await asyncio.sleep(0.2)
+
+        asyncio.run(_drive())
+        assert dispatched == ["one\ntwo\nthree"]
diff --git a/tests/gateway/test_whatsapp_text_batching.py b/tests/gateway/test_whatsapp_text_batching.py
new file mode 100644
index 00000000000..4258617c678
--- /dev/null
+++ b/tests/gateway/test_whatsapp_text_batching.py
@@ -0,0 +1,107 @@
+"""Text-debounce batching for the WhatsApp adapter (issue #35301).
+
+WhatsApp delivers rapid multi-message bursts (forwarded batches, paste-splits)
+individually.  Without debounce each fragment triggers a separate agent
+invocation, wasting tokens and flooding the user with reply fragments.  This
+mirrors the Telegram/WeCom/Feishu pattern.
+
+Batch delays are read from ``config.extra`` (config.yaml), not env vars.
+"""
+
+import asyncio
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import MessageEvent, MessageType
+from gateway.platforms.whatsapp import WhatsAppAdapter
+from gateway.session import SessionSource
+
+
+def _make_adapter(**extra):
+    base = {"session_name": "test"}
+    base.update(extra)
+    return WhatsAppAdapter(PlatformConfig(enabled=True, extra=base))
+
+
+def _event(text):
+    src = SessionSource(
+        platform=Platform.WHATSAPP,
+        chat_id="chat123",
+        chat_type="dm",
+        user_id="user1",
+        user_name="tester",
+    )
+    return MessageEvent(text=text, message_type=MessageType.TEXT, source=src)
+
+
+def test_batch_delays_default_from_config():
+    adapter = _make_adapter()
+    assert adapter._text_batch_delay_seconds == 5.0
+    assert adapter._text_batch_split_delay_seconds == 10.0
+
+
+def test_batch_delays_overridden_via_config_extra():
+    adapter = _make_adapter(
+        text_batch_delay_seconds="2.5",
+        text_batch_split_delay_seconds=7,
+    )
+    assert adapter._text_batch_delay_seconds == 2.5
+    assert adapter._text_batch_split_delay_seconds == 7.0
+
+
+def test_invalid_config_value_falls_back_to_default():
+    adapter = _make_adapter(
+        text_batch_delay_seconds="garbage",
+        text_batch_split_delay_seconds=-3,
+    )
+    assert adapter._text_batch_delay_seconds == 5.0
+    assert adapter._text_batch_split_delay_seconds == 10.0
+
+
+def test_env_var_is_ignored(monkeypatch):
+    # Config-only path: the legacy HERMES_* env var must NOT influence delays.
+    monkeypatch.setenv("HERMES_WHATSAPP_TEXT_BATCH_DELAY_SECONDS", "99")
+    adapter = _make_adapter()
+    assert adapter._text_batch_delay_seconds == 5.0
+
+
+def test_rapid_texts_collapse_into_single_dispatch():
+    adapter = _make_adapter(
+        text_batch_delay_seconds=0.05,
+        text_batch_split_delay_seconds=0.05,
+    )
+    dispatched = []
+
+    async def _capture(event):
+        dispatched.append(event.text)
+
+    adapter.handle_message = _capture
+
+    async def _drive():
+        adapter._enqueue_text_event(_event("one"))
+        adapter._enqueue_text_event(_event("two"))
+        adapter._enqueue_text_event(_event("three"))
+        assert dispatched == []  # nothing flushed during the burst
+        await asyncio.sleep(0.2)
+
+    asyncio.run(_drive())
+    assert dispatched == ["one\ntwo\nthree"]
+
+
+def test_lone_message_dispatched_alone():
+    adapter = _make_adapter(
+        text_batch_delay_seconds=0.05,
+        text_batch_split_delay_seconds=0.05,
+    )
+    dispatched = []
+
+    async def _capture(event):
+        dispatched.append(event.text)
+
+    adapter.handle_message = _capture
+
+    async def _drive():
+        adapter._enqueue_text_event(_event("solo"))
+        await asyncio.sleep(0.2)
+
+    asyncio.run(_drive())
+    assert dispatched == ["solo"]
diff --git a/tests/hermes_cli/test_cmd_update.py b/tests/hermes_cli/test_cmd_update.py
index 4f2875235f1..3b3d678a437 100644
--- a/tests/hermes_cli/test_cmd_update.py
+++ b/tests/hermes_cli/test_cmd_update.py
@@ -39,6 +39,45 @@ def mock_args():
     return SimpleNamespace()
 
 
+class TestCmdUpdatePip:
+    """Regression tests for pip-install update flows."""
+
+    @patch("shutil.which", return_value="/usr/bin/uv")
+    @patch("subprocess.run")
+    def test_update_pip_exports_virtualenv_from_sys_prefix(
+        self, mock_run, _mock_which, mock_args, monkeypatch
+    ):
+        from hermes_cli import main as hm
+
+        mock_run.return_value = subprocess.CompletedProcess([], 0, stdout="", stderr="")
+        monkeypatch.delenv("VIRTUAL_ENV", raising=False)
+        monkeypatch.setattr(hm.sys, "prefix", "/tmp/hermes-launcher-venv")
+        monkeypatch.setattr(hm.sys, "base_prefix", "/usr")
+
+        hm._cmd_update_pip(mock_args)
+
+        assert mock_run.call_count == 1
+        assert mock_run.call_args.args[0] == ["/usr/bin/uv", "pip", "install", "--upgrade", "hermes-agent"]
+        assert mock_run.call_args.kwargs["env"]["VIRTUAL_ENV"] == "/tmp/hermes-launcher-venv"
+
+    @patch("shutil.which", return_value="/usr/bin/uv")
+    @patch("subprocess.run")
+    def test_update_pip_does_not_export_virtualenv_for_system_python(
+        self, mock_run, _mock_which, mock_args, monkeypatch
+    ):
+        from hermes_cli import main as hm
+
+        mock_run.return_value = subprocess.CompletedProcess([], 0, stdout="", stderr="")
+        monkeypatch.delenv("VIRTUAL_ENV", raising=False)
+        monkeypatch.setattr(hm.sys, "prefix", "/usr")
+        monkeypatch.setattr(hm.sys, "base_prefix", "/usr")
+
+        hm._cmd_update_pip(mock_args)
+
+        assert mock_run.call_count == 1
+        assert "env" not in mock_run.call_args.kwargs
+
+
 class TestCmdUpdateBranchFallback:
     """cmd_update falls back to main when current branch has no remote counterpart."""
 
diff --git a/tests/hermes_cli/test_copilot_in_model_list.py b/tests/hermes_cli/test_copilot_in_model_list.py
index e414687bce7..83832b0c332 100644
--- a/tests/hermes_cli/test_copilot_in_model_list.py
+++ b/tests/hermes_cli/test_copilot_in_model_list.py
@@ -6,25 +6,6 @@ from unittest.mock import patch
 from hermes_cli.model_switch import list_authenticated_providers
 
 
-@patch.dict(os.environ, {"GH_TOKEN": "test-key"}, clear=False)
-def test_copilot_picker_keeps_curated_copilot_models_when_live_catalog_unavailable():
-    with patch("agent.models_dev.fetch_models_dev", return_value={}), \
-         patch("hermes_cli.models._resolve_copilot_catalog_api_key", return_value="gh-token"), \
-         patch("hermes_cli.models._fetch_github_models", return_value=None):
-        providers = list_authenticated_providers(current_provider="openrouter", max_models=50)
-
-    copilot = next((p for p in providers if p["slug"] == "copilot"), None)
-
-    assert copilot is not None
-    assert "gpt-5.4" in copilot["models"]
-    assert "claude-sonnet-4.6" in copilot["models"]
-    assert "claude-sonnet-4" in copilot["models"]
-    assert "claude-sonnet-4.5" in copilot["models"]
-    assert "claude-haiku-4.5" in copilot["models"]
-    assert "gemini-3.1-pro-preview" in copilot["models"]
-    assert "claude-opus-4.6" not in copilot["models"]
-
-
 @patch.dict(os.environ, {"GH_TOKEN": "test-key"}, clear=False)
 def test_copilot_picker_uses_live_catalog_when_available():
     live_models = ["gpt-5.4", "claude-sonnet-4.6", "gemini-3.1-pro-preview"]
diff --git a/tests/hermes_cli/test_dashboard_auth_ws_auth.py b/tests/hermes_cli/test_dashboard_auth_ws_auth.py
index d16c7719d1a..ff432e64c5b 100644
--- a/tests/hermes_cli/test_dashboard_auth_ws_auth.py
+++ b/tests/hermes_cli/test_dashboard_auth_ws_auth.py
@@ -80,6 +80,25 @@ def loopback_app():
     web_server.app.state.auth_required = prev_required
 
 
+@pytest.fixture
+def insecure_public_app():
+    """web_server.app configured for all-interfaces insecure mode."""
+    _reset_for_tests()
+    clear_providers()
+    prev_host = getattr(web_server.app.state, "bound_host", None)
+    prev_port = getattr(web_server.app.state, "bound_port", None)
+    prev_required = getattr(web_server.app.state, "auth_required", None)
+    web_server.app.state.bound_host = "0.0.0.0"
+    web_server.app.state.bound_port = 9120
+    web_server.app.state.auth_required = False
+    client = TestClient(web_server.app, base_url="http://192.168.0.222:9120")
+    yield client
+    _reset_for_tests()
+    web_server.app.state.bound_host = prev_host
+    web_server.app.state.bound_port = prev_port
+    web_server.app.state.auth_required = prev_required
+
+
 def _logged_in(client: TestClient) -> None:
     """Drive the stub OAuth round trip so the client holds session cookies."""
     r1 = client.get("/auth/login?provider=stub", follow_redirects=False)
@@ -143,6 +162,30 @@ class TestWsTicketEndpoint:
 # ---------------------------------------------------------------------------
 
 
+@pytest.fixture
+def insecure_explicit_host_app():
+    """web_server.app bound to an explicit non-loopback host (--insecure).
+
+    Models `--host 100.64.0.10 --insecure` (e.g. a Tailscale IP behind
+    `tailscale serve`) — a specific address rather than the all-interfaces
+    0.0.0.0 wildcard.
+    """
+    _reset_for_tests()
+    clear_providers()
+    prev_host = getattr(web_server.app.state, "bound_host", None)
+    prev_port = getattr(web_server.app.state, "bound_port", None)
+    prev_required = getattr(web_server.app.state, "auth_required", None)
+    web_server.app.state.bound_host = "100.64.0.10"
+    web_server.app.state.bound_port = 9119
+    web_server.app.state.auth_required = False
+    client = TestClient(web_server.app, base_url="http://100.64.0.10:9119")
+    yield client
+    _reset_for_tests()
+    web_server.app.state.bound_host = prev_host
+    web_server.app.state.bound_port = prev_port
+    web_server.app.state.auth_required = prev_required
+
+
 def _fake_ws(*, query: dict, client_host: str = "127.0.0.1", path: str = "/api/pty"):
     """Build a stand-in for starlette.WebSocket good enough for _ws_auth_ok."""
 
@@ -281,6 +324,48 @@ class TestWsRequestIsAllowedGated:
         ws.headers = {"host": "127.0.0.1:8080"}
         assert web_server._ws_request_is_allowed(ws) is True
 
+    def test_non_loopback_peer_allowed_in_insecure_public_mode(self, insecure_public_app):
+        """`--host 0.0.0.0 --insecure` is an explicit LAN/public opt-in.
+
+        Regression coverage for the dashboard `/chat` breakage where the
+        HTML shell loaded on 9120 but every WebSocket upgrade was rejected
+        with 403 because the loopback-only peer guard still ran even though
+        the operator intentionally exposed the dashboard on all interfaces.
+        """
+        ws = _fake_ws(query={}, client_host="192.168.0.55")
+        ws.headers = {
+            "host": "192.168.0.222:9120",
+            "origin": "http://192.168.0.222:9120",
+        }
+        assert web_server._ws_request_is_allowed(ws) is True
+
+    def test_peer_allowed_on_explicit_non_loopback_bind(self, insecure_explicit_host_app):
+        """`--host 100.64.0.10 --insecure` (Tailscale/LAN IP) is an explicit
+        non-loopback opt-in too — not just the 0.0.0.0 wildcard.
+
+        Regression coverage: the merged 0.0.0.0/:: fix did not cover binding
+        directly to a specific tailnet/LAN address, so `/chat` HTML loaded but
+        WS upgrades were still rejected by the loopback-only peer guard.
+        """
+        ws = _fake_ws(query={}, client_host="100.64.0.99")
+        ws.headers = {
+            "host": "100.64.0.10:9119",
+            "origin": "http://100.64.0.10:9119",
+        }
+        assert web_server._ws_request_is_allowed(ws) is True
+
+    def test_rebinding_host_rejected_on_explicit_non_loopback_bind(
+        self, insecure_explicit_host_app
+    ):
+        """Lifting the peer-IP gate for an explicit bind must NOT lift the
+        DNS-rebinding Host guard: a mismatched Host header is still rejected,
+        because an explicit non-loopback bind requires an exact Host match in
+        `_is_accepted_host` (unlike the 0.0.0.0 wildcard, which accepts any).
+        """
+        ws = _fake_ws(query={}, client_host="100.64.0.99")
+        ws.headers = {"host": "evil.example.com"}
+        assert web_server._ws_request_is_allowed(ws) is False
+
     def test_host_origin_guard_still_runs_in_gated_mode(self, gated_app):
         """Bypassing the peer-IP check must not bypass the DNS-rebinding
         Host header guard — that one still protects against attacker
diff --git a/tests/hermes_cli/test_gmi_provider.py b/tests/hermes_cli/test_gmi_provider.py
index 2c2f146ed85..86aaf699bf6 100644
--- a/tests/hermes_cli/test_gmi_provider.py
+++ b/tests/hermes_cli/test_gmi_provider.py
@@ -80,14 +80,6 @@ class TestGmiConfigRegistry:
 
 
 class TestGmiModelCatalog:
-    def test_static_model_fallback_exists(self):
-        assert "gmi" in _PROVIDER_MODELS
-        models = _PROVIDER_MODELS["gmi"]
-        assert "zai-org/GLM-5.1-FP8" in models
-        assert "deepseek-ai/DeepSeek-V3.2" in models
-        assert "moonshotai/Kimi-K2.5" in models
-        assert "anthropic/claude-sonnet-4.6" in models
-
     def test_canonical_provider_entry(self):
         slugs = [p.slug for p in CANONICAL_PROVIDERS]
         assert "gmi" in slugs
@@ -267,11 +259,6 @@ class TestGmiModelMetadata:
 
 
 class TestGmiAuxiliary:
-    def test_aux_default_model(self):
-        from agent.auxiliary_client import _get_aux_model_for_provider
-
-        assert _get_aux_model_for_provider("gmi") == "google/gemini-3.1-flash-lite-preview"
-
     def test_resolve_provider_client_uses_gmi_aux_default(self, monkeypatch):
         monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")
 
diff --git a/tests/hermes_cli/test_kanban_blocked_sticky.py b/tests/hermes_cli/test_kanban_blocked_sticky.py
index e6bd093d938..2d7cafef826 100644
--- a/tests/hermes_cli/test_kanban_blocked_sticky.py
+++ b/tests/hermes_cli/test_kanban_blocked_sticky.py
@@ -106,20 +106,30 @@ def test_worker_block_on_child_with_done_parents_is_still_sticky(kanban_home: Pa
 
 def test_circuit_breaker_block_still_auto_promotes(kanban_home: Path) -> None:
     """A child that was put into ``blocked`` *without* a worker-issued
-    ``kanban_block`` (e.g. circuit-breaker after repeated spawn
-    failures, manual DB triage) must still get auto-promoted when its
-    parents complete — preserves the pre-#28712 recovery semantics."""
+    ``kanban_block`` (e.g. a transient crash, manual DB triage) and whose
+    ``consecutive_failures`` is still *below* the circuit-breaker limit
+    must get auto-promoted when its parents complete — preserves the
+    pre-#28712 recovery semantics for genuinely transient failures.
+
+    The complementary case — a block whose failure count has *reached*
+    the limit must stay blocked — is covered by
+    ``test_kanban_db.py::test_recompute_ready_skips_tasks_at_failure_limit``
+    (#35072).  Together they pin the contract: ``recompute_ready`` defers
+    the give-up decision to the same effective limit the breaker uses, so
+    the two never disagree.
+    """
     with kb.connect() as conn:
         parent = kb.create_task(conn, title="parent")
         child = kb.create_task(conn, title="child", parents=[parent])
         kb.complete_task(conn, parent, result="ok")
 
-        # Simulate a circuit-breaker / direct triage that flips status
-        # without emitting a ``blocked`` event — exactly what
-        # ``_record_task_failure`` does after a ``gave_up``.
+        # Simulate a transient circuit-breaker / direct triage that flips
+        # status without emitting a ``blocked`` event — exactly what
+        # ``_record_task_failure`` does below the limit.  One failure is
+        # under the default limit (2), so recovery is still correct.
         conn.execute(
-            "UPDATE tasks SET status='blocked', consecutive_failures=5, "
-            "last_failure_error='persistent error' WHERE id=?",
+            "UPDATE tasks SET status='blocked', consecutive_failures=1, "
+            "last_failure_error='transient error' WHERE id=?",
             (child,),
         )
         conn.commit()
@@ -128,8 +138,9 @@ def test_circuit_breaker_block_still_auto_promotes(kanban_home: Path) -> None:
         assert promoted == 1
         task = kb.get_task(conn, child)
         assert task.status == "ready"
-        assert task.consecutive_failures == 0
-        assert task.last_failure_error is None
+        # Counter is preserved across recovery (not reset) so the breaker
+        # can still accumulate if the task keeps failing (#35072).
+        assert task.consecutive_failures == 1
 
 
 def test_gave_up_event_alone_does_not_make_block_sticky(kanban_home: Path) -> None:
diff --git a/tests/hermes_cli/test_kanban_db.py b/tests/hermes_cli/test_kanban_db.py
index 020ad4fb425..b2510855ea2 100644
--- a/tests/hermes_cli/test_kanban_db.py
+++ b/tests/hermes_cli/test_kanban_db.py
@@ -307,7 +307,8 @@ def test_recompute_ready_cascades_through_chain(kanban_home):
 
 
 def test_recompute_ready_promotes_blocked_with_done_parents(kanban_home):
-    """blocked tasks with all parents done should be promoted to ready."""
+    """blocked tasks with all parents done should be promoted to ready,
+    unless the circuit-breaker failure limit has been reached."""
     with kb.connect() as conn:
         parent = kb.create_task(conn, title="parent", assignee="a")
         child = kb.create_task(
@@ -316,16 +317,16 @@ def test_recompute_ready_promotes_blocked_with_done_parents(kanban_home):
         # Complete the parent
         kb.claim_task(conn, parent)
         kb.complete_task(conn, parent, result="ok")
-        # Manually block the child (simulates a worker that failed
-        # after the parent finished)
+        # Manually block the child with zero failures (simulates a
+        # dependency block, not a circuit-breaker block).
         conn.execute(
-            "UPDATE tasks SET status='blocked', consecutive_failures=5, "
-            "last_failure_error='persistent error' WHERE id=?",
+            "UPDATE tasks SET status='blocked', consecutive_failures=0, "
+            "last_failure_error=NULL WHERE id=?",
             (child,),
         )
         conn.commit()
         assert kb.get_task(conn, child).status == "blocked"
-        # recompute_ready should promote blocked → ready and reset failures
+        # recompute_ready should promote blocked → ready
         promoted = kb.recompute_ready(conn)
         assert promoted == 1
         task = kb.get_task(conn, child)
@@ -815,6 +816,149 @@ def test_unblock_resets_failure_counters(kanban_home):
         assert task.last_failure_error is None
 
 
+def test_recompute_ready_skips_tasks_at_failure_limit(kanban_home):
+    """recompute_ready must not auto-recover tasks whose consecutive_failures
+    has reached the circuit-breaker limit (#35072).
+
+    Without this guard, a task that repeatedly exhausts its iteration
+    budget would cycle forever: block → auto-recover (counter reset)
+    → respawn → budget exhausted → block → …
+    """
+    with kb.connect() as conn:
+        parent = kb.create_task(conn, title="parent", assignee="a")
+        child = kb.create_task(conn, title="child", assignee="a",
+                               parents=[parent])
+        # Complete the parent so the child's dependencies are satisfied.
+        kb.claim_task(conn, parent)
+        kb.complete_task(conn, parent, summary="done")
+
+        # Simulate the child having exhausted its budget twice,
+        # hitting the default failure limit (2).
+        kb.claim_task(conn, child)
+        kb._record_task_failure(
+            conn, child, error="budget exhausted 1",
+            outcome="timed_out", release_claim=True, end_run=True,
+            failure_limit=2,
+        )
+        kb._record_task_failure(
+            conn, child, error="budget exhausted 2",
+            outcome="timed_out", release_claim=True, end_run=True,
+            failure_limit=2,
+        )
+        task = kb.get_task(conn, child)
+        assert task.status == "blocked"
+        assert task.consecutive_failures >= 2
+
+        # recompute_ready must NOT promote this task — the circuit
+        # breaker has tripped and it should stay blocked.
+        promoted = kb.recompute_ready(conn)
+        assert promoted == 0
+        assert kb.get_task(conn, child).status == "blocked"
+
+        # Explicit unblock should still work and reset the counter.
+        assert kb.unblock_task(conn, child)
+        task = kb.get_task(conn, child)
+        assert task.status == "ready"
+        assert task.consecutive_failures == 0
+
+
+def test_recompute_ready_recovers_below_limit(kanban_home):
+    """recompute_ready auto-recovers blocked tasks that haven't hit the
+    failure limit yet — the counter is preserved across recovery."""
+    with kb.connect() as conn:
+        t = kb.create_task(conn, title="task", assignee="a")
+        kb.claim_task(conn, t)
+        # One failure, below the default limit of 2.
+        kb._record_task_failure(
+            conn, t, error="budget exhausted 1",
+            outcome="timed_out", release_claim=True, end_run=True,
+            failure_limit=2,
+        )
+        task = kb.get_task(conn, t)
+        assert task.status == "ready"
+        assert task.consecutive_failures == 1
+
+        # Simulate being blocked by something else (not circuit breaker).
+        conn.execute(
+            "UPDATE tasks SET status = 'blocked' WHERE id = ?", (t,),
+        )
+        conn.commit()
+
+        promoted = kb.recompute_ready(conn)
+        assert promoted == 1
+        task = kb.get_task(conn, t)
+        assert task.status == "ready"
+        # Counter must be preserved, not reset.
+        assert task.consecutive_failures == 1
+
+
+def test_recompute_ready_honours_dispatcher_failure_limit(kanban_home):
+    """The guard's effective limit must follow the same resolution order
+    as the circuit breaker (#35072): per-task max_retries → dispatcher
+    failure_limit → DEFAULT_FAILURE_LIMIT.
+
+    Without threading the dispatcher's ``kanban.failure_limit`` through,
+    the guard falls back to DEFAULT_FAILURE_LIMIT and disagrees with the
+    breaker — sticking a task prematurely (config limit > default) or
+    letting a tripped task escape (config limit < default).
+    """
+    with kb.connect() as conn:
+        # Config allows MORE retries than the default. A task blocked
+        # with failures below the configured limit must still recover.
+        t = kb.create_task(conn, title="lenient", assignee="a")
+        conn.execute(
+            "UPDATE tasks SET status='blocked', consecutive_failures=? "
+            "WHERE id=?",
+            (kb.DEFAULT_FAILURE_LIMIT, t),
+        )
+        conn.commit()
+        # Default-limit call would stick it (failures >= default).
+        assert kb.recompute_ready(conn) == 0
+        assert kb.get_task(conn, t).status == "blocked"
+        # Dispatcher configured a higher limit → recover, preserve counter.
+        promoted = kb.recompute_ready(
+            conn, failure_limit=kb.DEFAULT_FAILURE_LIMIT + 2
+        )
+        assert promoted == 1
+        task = kb.get_task(conn, t)
+        assert task.status == "ready"
+        assert task.consecutive_failures == kb.DEFAULT_FAILURE_LIMIT
+
+        # Config allows FEWER retries than the default. A task at the
+        # stricter limit must stay blocked even though it's below default.
+        t2 = kb.create_task(conn, title="strict", assignee="a")
+        conn.execute(
+            "UPDATE tasks SET status='blocked', consecutive_failures=1 "
+            "WHERE id=?",
+            (t2,),
+        )
+        conn.commit()
+        # Default-limit (2) would recover it (1 < 2).
+        # Stricter config limit (1) must keep it blocked (1 >= 1).
+        assert kb.recompute_ready(conn, failure_limit=1) == 0
+        assert kb.get_task(conn, t2).status == "blocked"
+
+
+def test_recompute_ready_per_task_max_retries_overrides_dispatcher(kanban_home):
+    """A per-task ``max_retries`` wins over the dispatcher failure_limit,
+    matching ``_record_task_failure``'s resolution order."""
+    with kb.connect() as conn:
+        t = kb.create_task(conn, title="per-task", assignee="a")
+        # Per-task allows 4 retries; dispatcher config says 2.
+        conn.execute(
+            "UPDATE tasks SET status='blocked', consecutive_failures=2, "
+            "max_retries=4 WHERE id=?",
+            (t,),
+        )
+        conn.commit()
+        # failures(2) < per-task limit(4) → recover, despite dispatcher=2.
+        promoted = kb.recompute_ready(conn, failure_limit=2)
+        assert promoted == 1
+        task = kb.get_task(conn, t)
+        assert task.status == "ready"
+        assert task.consecutive_failures == 2
+
+
 # ---------------------------------------------------------------------------
 # Parent-completion invariant at the claim gate (RCA t_a6acd07d)
 # ---------------------------------------------------------------------------
diff --git a/tests/hermes_cli/test_kanban_db_init.py b/tests/hermes_cli/test_kanban_db_init.py
index c400b1d90f9..7db5d2009e6 100644
--- a/tests/hermes_cli/test_kanban_db_init.py
+++ b/tests/hermes_cli/test_kanban_db_init.py
@@ -1,11 +1,74 @@
 from __future__ import annotations
 
+import sqlite3
 import threading
 from pathlib import Path
 
 from hermes_cli import kanban_db as kb
 
 
+def _make_legacy_db(path: Path) -> None:
+    """Write a kanban DB with the pre-AUTOINCREMENT (TEXT PK) schema for the
+    four tables #35096 affects, keeping every other table current so the
+    additive-column migration runs cleanly on top.
+    """
+    conn = sqlite3.connect(str(path))
+    conn.executescript(kb.SCHEMA_SQL)
+    conn.executescript(
+        """
+        DROP TABLE task_events;
+        DROP TABLE task_comments;
+        DROP TABLE task_runs;
+        DROP TABLE kanban_notify_subs;
+        CREATE TABLE task_comments (id TEXT PRIMARY KEY, task_id TEXT NOT NULL,
+            author TEXT NOT NULL, body TEXT NOT NULL, created_at INTEGER NOT NULL);
+        CREATE TABLE task_events (id TEXT PRIMARY KEY, task_id TEXT NOT NULL,
+            kind TEXT NOT NULL, payload TEXT, created_at INTEGER NOT NULL);
+        CREATE TABLE task_runs (id TEXT PRIMARY KEY, task_id TEXT NOT NULL,
+            profile TEXT, status TEXT NOT NULL, started_at INTEGER NOT NULL);
+        CREATE TABLE kanban_notify_subs (task_id TEXT NOT NULL, platform TEXT NOT NULL,
+            chat_id TEXT NOT NULL, thread_id TEXT NOT NULL DEFAULT '', user_id TEXT,
+            created_at INTEGER NOT NULL, last_event_id TEXT,
+            PRIMARY KEY (task_id, platform, chat_id, thread_id));
+        """
+    )
+    conn.execute("INSERT INTO tasks (id, title, status, created_at) VALUES ('task-1', 'T', 'done', 1000)")
+    conn.execute("INSERT INTO task_comments VALUES ('c-1', 'task-1', 'agent', 'hi', 1500)")
+    conn.execute("INSERT INTO task_events VALUES ('e-1', 'task-1', 'completed', NULL, 2000)")
+    conn.execute("INSERT INTO task_events VALUES ('e-2', 'task-1', 'blocked', NULL, 2100)")
+    conn.execute("INSERT INTO task_runs VALUES ('r-1', 'task-1', 'default', 'done', 1000)")
+    conn.execute(
+        "INSERT INTO kanban_notify_subs (task_id, platform, chat_id, created_at, last_event_id) "
+        "VALUES ('task-1', 'telegram', '123', 1000, 'e-1')"
+    )
+    conn.commit()
+    conn.close()
+
+
+def _setup_home(tmp_path, monkeypatch) -> Path:
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(home))
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+    db_path = kb.kanban_db_path(board="legacy")
+    db_path.parent.mkdir(parents=True, exist_ok=True)
+    kb._INITIALIZED_PATHS.discard(str(db_path.resolve()))
+    return db_path
+
+
+def _table_struct(conn: sqlite3.Connection, table: str):
+    cols = [
+        (r["name"], (r["type"] or "").upper(), r["notnull"], r["pk"])
+        for r in conn.execute(f"PRAGMA table_info({table})")
+    ]
+    idx = sorted(
+        r["name"]
+        for r in conn.execute(f"PRAGMA index_list({table})")
+        if not r["name"].startswith("sqlite_")
+    )
+    return cols, idx
+
+
 def test_connect_initialization_is_thread_safe(tmp_path, monkeypatch):
     home = tmp_path / ".hermes"
     home.mkdir()
@@ -36,3 +99,79 @@ def test_connect_initialization_is_thread_safe(tmp_path, monkeypatch):
     with kb.connect(board="default") as conn:
         cols = {row["name"] for row in conn.execute("PRAGMA table_info(tasks)")}
     assert "max_retries" in cols
+
+
+def test_legacy_text_pk_tables_rebuilt_to_integer_autoincrement(tmp_path, monkeypatch):
+    """A pre-AUTOINCREMENT DB is migrated in place: id columns become INTEGER
+    PKs, ``last_event_id`` becomes INTEGER, data is preserved, and indexes
+    are recreated (DROP TABLE would otherwise take them down)."""
+    db_path = _setup_home(tmp_path, monkeypatch)
+    _make_legacy_db(db_path)
+
+    with kb.connect(db_path) as conn:
+        for table in ("task_events", "task_comments", "task_runs"):
+            id_col = {r["name"]: r for r in conn.execute(f"PRAGMA table_info({table})")}["id"]
+            assert id_col["type"].upper() == "INTEGER" and id_col["pk"] == 1
+
+        lei = {r["name"]: r for r in conn.execute("PRAGMA table_info(kanban_notify_subs)")}
+        assert lei["last_event_id"]["type"].upper() == "INTEGER"
+
+        # Data preserved across the rebuild.
+        assert len(conn.execute("SELECT * FROM task_events").fetchall()) == 2
+        assert conn.execute("SELECT body FROM task_comments").fetchone()["body"] == "hi"
+        assert len(conn.execute("SELECT * FROM task_runs").fetchall()) == 1
+        # Non-numeric legacy cursor ("e-1") casts to 0.
+        assert conn.execute("SELECT last_event_id FROM kanban_notify_subs").fetchone()["last_event_id"] == 0
+
+        # Indexes restored, including idx_events_run (added by the additive pass).
+        indexes = {r[0] for r in conn.execute("SELECT name FROM sqlite_master WHERE type='index'")}
+        for name in ("idx_events_task", "idx_events_run", "idx_comments_task",
+                     "idx_runs_task", "idx_runs_status", "idx_notify_task"):
+            assert name in indexes
+
+        # AUTOINCREMENT actually works after the rebuild.
+        conn.execute("INSERT INTO task_events (task_id, kind, created_at) VALUES ('task-1', 'completed', 3000)")
+        new_id = conn.execute("SELECT id FROM task_events ORDER BY id DESC LIMIT 1").fetchone()["id"]
+        assert isinstance(new_id, int) and new_id >= 1
+
+
+def test_rebuilt_schema_matches_fresh_db(tmp_path, monkeypatch):
+    """The rebuilt tables must be structurally identical to a fresh DB, so the
+    hand-written DDL in ``_REBUILD_SPECS`` can't silently drift from SCHEMA_SQL."""
+    legacy_path = _setup_home(tmp_path, monkeypatch)
+    _make_legacy_db(legacy_path)
+    fresh_path = kb.kanban_db_path(board="fresh")
+    fresh_path.parent.mkdir(parents=True, exist_ok=True)
+    kb._INITIALIZED_PATHS.discard(str(fresh_path.resolve()))
+
+    with kb.connect(legacy_path) as migrated, kb.connect(fresh_path) as fresh:
+        for table in ("task_events", "task_comments", "task_runs", "kanban_notify_subs"):
+            assert _table_struct(migrated, table) == _table_struct(fresh, table)
+
+
+def test_migration_is_idempotent(tmp_path, monkeypatch):
+    """Re-opening an already-migrated DB is a no-op and leaves data intact."""
+    db_path = _setup_home(tmp_path, monkeypatch)
+    _make_legacy_db(db_path)
+
+    with kb.connect(db_path):
+        pass
+    kb._INITIALIZED_PATHS.discard(str(db_path.resolve()))
+    with kb.connect(db_path) as conn:
+        id_col = {r["name"]: r for r in conn.execute("PRAGMA table_info(task_events)")}["id"]
+        assert id_col["type"].upper() == "INTEGER"
+        assert len(conn.execute("SELECT * FROM task_events").fetchall()) == 2
+
+
+def test_unseen_events_for_sub_survives_migrated_db(tmp_path, monkeypatch):
+    """The crash that motivated #35096 — ``int(None)`` on a NULL cursor — is
+    gone after migration; the notifier query returns an integer cursor."""
+    db_path = _setup_home(tmp_path, monkeypatch)
+    _make_legacy_db(db_path)
+
+    with kb.connect(db_path) as conn:
+        cursor, events = kb.unseen_events_for_sub(
+            conn, task_id="task-1", platform="telegram", chat_id="123"
+        )
+        assert isinstance(cursor, int)
+        assert isinstance(events, list)
diff --git a/tests/hermes_cli/test_mcp_startup.py b/tests/hermes_cli/test_mcp_startup.py
new file mode 100644
index 00000000000..08639abbcc9
--- /dev/null
+++ b/tests/hermes_cli/test_mcp_startup.py
@@ -0,0 +1,166 @@
+"""Regression tests for bounded/lazy CLI MCP startup."""
+
+from __future__ import annotations
+
+from argparse import Namespace
+import sys
+import threading
+import time
+import types
+
+import pytest
+
+import cli as cli_mod
+from hermes_cli import main as main_mod
+from hermes_cli import mcp_startup
+
+
+@pytest.fixture(autouse=True)
+def _reset_mcp_startup_state():
+    saved_started = mcp_startup._mcp_discovery_started
+    saved_thread = mcp_startup._mcp_discovery_thread
+    try:
+        mcp_startup._mcp_discovery_started = False
+        mcp_startup._mcp_discovery_thread = None
+        yield
+    finally:
+        thread = mcp_startup._mcp_discovery_thread
+        if thread is not None and thread.is_alive():
+            thread.join(timeout=1.0)
+        mcp_startup._mcp_discovery_started = saved_started
+        mcp_startup._mcp_discovery_thread = saved_thread
+
+
+def _agent_args(**overrides) -> Namespace:
+    base = {
+        "accept_hooks": False,
+        "command": "chat",
+        "cron_command": None,
+        "gateway_command": None,
+        "mcp_action": None,
+        "tui": False,
+    }
+    base.update(overrides)
+    return Namespace(**base)
+
+
+def test_prepare_agent_startup_backgrounds_blocking_mcp_for_chat(monkeypatch):
+    stop = threading.Event()
+    calls = {"mcp": 0}
+
+    def _blocking_discover():
+        calls["mcp"] += 1
+        stop.wait()
+
+    monkeypatch.setitem(
+        sys.modules,
+        "hermes_cli.plugins",
+        types.SimpleNamespace(discover_plugins=lambda: None),
+    )
+    monkeypatch.setitem(
+        sys.modules,
+        "hermes_cli.config",
+        types.SimpleNamespace(
+            read_raw_config=lambda: {"mcp_servers": {"demo": {"transport": "stdio"}}},
+            load_config=lambda: {},
+        ),
+    )
+    monkeypatch.setitem(
+        sys.modules,
+        "agent.shell_hooks",
+        types.SimpleNamespace(register_from_config=lambda *_a, **_k: None),
+    )
+    monkeypatch.setitem(
+        sys.modules,
+        "tools.mcp_tool",
+        types.SimpleNamespace(discover_mcp_tools=_blocking_discover),
+    )
+
+    try:
+        start = time.monotonic()
+        main_mod._prepare_agent_startup(_agent_args())
+        elapsed = time.monotonic() - start
+        assert elapsed < 0.2
+        assert calls["mcp"] == 1
+        assert mcp_startup._mcp_discovery_thread is not None
+        assert mcp_startup._mcp_discovery_thread.is_alive()
+    finally:
+        stop.set()
+
+
+def test_prepare_agent_startup_skips_mcp_bootstrap_for_tui_chat(monkeypatch):
+    calls = {"mcp": 0}
+
+    monkeypatch.setitem(
+        sys.modules,
+        "hermes_cli.plugins",
+        types.SimpleNamespace(discover_plugins=lambda: None),
+    )
+    monkeypatch.setitem(
+        sys.modules,
+        "hermes_cli.config",
+        types.SimpleNamespace(load_config=lambda: {}),
+    )
+    monkeypatch.setitem(
+        sys.modules,
+        "agent.shell_hooks",
+        types.SimpleNamespace(register_from_config=lambda *_a, **_k: None),
+    )
+    monkeypatch.setitem(
+        sys.modules,
+        "tools.mcp_tool",
+        types.SimpleNamespace(
+            discover_mcp_tools=lambda: calls.__setitem__("mcp", calls["mcp"] + 1)
+        ),
+    )
+
+    main_mod._prepare_agent_startup(_agent_args(tui=True))
+
+    assert calls["mcp"] == 0
+    assert mcp_startup._mcp_discovery_thread is None
+
+
+def test_cli_get_tool_definitions_briefly_waits_for_fast_mcp_thread(monkeypatch):
+    thread = threading.Thread(target=lambda: time.sleep(0.05), daemon=True)
+    thread.start()
+    mcp_startup._mcp_discovery_thread = thread
+
+    monkeypatch.setitem(
+        sys.modules,
+        "model_tools",
+        types.SimpleNamespace(get_tool_definitions=lambda *_a, **_k: ["ok"]),
+    )
+
+    start = time.monotonic()
+    result = cli_mod.get_tool_definitions(enabled_toolsets=["web"], quiet_mode=True)
+    elapsed = time.monotonic() - start
+
+    assert result == ["ok"]
+    assert elapsed >= 0.04
+    assert not thread.is_alive()
+
+
+def test_init_agent_waits_for_mcp_discovery_before_agent_build(monkeypatch):
+    waited = {"done": False}
+
+    cli = cli_mod.HermesCLI(compact=True)
+    cli._session_db = object()
+    cli._resumed = False
+    cli.conversation_history = []
+    cli._install_tool_callbacks = lambda: None
+    cli._ensure_tirith_security = lambda: None
+    cli._ensure_runtime_credentials = lambda: True
+
+    monkeypatch.setattr(
+        mcp_startup,
+        "wait_for_mcp_discovery",
+        lambda timeout=0.75: waited.__setitem__("done", True),
+    )
+
+    def _fake_agent(*_a, **_k):
+        assert waited["done"] is True
+        return types.SimpleNamespace()
+
+    monkeypatch.setattr(cli_mod, "AIAgent", _fake_agent)
+
+    assert cli._init_agent() is True
diff --git a/tests/hermes_cli/test_memory_setup_provider_arg.py b/tests/hermes_cli/test_memory_setup_provider_arg.py
new file mode 100644
index 00000000000..6dd310094b5
--- /dev/null
+++ b/tests/hermes_cli/test_memory_setup_provider_arg.py
@@ -0,0 +1,50 @@
+"""Tests for `hermes memory setup [provider]` routing.
+
+The `memory setup` subcommand accepts an optional positional ``provider`` so a
+fresh install can configure a specific provider directly (e.g.
+``hermes memory setup honcho``) without the interactive picker — which matters
+because the per-provider ``hermes <provider>`` subcommand is only registered
+once that provider is active.
+"""
+
+from types import SimpleNamespace
+from unittest.mock import patch
+
+from hermes_cli import memory_setup
+
+
+class TestMemorySetupProviderRouting:
+    def test_setup_with_provider_arg_skips_picker(self):
+        """`memory setup honcho` routes straight to cmd_setup_provider."""
+        args = SimpleNamespace(memory_command="setup", provider="honcho")
+        with patch.object(memory_setup, "cmd_setup_provider") as direct, \
+             patch.object(memory_setup, "cmd_setup") as picker:
+            memory_setup.memory_command(args)
+        direct.assert_called_once_with("honcho")
+        picker.assert_not_called()
+
+    def test_setup_without_provider_runs_picker(self):
+        """`memory setup` (no provider) runs the interactive picker."""
+        args = SimpleNamespace(memory_command="setup", provider=None)
+        with patch.object(memory_setup, "cmd_setup_provider") as direct, \
+             patch.object(memory_setup, "cmd_setup") as picker:
+            memory_setup.memory_command(args)
+        picker.assert_called_once_with(args)
+        direct.assert_not_called()
+
+    def test_setup_with_missing_provider_attr_runs_picker(self):
+        """A SimpleNamespace lacking `provider` must not crash — fall back to picker."""
+        args = SimpleNamespace(memory_command="setup")
+        with patch.object(memory_setup, "cmd_setup_provider") as direct, \
+             patch.object(memory_setup, "cmd_setup") as picker:
+            memory_setup.memory_command(args)
+        picker.assert_called_once_with(args)
+        direct.assert_not_called()
+
+    def test_unknown_provider_reports_and_returns_early(self, capsys):
+        """An unknown provider name surfaces a helpful message and returns
+        before any config load/save (the not-found guard precedes those imports)."""
+        memory_setup.cmd_setup_provider("notaprovider")
+        out = capsys.readouterr().out
+        assert "not found" in out
+        assert "hermes memory setup" in out
diff --git a/tests/hermes_cli/test_model_validation.py b/tests/hermes_cli/test_model_validation.py
index 91fc4e50d00..89465b6c6c7 100644
--- a/tests/hermes_cli/test_model_validation.py
+++ b/tests/hermes_cli/test_model_validation.py
@@ -142,10 +142,6 @@ class TestCuratedModelsForProvider:
         assert len(models) > 0
         assert any("claude" in m[0] for m in models)
 
-    def test_zai_returns_glm_models(self):
-        models = curated_models_for_provider("zai")
-        assert any("glm" in m[0] for m in models)
-
     def test_unknown_provider_returns_empty(self):
         assert curated_models_for_provider("totally-unknown") == []
 
@@ -199,9 +195,6 @@ class TestProviderModelIds:
     def test_unknown_provider_returns_empty(self):
         assert provider_model_ids("some-unknown-provider") == []
 
-    def test_zai_returns_glm_models(self):
-        assert "glm-5" in provider_model_ids("zai")
-
     def test_stepfun_prefers_live_catalog(self):
         with patch(
             "hermes_cli.auth.resolve_api_key_provider_credentials",
@@ -222,31 +215,6 @@ class TestProviderModelIds:
              patch("hermes_cli.models._fetch_github_models", return_value=["gpt-5.4", "claude-sonnet-4.6"]):
             assert provider_model_ids("copilot-acp") == ["gpt-5.4", "claude-sonnet-4.6"]
 
-    def test_copilot_falls_back_to_curated_defaults_without_stale_opus(self):
-        with patch("hermes_cli.models._resolve_copilot_catalog_api_key", return_value="gh-token"), \
-             patch("hermes_cli.models._fetch_github_models", return_value=None):
-            ids = provider_model_ids("copilot")
-
-        assert "gpt-5.4" in ids
-        assert "claude-sonnet-4.6" in ids
-        assert "claude-sonnet-4" in ids
-        assert "claude-sonnet-4.5" in ids
-        assert "claude-haiku-4.5" in ids
-        assert "gemini-3.1-pro-preview" in ids
-        assert "claude-opus-4.6" not in ids
-
-    def test_copilot_acp_falls_back_to_copilot_defaults(self):
-        with patch("hermes_cli.models._resolve_copilot_catalog_api_key", return_value="gh-token"), \
-             patch("hermes_cli.models._fetch_github_models", return_value=None):
-            ids = provider_model_ids("copilot-acp")
-
-        assert "gpt-5.4" in ids
-        assert "claude-sonnet-4.6" in ids
-        assert "claude-sonnet-4" in ids
-        assert "gemini-3.1-pro-preview" in ids
-        assert "copilot-acp" not in ids
-        assert "claude-opus-4.6" not in ids
-
 
 # -- fetch_api_models --------------------------------------------------------
 
diff --git a/tests/hermes_cli/test_models.py b/tests/hermes_cli/test_models.py
index db96a6558d7..f965f361dec 100644
--- a/tests/hermes_cli/test_models.py
+++ b/tests/hermes_cli/test_models.py
@@ -56,10 +56,6 @@ class TestOpenRouterModels:
             assert isinstance(mid, str) and len(mid) > 0
             assert isinstance(desc, str)
 
-    def test_at_least_5_models(self):
-        """Sanity check that the models list hasn't been accidentally truncated."""
-        assert len(OPENROUTER_MODELS) >= 5
-
 
 class TestFetchOpenRouterModels:
     def test_live_fetch_recomputes_free_tags(self, monkeypatch):
diff --git a/tests/hermes_cli/test_nous_subscription.py b/tests/hermes_cli/test_nous_subscription.py
index 2c89d245301..561602c0ac6 100644
--- a/tests/hermes_cli/test_nous_subscription.py
+++ b/tests/hermes_cli/test_nous_subscription.py
@@ -231,3 +231,93 @@ def test_get_gateway_eligible_tools_ignores_quoted_false_opt_in(monkeypatch):
     assert "web" in has_direct
     assert "web" not in already_managed
     assert set(unconfigured) == {"image_gen", "video_gen", "tts", "browser"}
+
+
+def test_apply_nous_managed_defaults_writes_video_gen_config(monkeypatch):
+    """apply_nous_managed_defaults must write video_gen.provider and
+    video_gen.use_gateway when a Nous subscriber selects video_gen
+    without a direct FAL_KEY."""
+    monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda **kw: True)
+    monkeypatch.delenv("FAL_KEY", raising=False)
+    monkeypatch.setattr(ns, "fal_key_is_configured", lambda: False)
+    monkeypatch.setattr(
+        ns, "get_nous_portal_account_info",
+        lambda **kw: _account(logged_in=True, paid=True),
+    )
+
+    config = {"model": {"provider": "nous"}}
+    changed = ns.apply_nous_managed_defaults(
+        config, enabled_toolsets=["video_gen"],
+    )
+
+    assert "video_gen" in changed
+    assert config["video_gen"]["provider"] == "fal"
+    assert config["video_gen"]["use_gateway"] is True
+
+
+def test_apply_nous_managed_defaults_writes_image_gen_config(monkeypatch):
+    """apply_nous_managed_defaults must write image_gen.use_gateway
+    when a Nous subscriber selects image_gen without a direct FAL_KEY."""
+    monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda **kw: True)
+    monkeypatch.delenv("FAL_KEY", raising=False)
+    monkeypatch.setattr(ns, "fal_key_is_configured", lambda: False)
+    monkeypatch.setattr(
+        ns, "get_nous_portal_account_info",
+        lambda **kw: _account(logged_in=True, paid=True),
+    )
+
+    config = {"model": {"provider": "nous"}}
+    changed = ns.apply_nous_managed_defaults(
+        config, enabled_toolsets=["image_gen"],
+    )
+
+    assert "image_gen" in changed
+    assert config["image_gen"]["use_gateway"] is True
+
+
+def test_apply_nous_managed_defaults_skips_fal_tools_when_key_present(monkeypatch):
+    """When FAL_KEY is set, apply_nous_managed_defaults should not touch
+    image_gen or video_gen config — the user's direct key takes precedence."""
+    monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda **kw: True)
+    monkeypatch.setenv("FAL_KEY", "fal-direct-key")
+    monkeypatch.setattr(ns, "fal_key_is_configured", lambda: True)
+    monkeypatch.setattr(
+        ns, "get_nous_portal_account_info",
+        lambda **kw: _account(logged_in=True, paid=True),
+    )
+
+    config = {"model": {"provider": "nous"}}
+    changed = ns.apply_nous_managed_defaults(
+        config, enabled_toolsets=["image_gen", "video_gen"],
+    )
+
+    assert "image_gen" not in changed
+    assert "video_gen" not in changed
+    assert "image_gen" not in config
+    assert "video_gen" not in config
+
+
+def test_apply_nous_managed_defaults_preserves_existing_video_gen_section(monkeypatch):
+    """When video_gen config already exists as a dict, the function should
+    update it in-place rather than replacing it."""
+    monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda **kw: True)
+    monkeypatch.delenv("FAL_KEY", raising=False)
+    monkeypatch.setattr(ns, "fal_key_is_configured", lambda: False)
+    monkeypatch.setattr(
+        ns, "get_nous_portal_account_info",
+        lambda **kw: _account(logged_in=True, paid=True),
+    )
+
+    config = {
+        "model": {"provider": "nous"},
+        "video_gen": {"model": "pixverse-v6"},
+    }
+    changed = ns.apply_nous_managed_defaults(
+        config, enabled_toolsets=["video_gen"],
+    )
+
+    assert "video_gen" in changed
+    assert config["video_gen"]["provider"] == "fal"
+    assert config["video_gen"]["use_gateway"] is True
+    # Pre-existing keys should be preserved
+    assert config["video_gen"]["model"] == "pixverse-v6"
diff --git a/tests/hermes_cli/test_ollama_cloud_provider.py b/tests/hermes_cli/test_ollama_cloud_provider.py
index e62aa899ff8..ad7e3a0b9d9 100644
--- a/tests/hermes_cli/test_ollama_cloud_provider.py
+++ b/tests/hermes_cli/test_ollama_cloud_provider.py
@@ -495,12 +495,3 @@ class TestOllamaCloudSuffixStripping:
         assert _strip_ollama_cloud_suffix("qwen3-coder:480b-cloud") == "qwen3-coder:480b"
         assert _strip_ollama_cloud_suffix("nemotron-3-nano:30b") == "nemotron-3-nano:30b"
         assert _strip_ollama_cloud_suffix("") == ""
-
-
-# ── Auxiliary Model ──
-
-class TestOllamaCloudAuxiliary:
-    def test_aux_model_defined(self):
-        from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
-        assert "ollama-cloud" in _API_KEY_PROVIDER_AUX_MODELS
-        assert _API_KEY_PROVIDER_AUX_MODELS["ollama-cloud"] == "nemotron-3-nano:30b"
diff --git a/tests/hermes_cli/test_pip_install_detection.py b/tests/hermes_cli/test_pip_install_detection.py
index 49df74f626e..eb06e35f2bf 100644
--- a/tests/hermes_cli/test_pip_install_detection.py
+++ b/tests/hermes_cli/test_pip_install_detection.py
@@ -48,12 +48,32 @@ def test_stamp_file_takes_precedence(tmp_path):
         assert detect_install_method(project_root=tmp_path) == "docker"
 
 
-def test_docker_detected_via_dockerenv(tmp_path):
+def test_container_without_stamp_is_not_docker(tmp_path):
+    """An unstamped install in a generic container must NOT be flagged as docker.
+
+    Regression for issue #34397. The two supported installs both stamp
+    ``.install_method`` (the curl installer -> ``git``, covered by
+    ``test_stamp_file_takes_precedence``; the published image -> ``docker``),
+    so neither hits this path. An unsupported manual install dropped into a
+    container has no stamp and was wrongly classified as the published Docker
+    image, so ``hermes update`` refused to run. With a ``.git`` checkout it
+    must resolve to ``git``.
+    """
+    (tmp_path / ".git").mkdir()
     with patch("hermes_cli.config.get_managed_system", return_value=None), \
          patch("hermes_cli.config.get_hermes_home", return_value=tmp_path), \
          patch("hermes_constants.is_container", return_value=True):
         from hermes_cli.config import detect_install_method
-        assert detect_install_method(project_root=tmp_path) == "docker"
+        assert detect_install_method(project_root=tmp_path) == "git"
+
+
+def test_container_pip_install_without_stamp_is_pip(tmp_path):
+    """Container + no .git + no stamp -> pip, not docker (issue #34397)."""
+    with patch("hermes_cli.config.get_managed_system", return_value=None), \
+         patch("hermes_cli.config.get_hermes_home", return_value=tmp_path), \
+         patch("hermes_constants.is_container", return_value=True):
+        from hermes_cli.config import detect_install_method
+        assert detect_install_method(project_root=tmp_path) == "pip"
 
 
 def test_recommended_update_command_docker():
diff --git a/tests/hermes_cli/test_profiles.py b/tests/hermes_cli/test_profiles.py
index 22e36d42123..dd336030928 100644
--- a/tests/hermes_cli/test_profiles.py
+++ b/tests/hermes_cli/test_profiles.py
@@ -754,8 +754,8 @@ class TestRenameProfile:
 
         cfg = json.loads(honcho_path.read_text())
         assert "hermes.ssi_health" not in cfg["hosts"]
-        assert cfg["hosts"]["hermes.heimdall"]["aiPeer"] == "ssi_health"
-        assert cfg["hosts"]["hermes.heimdall"]["peerName"] == "user-peer"
+        assert cfg["hosts"]["hermes_heimdall"]["aiPeer"] == "ssi_health"
+        assert cfg["hosts"]["hermes_heimdall"]["peerName"] == "user-peer"
 
     def test_pins_ai_peer_when_absent_on_honcho_host_rename(self, profile_env):
         tmp_path = profile_env
@@ -772,8 +772,8 @@ class TestRenameProfile:
 
         cfg = json.loads(honcho_path.read_text())
         assert "hermes.ssi_health" not in cfg["hosts"]
-        assert cfg["hosts"]["hermes.heimdall"]["aiPeer"] == "ssi_health"
-        assert cfg["hosts"]["hermes.heimdall"]["workspace"] == "hermes"
+        assert cfg["hosts"]["hermes_heimdall"]["aiPeer"] == "ssi_health"
+        assert cfg["hosts"]["hermes_heimdall"]["workspace"] == "hermes"
 
     def test_does_not_overwrite_existing_honcho_host_on_rename(self, profile_env):
         tmp_path = profile_env
@@ -782,7 +782,7 @@ class TestRenameProfile:
         honcho_path.write_text(json.dumps({
             "hosts": {
                 "hermes.ssi_health": {"aiPeer": "ssi_health"},
-                "hermes.heimdall": {"aiPeer": "heimdall"},
+                "hermes_heimdall": {"aiPeer": "heimdall"},
             }
         }))
 
@@ -791,7 +791,7 @@ class TestRenameProfile:
 
         cfg = json.loads(honcho_path.read_text())
         assert cfg["hosts"]["hermes.ssi_health"]["aiPeer"] == "ssi_health"
-        assert cfg["hosts"]["hermes.heimdall"]["aiPeer"] == "heimdall"
+        assert cfg["hosts"]["hermes_heimdall"]["aiPeer"] == "heimdall"
 
     def test_default_raises_value_error(self, profile_env):
         with pytest.raises(ValueError, match="default"):
diff --git a/tests/hermes_cli/test_prompt_size.py b/tests/hermes_cli/test_prompt_size.py
new file mode 100644
index 00000000000..bd75c6df142
--- /dev/null
+++ b/tests/hermes_cli/test_prompt_size.py
@@ -0,0 +1,118 @@
+"""Tests for the ``hermes prompt-size`` diagnostic (issue #34667)."""
+
+import json
+
+import pytest
+
+from hermes_cli.prompt_size import (
+    _SKILLS_BLOCK_RE,
+    compute_prompt_breakdown,
+    render_breakdown,
+)
+
+
+def _seed_memory(hermes_home, memory_text="", user_text=""):
+    mem_dir = hermes_home / "memories"
+    mem_dir.mkdir(parents=True, exist_ok=True)
+    if memory_text:
+        (mem_dir / "MEMORY.md").write_text(memory_text, encoding="utf-8")
+    if user_text:
+        (mem_dir / "USER.md").write_text(user_text, encoding="utf-8")
+
+
+def _seed_skill(hermes_home, name, description):
+    skill_dir = hermes_home / "skills" / "demo" / name
+    skill_dir.mkdir(parents=True, exist_ok=True)
+    (skill_dir / "SKILL.md").write_text(
+        f"---\nname: {name}\ndescription: {description}\n---\n# {name}\nbody\n",
+        encoding="utf-8",
+    )
+
+
+@pytest.fixture
+def isolated_home(tmp_path, monkeypatch):
+    hermes_home = tmp_path / ".hermes"
+    hermes_home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+    monkeypatch.chdir(tmp_path)  # avoid picking up the repo's AGENTS.md
+    return hermes_home
+
+
+def test_breakdown_keys_and_shape(isolated_home):
+    """The breakdown exposes every documented key with int byte/char counts."""
+    data = compute_prompt_breakdown("cli")
+    assert set(data) >= {
+        "platform",
+        "model",
+        "system_prompt",
+        "skills_index",
+        "memory",
+        "user_profile",
+        "tools",
+        "sections",
+    }
+    assert data["platform"] == "cli"
+    for key in ("system_prompt", "skills_index", "memory", "user_profile"):
+        assert data[key]["bytes"] >= 0
+        assert data[key]["chars"] >= 0
+    assert data["tools"]["count"] >= 0
+    assert data["tools"]["json_bytes"] >= 0
+    # System prompt is non-trivial even with empty home (identity + guidance).
+    assert data["system_prompt"]["bytes"] > 0
+
+
+def test_runs_offline_without_credentials(isolated_home, monkeypatch):
+    """No provider credentials configured → still produces a breakdown."""
+    for var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "NOUS_API_KEY",
+                "ANTHROPIC_API_KEY"):
+        monkeypatch.delenv(var, raising=False)
+    data = compute_prompt_breakdown("cli")
+    assert data["system_prompt"]["bytes"] > 0
+
+
+def test_skills_index_reflects_installed_skills(isolated_home):
+    """Installing a skill makes the skills-index block non-empty.
+
+    Note: the skills prompt is cached per-process (in-process LRU + disk
+    snapshot), so we seed the skill BEFORE the first build rather than
+    comparing before/after within one process.
+    """
+    _seed_skill(isolated_home, "hello", "a demo skill for size testing")
+    data = compute_prompt_breakdown("cli")
+    assert data["skills_index"]["bytes"] > 0
+
+
+def test_memory_and_profile_are_attributed(isolated_home):
+    """Memory and user-profile blocks are measured separately."""
+    _seed_memory(
+        isolated_home,
+        memory_text="Project uses pytest.\n",
+        user_text="User is a developer.\n",
+    )
+    data = compute_prompt_breakdown("cli")
+    assert data["memory"]["bytes"] > 0
+    assert data["user_profile"]["bytes"] > 0
+
+
+def test_skills_block_regex_matches_tagged_block():
+    text = "preamble\n<available_skills>\n  cat:\n    - a: b\n</available_skills>\ntail"
+    m = _SKILLS_BLOCK_RE.search(text)
+    assert m is not None
+    assert m.group(0).startswith("<available_skills>")
+    assert m.group(0).endswith("</available_skills>")
+
+
+def test_render_breakdown_is_plain_text(isolated_home):
+    data = compute_prompt_breakdown("cli")
+    out = render_breakdown(data)
+    assert "System prompt total" in out
+    assert "skills index" in out
+    assert "Tool schemas" in out
+    # Plain text — no JSON braces leaking in.
+    assert not out.strip().startswith("{")
+
+
+def test_json_serializable(isolated_home):
+    data = compute_prompt_breakdown("cli")
+    # Round-trips cleanly for ``--json`` output.
+    assert json.loads(json.dumps(data)) == json.loads(json.dumps(data))
diff --git a/tests/hermes_cli/test_provider_groups.py b/tests/hermes_cli/test_provider_groups.py
new file mode 100644
index 00000000000..561ad4eac3a
--- /dev/null
+++ b/tests/hermes_cli/test_provider_groups.py
@@ -0,0 +1,118 @@
+"""Tests for provider-group folding (display-only picker grouping).
+
+These are invariant tests, not catalog snapshots: they assert how
+``group_providers`` folds a flat slug list and how member slugs relate to
+``PROVIDER_GROUPS`` / ``CANONICAL_PROVIDERS`` — not the specific set of
+vendors, which is expected to change over time.
+"""
+
+from hermes_cli.models import (
+    CANONICAL_PROVIDERS,
+    PROVIDER_GROUPS,
+    group_providers,
+    provider_group_for_slug,
+)
+
+
+def _slugs(rows):
+    """Flatten picker rows back to the concrete slugs they expose."""
+    out = []
+    for r in rows:
+        if r["kind"] == "single":
+            out.append(r["slug"])
+        else:
+            out.extend(r["members"])
+    return out
+
+
+def test_groups_reference_real_canonical_slugs():
+    """Every group member must be an actual provider slug. Guards typos and
+    stale group entries after a provider is renamed/removed."""
+    canonical = {p.slug for p in CANONICAL_PROVIDERS}
+    for gid, (label, members) in PROVIDER_GROUPS.items():
+        assert label, f"group {gid} has empty label"
+        assert len(members) >= 1
+        for m in members:
+            assert m in canonical, f"group {gid} member {m!r} is not a canonical slug"
+
+
+def test_member_slugs_are_unique_across_groups():
+    """A slug may belong to at most one group."""
+    seen = {}
+    for gid, (_label, members) in PROVIDER_GROUPS.items():
+        for m in members:
+            assert m not in seen, f"{m!r} in both {seen[m]!r} and {gid!r}"
+            seen[m] = gid
+
+
+def test_reverse_index_matches_groups():
+    for gid, (_label, members) in PROVIDER_GROUPS.items():
+        for m in members:
+            assert provider_group_for_slug(m) == gid
+    assert provider_group_for_slug("openrouter") == ""
+    assert provider_group_for_slug("") == ""
+
+
+def test_ungrouped_providers_pass_through_in_order():
+    rows = group_providers(["nous", "openrouter", "deepseek"])
+    assert all(r["kind"] == "single" for r in rows)
+    assert [r["slug"] for r in rows] == ["nous", "openrouter", "deepseek"]
+
+
+def test_multi_member_group_folds_to_one_row():
+    rows = group_providers(["minimax", "minimax-oauth", "minimax-cn"])
+    assert len(rows) == 1
+    row = rows[0]
+    assert row["kind"] == "group"
+    assert row["group_id"] == "minimax"
+    assert row["members"] == ["minimax", "minimax-oauth", "minimax-cn"]
+
+
+def test_group_appears_at_first_member_position():
+    """The group row takes the slot of its earliest-listed present member,
+    and later members do not re-emit."""
+    rows = group_providers(["nous", "minimax", "deepseek", "minimax-cn"])
+    kinds = [(r["kind"], r.get("group_id") or r.get("slug")) for r in rows]
+    assert kinds == [
+        ("single", "nous"),
+        ("group", "minimax"),
+        ("single", "deepseek"),
+    ]
+    # both minimax members folded into the single group row
+    assert rows[1]["members"] == ["minimax", "minimax-cn"]
+
+
+def test_single_present_member_degrades_to_single_row():
+    """A group with only one present member shows no submenu."""
+    rows = group_providers(["xai"])  # xai-oauth absent
+    assert len(rows) == 1
+    assert rows[0]["kind"] == "single"
+    assert rows[0]["slug"] == "xai"
+
+
+def test_member_order_follows_declaration_not_input():
+    """Inside a folded group, members are ordered by PROVIDER_GROUPS, not by
+    the order they appeared in the input list."""
+    rows = group_providers(["minimax-cn", "minimax", "minimax-oauth"])
+    assert rows[0]["members"] == ["minimax", "minimax-oauth", "minimax-cn"]
+
+
+def test_duplicate_slugs_ignored():
+    rows = group_providers(["nous", "nous", "minimax", "minimax"])
+    assert [r.get("slug") or r["group_id"] for r in rows] == ["nous", "minimax"]
+
+
+def test_fold_is_lossless_for_present_slugs():
+    """Every input slug (deduped) must still be reachable through the folded
+    rows — grouping hides nothing."""
+    flat = [p.slug for p in CANONICAL_PROVIDERS]
+    rows = group_providers(flat)
+    assert set(_slugs(rows)) == set(flat)
+
+
+def test_canonical_fold_row_count_shrinks():
+    """Folding the full canonical list produces fewer top-level rows than the
+    flat list (proves grouping actually consolidates)."""
+    flat = [p.slug for p in CANONICAL_PROVIDERS]
+    rows = group_providers(flat)
+    assert len(rows) < len(flat)
diff --git a/tests/hermes_cli/test_tools_config.py b/tests/hermes_cli/test_tools_config.py
index cfef9c3b46a..e93ad8fcaf3 100644
--- a/tests/hermes_cli/test_tools_config.py
+++ b/tests/hermes_cli/test_tools_config.py
@@ -757,8 +757,68 @@ def test_first_install_nous_auto_configures_managed_defaults(monkeypatch):
     assert config["web"]["backend"] == "firecrawl"
     assert config["tts"]["provider"] == "openai"
     assert config["browser"]["cloud_provider"] == "browser-use"
+    assert config["image_gen"]["use_gateway"] is True
     assert configured == []
 
+
+def test_first_install_nous_auto_configures_video_gen(monkeypatch):
+    """When a Nous subscriber checks video_gen in the toolset checklist,
+    apply_nous_managed_defaults must write video_gen.provider and
+    video_gen.use_gateway so the FAL plugin can route through the gateway
+    at runtime.  Regression test for the bug where video_gen was marked as
+    auto-configured but no config was actually written."""
+    monkeypatch.setattr("hermes_cli.nous_subscription.managed_nous_tools_enabled", lambda: True)
+    config = {
+        "model": {"provider": "nous"},
+        "platform_toolsets": {"cli": []},
+    }
+    for env_var in (
+        "VOICE_TOOLS_OPENAI_KEY",
+        "OPENAI_API_KEY",
+        "ELEVENLABS_API_KEY",
+        "FIRECRAWL_API_KEY",
+        "FIRECRAWL_API_URL",
+        "TAVILY_API_KEY",
+        "PARALLEL_API_KEY",
+        "BROWSERBASE_API_KEY",
+        "BROWSERBASE_PROJECT_ID",
+        "BROWSER_USE_API_KEY",
+        "FAL_KEY",
+    ):
+        monkeypatch.delenv(env_var, raising=False)
+
+    monkeypatch.setattr(
+        "hermes_cli.tools_config._prompt_toolset_checklist",
+        lambda *args, **kwargs: {"video_gen"},
+    )
+    monkeypatch.setattr("hermes_cli.tools_config.save_config", lambda config: None)
+    monkeypatch.setattr(
+        "hermes_cli.tools_config._get_enabled_platforms",
+        lambda: ["cli"],
+    )
+    monkeypatch.setattr(
+        "hermes_cli.nous_subscription.get_nous_portal_account_info",
+        lambda *args, **kwargs: NousPortalAccountInfo(
+            logged_in=True,
+            source="jwt",
+            fresh=False,
+            paid_service_access=True,
+        ),
+    )
+
+    configured = []
+    monkeypatch.setattr(
+        "hermes_cli.tools_config._configure_toolset",
+        lambda ts_key, config: configured.append(ts_key),
+    )
+
+    tools_command(first_install=True, config=config)
+
+    assert config["video_gen"]["provider"] == "fal"
+    assert config["video_gen"]["use_gateway"] is True
+    # video_gen should NOT appear in the manual configure list — it's auto-configured
+    assert "video_gen" not in configured
+
 # ── Platform / toolset consistency ────────────────────────────────────────────
 
 
diff --git a/tests/hermes_cli/test_tui_resume_flow.py b/tests/hermes_cli/test_tui_resume_flow.py
index bcf552a8f10..d15d67c0071 100644
--- a/tests/hermes_cli/test_tui_resume_flow.py
+++ b/tests/hermes_cli/test_tui_resume_flow.py
@@ -638,6 +638,60 @@ def test_oneshot_rejects_invalid_only_toolsets(monkeypatch, capsys):
     assert "did not contain any valid toolsets" in err
 
 
+def test_oneshot_fails_closed_on_empty_final_response(monkeypatch, capsys):
+    _stub_plugin_discovery(monkeypatch)
+    import hermes_cli.oneshot as oneshot_mod
+
+    monkeypatch.setattr(oneshot_mod, "_run_agent", lambda *_args, **_kwargs: "")
+
+    assert oneshot_mod.run_oneshot("hello") == 1
+    captured = capsys.readouterr()
+    assert captured.out == ""
+    assert "no final response" in captured.err
+
+
+def test_oneshot_prints_nonempty_final_response(monkeypatch, capsys):
+    _stub_plugin_discovery(monkeypatch)
+    import hermes_cli.oneshot as oneshot_mod
+
+    monkeypatch.setattr(oneshot_mod, "_run_agent", lambda *_args, **_kwargs: "done")
+
+    assert oneshot_mod.run_oneshot("hello") == 0
+    captured = capsys.readouterr()
+    assert captured.out == "done\n"
+    assert captured.err == ""
+
+
+def test_oneshot_fails_closed_on_agent_exception(monkeypatch, capsys):
+    _stub_plugin_discovery(monkeypatch)
+    import hermes_cli.oneshot as oneshot_mod
+
+    def _boom(*_args, **_kwargs):
+        raise OSError("not a TTY")
+
+    monkeypatch.setattr(oneshot_mod, "_run_agent", _boom)
+
+    assert oneshot_mod.run_oneshot("hello") == 1
+    captured = capsys.readouterr()
+    assert captured.out == ""
+    assert "agent failed" in captured.err
+    assert "not a TTY" in captured.err
+
+
+def test_oneshot_reraises_keyboard_interrupt(monkeypatch):
+    _stub_plugin_discovery(monkeypatch)
+    import hermes_cli.oneshot as oneshot_mod
+    import pytest as _pytest
+
+    def _interrupt(*_args, **_kwargs):
+        raise KeyboardInterrupt
+
+    monkeypatch.setattr(oneshot_mod, "_run_agent", _interrupt)
+
+    with _pytest.raises(KeyboardInterrupt):
+        oneshot_mod.run_oneshot("hello")
+
+
 def test_oneshot_filters_invalid_toolsets_before_redirect(monkeypatch, capsys):
     _stub_plugin_discovery(monkeypatch)
     from hermes_cli.oneshot import _validate_explicit_toolsets
diff --git a/tests/hermes_cli/test_update_concurrent_quarantine.py b/tests/hermes_cli/test_update_concurrent_quarantine.py
index bddc0071e46..fe14856fd7e 100644
--- a/tests/hermes_cli/test_update_concurrent_quarantine.py
+++ b/tests/hermes_cli/test_update_concurrent_quarantine.py
@@ -128,24 +128,31 @@ def test_detect_concurrent_is_noop_off_windows(_winp, tmp_path):
 def _fake_psutil_with_parent_chain(
     parent_chain: list[int],
     proc_iter_rows: list,
+    *,
+    ancestor_exe: str | None = None,
 ):
-    """Build a psutil stand-in that has Process()/parent() AND process_iter().
+    """Build a psutil stand-in that has Process()/parents()/exe() AND process_iter().
 
-    ``parent_chain`` is the list of PIDs returned by successive ``.parent()``
-    calls starting from the seed (``os.getpid()``); the last entry's
-    ``.parent()`` returns ``None`` to terminate the walk.
+    ``parent_chain`` is the ordered list of ancestor PIDs (closest first)
+    returned by ``proc.parents()`` on the seed (``os.getpid()``).
+    ``ancestor_exe`` is the executable path reported by each ancestor's
+    ``.exe()``; when it matches one of our shim paths the ancestor is
+    excluded (the launcher-shim case). Pass ``None`` to model an ancestor
+    whose exe can't be read (psutil error) — it stays in the candidate set.
     """
 
     class _FakeProc:
-        def __init__(self, pid: int, chain: list[int]):
+        def __init__(self, pid: int, exe_path: str | None):
             self.pid = pid
-            self._chain = chain
+            self._exe = exe_path
 
-        def parent(self):
-            if not self._chain:
-                return None
-            next_pid = self._chain[0]
-            return _FakeProc(next_pid, self._chain[1:])
+        def exe(self):
+            if self._exe is None:
+                raise OSError("exe unavailable")
+            return self._exe
+
+        def parents(self):
+            return [_FakeProc(p, ancestor_exe) for p in parent_chain]
 
     class _NoSuchProcess(Exception):
         pass
@@ -153,8 +160,8 @@ def _fake_psutil_with_parent_chain(
     class _AccessDenied(Exception):
         pass
 
-    def _process(pid):
-        return _FakeProc(pid, list(parent_chain))
+    def _process(pid=None):
+        return _FakeProc(pid if pid is not None else os.getpid(), ancestor_exe)
 
     return types.SimpleNamespace(
         Process=_process,
@@ -185,6 +192,7 @@ def test_detect_concurrent_excludes_parent_chain(_winp, tmp_path):
     fake_psutil = _fake_psutil_with_parent_chain(
         parent_chain=[launcher_pid],
         proc_iter_rows=rows,
+        ancestor_exe=str(shim),
     )
     with patch.dict(sys.modules, {"psutil": fake_psutil}):
         result = cli_main._detect_concurrent_hermes_instances(scripts_dir)
@@ -211,6 +219,7 @@ def test_detect_concurrent_still_finds_unrelated_other_hermes(_winp, tmp_path):
     fake_psutil = _fake_psutil_with_parent_chain(
         parent_chain=[launcher_pid],
         proc_iter_rows=rows,
+        ancestor_exe=str(shim),
     )
     with patch.dict(sys.modules, {"psutil": fake_psutil}):
         result = cli_main._detect_concurrent_hermes_instances(scripts_dir)
@@ -238,6 +247,7 @@ def test_detect_concurrent_parent_chain_walks_deep(_winp, tmp_path):
     fake_psutil = _fake_psutil_with_parent_chain(
         parent_chain=[parent_pid, grandparent_pid, greatgrandparent_pid],
         proc_iter_rows=rows,
+        ancestor_exe=str(shim),
     )
     with patch.dict(sys.modules, {"psutil": fake_psutil}):
         result = cli_main._detect_concurrent_hermes_instances(scripts_dir)
@@ -246,25 +256,38 @@ def test_detect_concurrent_parent_chain_walks_deep(_winp, tmp_path):
 
 
 @patch.object(cli_main, "_is_windows", return_value=True)
-def test_detect_concurrent_parent_walk_handles_cycle(_winp, tmp_path):
-    """A PID cycle in the parent chain must not hang the walk."""
+def test_detect_concurrent_parents_call_robust_to_one_bad_hop(_winp, tmp_path):
+    """The launcher shim is still excluded even when an ancestor exe is unreadable.
+
+    Field regression (issues #29341, #34795): the old per-hop ``parent()``
+    walk bailed on the FIRST psutil error, so an AccessDenied on any hop left
+    the launcher shim in the candidate set and re-triggered the false
+    positive. ``parents()`` returns the whole list at once; we evaluate each
+    ancestor independently, so one unreadable hop never strands the launcher.
+    """
     scripts_dir = tmp_path
     shim = scripts_dir / "hermes.exe"
     shim.write_bytes(b"")
     me = os.getpid()
-    bogus_loop_pid = me + 1
+    launcher_pid = me + 100
 
-    rows = [_make_proc(me, str(shim), "python.exe")]
-    # Chain that points back to ``me`` — the loop-detection branch must break.
+    rows = [
+        _make_proc(me, str(shim), "python.exe"),
+        _make_proc(launcher_pid, str(shim), "hermes.exe"),
+    ]
+    # ancestor_exe=None → every ancestor's .exe() raises OSError. The helper
+    # must swallow it per-ancestor and not crash; the launcher won't be
+    # excluded in this degenerate case, but a real run reads the shim exe.
     fake_psutil = _fake_psutil_with_parent_chain(
-        parent_chain=[bogus_loop_pid, me, bogus_loop_pid],
+        parent_chain=[launcher_pid],
         proc_iter_rows=rows,
+        ancestor_exe=None,
     )
     with patch.dict(sys.modules, {"psutil": fake_psutil}):
         result = cli_main._detect_concurrent_hermes_instances(scripts_dir)
 
-    # No crash, no hang; self + bogus_loop_pid excluded; no others reported.
-    assert result == []
+    # No crash; helper completes. (Degenerate stub: launcher exe unreadable.)
+    assert result == [(launcher_pid, "hermes.exe")]
 
 
 @patch.object(cli_main, "_is_windows", return_value=True)
@@ -310,6 +333,11 @@ def test_format_message_mentions_pids_and_remediation(tmp_path):
     assert "--force" in msg
     # Mentions the file that would have been overwritten
     assert str(tmp_path / "hermes.exe") in msg
+    # Self-service kill command targets the exact stale PIDs (issue #34795).
+    assert "taskkill" in msg
+    assert "/PID 1234" in msg
+    assert "/PID 5678" in msg
+    assert "/F" in msg
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/hermes_cli/test_uv_tool_update.py b/tests/hermes_cli/test_uv_tool_update.py
new file mode 100644
index 00000000000..b5905c9b7fd
--- /dev/null
+++ b/tests/hermes_cli/test_uv_tool_update.py
@@ -0,0 +1,311 @@
+"""Tests for uv-tool install detection in the update path (issue #29700).
+
+``uv tool install hermes-agent`` lives outside any venv, so the previous
+``uv pip install --upgrade`` update path failed with ``No virtual
+environment found``. ``is_uv_tool_install`` should detect this layout and
+both the user-facing recommended command and the actual
+``_cmd_update_pip`` subprocess invocation should switch to
+``uv tool upgrade hermes-agent``.
+
+Detection is restricted to properties of the running interpreter
+(``sys.prefix`` / ``sys.executable``) so a pip/venv install on a machine
+that also has ``uv tool install hermes-agent`` does not get misclassified.
+"""
+from __future__ import annotations
+
+import subprocess
+from types import SimpleNamespace
+from unittest.mock import patch
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# is_uv_tool_install
+# ---------------------------------------------------------------------------
+
+
+class TestIsUvToolInstall:
+    def test_returns_true_when_sys_prefix_matches_uv_tool_layout(self):
+        from hermes_cli import config
+
+        with patch.object(config.sys, "prefix", "/home/user/.local/share/uv/tools/hermes-agent"):
+            assert config.is_uv_tool_install() is True
+
+    def test_returns_true_when_sys_executable_matches_uv_tool_layout(self):
+        """Some uv-tool layouts surface the marker on ``sys.executable`` (bin/python)."""
+        from hermes_cli import config
+
+        with patch.object(config.sys, "prefix", "/some/unrelated/venv"), \
+             patch.object(
+                 config.sys,
+                 "executable",
+                 "/home/user/.local/share/uv/tools/hermes-agent/bin/python",
+             ):
+            assert config.is_uv_tool_install() is True
+
+    def test_returns_false_when_neither_prefix_nor_executable_matches(self):
+        from hermes_cli import config
+
+        with patch.object(config.sys, "prefix", "/some/unrelated/venv"), \
+             patch.object(config.sys, "executable", "/usr/bin/python3"):
+            assert config.is_uv_tool_install() is False
+
+    def test_does_not_consult_uv_tool_list(self):
+        """Detection must NOT shell out: ``uv tool list`` would false-positive
+        when the active install is pip/venv but the machine also has
+        ``uv tool install hermes-agent`` somewhere on disk. Copilot review on
+        PR #29703 flagged this; the fix is to never call ``uv tool list``
+        from the detection path."""
+        from hermes_cli import config
+
+        with patch.object(config.sys, "prefix", "/some/unrelated/venv"), \
+             patch.object(config.sys, "executable", "/usr/bin/python3"), \
+             patch("subprocess.run") as mock_run:
+            assert config.is_uv_tool_install() is False
+            mock_run.assert_not_called()
+
+    def test_case_insensitive_match(self):
+        """Match must be case-insensitive — Windows paths preserve case
+        (e.g. ``...AppData\\Local\\UV\\Tools\\hermes-agent``) and a case-sensitive
+        check would miss them. We exercise the lower-cased compare path here
+        without monkey-patching ``os.sep``, which would break the whole suite."""
+        from hermes_cli import config
+
+        with patch.object(
+            config.sys, "prefix", "/HOME/USER/.local/share/UV/Tools/hermes-agent"
+        ):
+            assert config.is_uv_tool_install() is True
+
+    def test_handles_empty_executable(self):
+        from hermes_cli import config
+
+        with patch.object(config.sys, "prefix", "/some/unrelated/venv"), \
+             patch.object(config.sys, "executable", ""):
+            assert config.is_uv_tool_install() is False
+
+
+# ---------------------------------------------------------------------------
+# recommended_update_command_for_method
+# ---------------------------------------------------------------------------
+
+
+class TestRecommendedUpdateCommandForUvTool:
+    def test_uv_tool_install_recommends_uv_tool_upgrade(self):
+        from hermes_cli import config
+
+        with patch("shutil.which", return_value="/usr/local/bin/uv"), \
+             patch.object(config, "is_uv_tool_install", return_value=True):
+            cmd = config.recommended_update_command_for_method("pip")
+            assert cmd == "uv tool upgrade hermes-agent"
+
+    def test_uv_tool_install_recommends_uv_tool_upgrade_even_without_uv_on_path(self):
+        """Recommendation reflects the *install method*, not whether ``uv`` is
+        currently on PATH — the user needs to know the right command to run."""
+        from hermes_cli import config
+
+        with patch("shutil.which", return_value=None), \
+             patch.object(config, "is_uv_tool_install", return_value=True):
+            cmd = config.recommended_update_command_for_method("pip")
+            assert cmd == "uv tool upgrade hermes-agent"
+
+    def test_uv_pip_install_keeps_legacy_recommendation(self):
+        """Existing behavior: uv is on PATH but Hermes is a regular pip install."""
+        from hermes_cli import config
+
+        with patch("shutil.which", return_value="/usr/local/bin/uv"), \
+             patch.object(config, "is_uv_tool_install", return_value=False):
+            cmd = config.recommended_update_command_for_method("pip")
+            assert cmd == "uv pip install --upgrade hermes-agent"
+
+    def test_no_uv_falls_back_to_plain_pip(self):
+        from hermes_cli import config
+
+        with patch("shutil.which", return_value=None), \
+             patch.object(config, "is_uv_tool_install", return_value=False):
+            cmd = config.recommended_update_command_for_method("pip")
+            assert cmd == "pip install --upgrade hermes-agent"
+
+    def test_recommendation_does_not_spawn_subprocess(self):
+        """Computing the recommendation string must be cheap — no ``uv tool list``
+        spawn. Copilot review on PR #29703 flagged the prior subprocess hop
+        as adding overhead and a multi-second timeout window for what is
+        purely a display string."""
+        from hermes_cli import config
+
+        with patch.object(config.sys, "prefix", "/some/unrelated/venv"), \
+             patch.object(config.sys, "executable", "/usr/bin/python3"), \
+             patch("shutil.which", return_value="/usr/local/bin/uv"), \
+             patch("subprocess.run") as mock_run:
+            cmd = config.recommended_update_command_for_method("pip")
+            mock_run.assert_not_called()
+            assert cmd == "uv pip install --upgrade hermes-agent"
+
+
+# ---------------------------------------------------------------------------
+# _cmd_update_pip subprocess command
+# ---------------------------------------------------------------------------
+
+
+class TestCmdUpdatePipUsesUvTool:
+    @patch("subprocess.run")
+    def test_runs_uv_tool_upgrade_when_uv_tool_install(self, mock_run):
+        """The actual subprocess invocation must switch to ``uv tool upgrade``."""
+        from hermes_cli.main import _cmd_update_pip
+
+        mock_run.return_value = subprocess.CompletedProcess(["uv"], 0, stdout="", stderr="")
+        with patch("shutil.which", return_value="/usr/local/bin/uv"), \
+             patch("hermes_cli.config.is_uv_tool_install", return_value=True):
+            _cmd_update_pip(SimpleNamespace())
+
+        assert mock_run.call_args[0][0] == ["/usr/local/bin/uv", "tool", "upgrade", "hermes-agent"]
+
+    @patch("subprocess.run")
+    def test_runs_uv_pip_install_when_not_uv_tool(self, mock_run):
+        """Existing behavior preserved when uv is present but Hermes isn't a tool install."""
+        from hermes_cli.main import _cmd_update_pip
+
+        mock_run.return_value = subprocess.CompletedProcess(["uv"], 0, stdout="", stderr="")
+        with patch("shutil.which", return_value="/usr/local/bin/uv"), \
+             patch("hermes_cli.config.is_uv_tool_install", return_value=False):
+            _cmd_update_pip(SimpleNamespace())
+
+        assert mock_run.call_args[0][0] == [
+            "/usr/local/bin/uv",
+            "pip",
+            "install",
+            "--upgrade",
+            "hermes-agent",
+        ]
+
+    @patch("subprocess.run")
+    def test_falls_back_to_pip_when_no_uv(self, mock_run):
+        from hermes_cli.main import _cmd_update_pip
+
+        mock_run.return_value = subprocess.CompletedProcess(["pip"], 0, stdout="", stderr="")
+        with patch("shutil.which", return_value=None), \
+             patch("hermes_cli.config.is_uv_tool_install", return_value=False):
+            _cmd_update_pip(SimpleNamespace())
+
+        cmd = mock_run.call_args[0][0]
+        assert cmd[1:] == ["-m", "pip", "install", "--upgrade", "hermes-agent"]
+
+    @patch("subprocess.run")
+    def test_exits_nonzero_on_subprocess_failure(self, mock_run):
+        from hermes_cli.main import _cmd_update_pip
+
+        mock_run.return_value = subprocess.CompletedProcess(["uv"], 1, stdout="", stderr="")
+        with patch("shutil.which", return_value="/usr/local/bin/uv"), \
+             patch("hermes_cli.config.is_uv_tool_install", return_value=True):
+            with pytest.raises(SystemExit) as exc_info:
+                _cmd_update_pip(SimpleNamespace())
+        assert exc_info.value.code == 1
+
+    @patch("subprocess.run")
+    def test_uv_tool_install_without_uv_on_path_exits_with_hint(self, mock_run):
+        """If the running interpreter looks like a uv-tool install but ``uv`` is
+        somehow missing from PATH, surface a clear hint instead of silently
+        falling back to ``python -m pip``, which would either fail (no venv)
+        or upgrade the wrong copy."""
+        from hermes_cli.main import _cmd_update_pip
+
+        with patch("shutil.which", return_value=None), \
+             patch("hermes_cli.config.is_uv_tool_install", return_value=True):
+            with pytest.raises(SystemExit) as exc_info:
+                _cmd_update_pip(SimpleNamespace())
+        assert exc_info.value.code == 1
+        mock_run.assert_not_called()
+
+
+# ---------------------------------------------------------------------------
+# pipx-managed installs, --system fallback, and VIRTUAL_ENV overlay
+# (issue #29700 / #35031 family — consolidated update-path handling)
+# ---------------------------------------------------------------------------
+
+
+class TestCmdUpdatePipInstallLayouts:
+    """The uv pip path must adapt to where the running interpreter lives:
+
+    - inside a venv (launcher shim)  -> export VIRTUAL_ENV, no ``--system``
+    - bare pip outside any venv      -> add ``--system``, no overlay
+    - pipx-managed                   -> ``pipx upgrade``
+    """
+
+    @patch("subprocess.run")
+    def test_pipx_managed_uses_pipx_upgrade(self, mock_run, monkeypatch):
+        from hermes_cli import main as hm
+
+        mock_run.return_value = subprocess.CompletedProcess([], 0, stdout="", stderr="")
+        monkeypatch.setattr(hm.sys, "prefix", "/home/u/.local/pipx/venvs/hermes-agent")
+        monkeypatch.setattr(hm.sys, "base_prefix", "/usr")
+
+        def _which(name):
+            return {"uv": "/usr/bin/uv", "pipx": "/usr/bin/pipx"}.get(name)
+
+        with patch("shutil.which", side_effect=_which), \
+             patch("hermes_cli.config.is_uv_tool_install", return_value=False):
+            hm._cmd_update_pip(SimpleNamespace())
+
+        assert mock_run.call_args[0][0] == ["/usr/bin/pipx", "upgrade", "hermes-agent"]
+        # pipx upgrade ignores VIRTUAL_ENV; we must not set it.
+        assert "env" not in mock_run.call_args.kwargs
+
+    @patch("subprocess.run")
+    def test_pipx_layout_without_pipx_binary_treated_as_venv(
+        self, mock_run, monkeypatch
+    ):
+        from hermes_cli import main as hm
+
+        mock_run.return_value = subprocess.CompletedProcess([], 0, stdout="", stderr="")
+        monkeypatch.setattr(hm.sys, "prefix", "/home/u/.local/pipx/venvs/hermes-agent")
+        monkeypatch.setattr(hm.sys, "base_prefix", "/usr")
+
+        # pipx layout detected via prefix, but pipx binary missing on PATH.
+        def _which(name):
+            return "/usr/bin/uv" if name == "uv" else None
+
+        with patch("shutil.which", side_effect=_which), \
+             patch("hermes_cli.config.is_uv_tool_install", return_value=False):
+            hm._cmd_update_pip(SimpleNamespace())
+
+        # prefix != base_prefix, so this is treated as a venv -> overlay, no --system.
+        assert mock_run.call_args[0][0] == [
+            "/usr/bin/uv", "pip", "install", "--upgrade", "hermes-agent",
+        ]
+        assert mock_run.call_args.kwargs["env"]["VIRTUAL_ENV"].endswith("hermes-agent")
+
+    @patch("subprocess.run")
+    def test_bare_pip_outside_venv_adds_system(self, mock_run, monkeypatch):
+        from hermes_cli import main as hm
+
+        mock_run.return_value = subprocess.CompletedProcess([], 0, stdout="", stderr="")
+        # No venv: prefix == base_prefix.
+        monkeypatch.setattr(hm.sys, "prefix", "/usr")
+        monkeypatch.setattr(hm.sys, "base_prefix", "/usr")
+
+        with patch("shutil.which", return_value="/usr/bin/uv"), \
+             patch("hermes_cli.config.is_uv_tool_install", return_value=False):
+            hm._cmd_update_pip(SimpleNamespace())
+
+        assert mock_run.call_args[0][0] == [
+            "/usr/bin/uv", "pip", "install", "--system", "--upgrade", "hermes-agent",
+        ]
+        assert "env" not in mock_run.call_args.kwargs
+
+    @patch("subprocess.run")
+    def test_venv_exports_virtualenv_and_omits_system(self, mock_run, monkeypatch):
+        from hermes_cli import main as hm
+
+        mock_run.return_value = subprocess.CompletedProcess([], 0, stdout="", stderr="")
+        monkeypatch.delenv("VIRTUAL_ENV", raising=False)
+        monkeypatch.setattr(hm.sys, "prefix", "/home/u/.hermes/hermes-agent/venv")
+        monkeypatch.setattr(hm.sys, "base_prefix", "/usr")
+
+        with patch("shutil.which", return_value="/usr/bin/uv"), \
+             patch("hermes_cli.config.is_uv_tool_install", return_value=False):
+            hm._cmd_update_pip(SimpleNamespace())
+
+        cmd = mock_run.call_args[0][0]
+        assert "--system" not in cmd
+        assert cmd == ["/usr/bin/uv", "pip", "install", "--upgrade", "hermes-agent"]
+        assert mock_run.call_args.kwargs["env"]["VIRTUAL_ENV"] == "/home/u/.hermes/hermes-agent/venv"
diff --git a/tests/honcho_plugin/test_async_memory.py b/tests/honcho_plugin/test_async_memory.py
index 97f4f7306d5..e1f2f5ea97b 100644
--- a/tests/honcho_plugin/test_async_memory.py
+++ b/tests/honcho_plugin/test_async_memory.py
@@ -249,9 +249,12 @@ class TestFlushAll:
         mgr = _make_manager(write_frequency="async")
         sess = _make_session()
         sess.add_message("user", "pending")
-        mgr._async_queue.put(sess)
 
         with patch.object(mgr, "_flush_session") as mock_flush:
+            # Put the item AFTER the mock is installed so the background
+            # writer thread (if it dequeues before flush_all) still hits
+            # the mock rather than the real _flush_session.
+            mgr._async_queue.put(sess)
             mgr.flush_all()
             # Called at least once for the queued item
             assert mock_flush.call_count >= 1
diff --git a/tests/honcho_plugin/test_cli.py b/tests/honcho_plugin/test_cli.py
index 8244badc2f6..74b7e1bc34e 100644
--- a/tests/honcho_plugin/test_cli.py
+++ b/tests/honcho_plugin/test_cli.py
@@ -1,6 +1,7 @@
 """Tests for plugins/memory/honcho/cli.py."""
 
 from types import SimpleNamespace
+import json
 
 
 class TestResolveApiKey:
@@ -100,6 +101,84 @@ class TestResolveApiKey:
                 f"expected local sentinel for legacy schemeless {legacy!r}"
 
 
+class TestCmdSetupLocalJwt:
+    """Local-deployment setup must allow configuring a JWT for AUTH_JWT_SECRET-backed Honcho servers."""
+
+    def _run_setup(self, monkeypatch, tmp_path, initial_cfg, prompt_answers):
+        import plugins.memory.honcho.cli as honcho_cli
+
+        # Avoid touching real config / SDK / filesystem.
+        cfg_path = tmp_path / "honcho.json"
+        monkeypatch.setattr(honcho_cli, "_read_config", lambda: dict(initial_cfg))
+        monkeypatch.setattr(honcho_cli, "_local_config_path", lambda: cfg_path)
+        monkeypatch.setattr(honcho_cli, "_config_path", lambda: cfg_path)
+        monkeypatch.setattr(honcho_cli, "_host_key", lambda: "hermes")
+        monkeypatch.setattr(honcho_cli, "_ensure_sdk_installed", lambda: True)
+
+        written = {}
+
+        def _capture_write(cfg, path=None):
+            written["cfg"] = cfg
+            written["path"] = path
+
+        monkeypatch.setattr(honcho_cli, "_write_config", _capture_write)
+
+        # Feed scripted prompt answers in order.
+        answers = list(prompt_answers)
+
+        def _fake_prompt(label, default=None, secret=False):
+            if not answers:
+                # Default-through any remaining prompts to keep the wizard moving.
+                return default or ""
+            return answers.pop(0)
+
+        monkeypatch.setattr(honcho_cli, "_prompt", _fake_prompt)
+
+        honcho_cli.cmd_setup(SimpleNamespace())
+        return written.get("cfg")
+
+    def test_local_setup_stores_jwt_under_host_block(self, monkeypatch, tmp_path):
+        """Self-hosted users supplying a JWT must have it written under hosts.<host>.apiKey,
+        not as the top-level cloud apiKey, so cloud/hybrid switching is preserved and
+        get_honcho_client treats it as an explicit local auth opt-in."""
+        cfg = self._run_setup(
+            monkeypatch,
+            tmp_path,
+            initial_cfg={},
+            prompt_answers=[
+                "local",                       # deployment
+                "http://localhost:8000",       # base URL
+                "my-local-jwt-token",          # local JWT
+            ],
+        )
+        assert cfg is not None
+        assert cfg.get("baseUrl") == "http://localhost:8000"
+        # Top-level apiKey must remain unset (cloud field).
+        assert not cfg.get("apiKey")
+        # The new local JWT belongs under the host block.
+        host_block = (cfg.get("hosts") or {}).get("hermes") or {}
+        assert host_block.get("apiKey") == "my-local-jwt-token"
+
+    def test_local_setup_blank_jwt_keeps_local_no_auth(self, monkeypatch, tmp_path):
+        """Blank JWT prompt response on a fresh local config must not introduce an apiKey
+        anywhere (local no-auth Honcho deployments must still work out of the box)."""
+        cfg = self._run_setup(
+            monkeypatch,
+            tmp_path,
+            initial_cfg={},
+            prompt_answers=[
+                "local",
+                "http://localhost:8000",
+                "",  # blank JWT
+            ],
+        )
+        assert cfg is not None
+        assert cfg.get("baseUrl") == "http://localhost:8000"
+        assert not cfg.get("apiKey")
+        host_block = (cfg.get("hosts") or {}).get("hermes") or {}
+        assert not host_block.get("apiKey")
+
+
 class TestCmdStatus:
     def test_reports_connection_failure_when_session_setup_fails(self, monkeypatch, capsys, tmp_path):
         import plugins.memory.honcho.cli as honcho_cli
@@ -192,7 +271,7 @@ class TestCloneHonchoForProfile:
         honcho_cli, written = self._setup_clone_env(monkeypatch, tmp_path, cfg)
         ok = honcho_cli.clone_honcho_for_profile("coder")
         assert ok is True
-        new_block = written["cfg"]["hosts"]["hermes.coder"]
+        new_block = written["cfg"]["hosts"]["hermes_coder"]
         assert new_block["userPeerAliases"] == {"86701400": "eri", "discord-491827364": "eri"}
 
     def test_runtime_peer_prefix_carries_into_cloned_profile(self, monkeypatch, tmp_path):
@@ -208,7 +287,7 @@ class TestCloneHonchoForProfile:
         honcho_cli, written = self._setup_clone_env(monkeypatch, tmp_path, cfg)
         ok = honcho_cli.clone_honcho_for_profile("coder")
         assert ok is True
-        new_block = written["cfg"]["hosts"]["hermes.coder"]
+        new_block = written["cfg"]["hosts"]["hermes_coder"]
         assert new_block["runtimePeerPrefix"] == "telegram_"
 
     def test_pin_peer_name_carries_into_cloned_profile(self, monkeypatch, tmp_path):
@@ -224,7 +303,7 @@ class TestCloneHonchoForProfile:
         honcho_cli, written = self._setup_clone_env(monkeypatch, tmp_path, cfg)
         ok = honcho_cli.clone_honcho_for_profile("coder")
         assert ok is True
-        new_block = written["cfg"]["hosts"]["hermes.coder"]
+        new_block = written["cfg"]["hosts"]["hermes_coder"]
         assert new_block["pinPeerName"] is True
 
     def test_unset_identity_keys_do_not_appear_in_cloned_profile(self, monkeypatch, tmp_path):
@@ -235,7 +314,7 @@ class TestCloneHonchoForProfile:
         honcho_cli, written = self._setup_clone_env(monkeypatch, tmp_path, cfg)
         ok = honcho_cli.clone_honcho_for_profile("coder")
         assert ok is True
-        new_block = written["cfg"]["hosts"]["hermes.coder"]
+        new_block = written["cfg"]["hosts"]["hermes_coder"]
         assert "userPeerAliases" not in new_block
         assert "runtimePeerPrefix" not in new_block
         assert "pinPeerName" not in new_block
@@ -572,5 +651,5 @@ class TestCloneCarriesPinUserPeer:
 
         ok = honcho_cli.clone_honcho_for_profile("partner")
         assert ok is True
-        new_block = written["cfg"]["hosts"]["hermes.partner"]
+        new_block = written["cfg"]["hosts"]["hermes_partner"]
         assert new_block["pinUserPeer"] is True
diff --git a/tests/honcho_plugin/test_client.py b/tests/honcho_plugin/test_client.py
index a02e6937a34..929df4283f6 100644
--- a/tests/honcho_plugin/test_client.py
+++ b/tests/honcho_plugin/test_client.py
@@ -13,6 +13,7 @@ import pytest
 from plugins.memory.honcho.client import (
     HonchoClientConfig,
     get_honcho_client,
+    profile_host_key,
     reset_honcho_client,
     resolve_active_host,
     resolve_config_path,
@@ -430,6 +431,10 @@ class TestResolveConfigPath:
 
 
 class TestResolveActiveHost:
+    def test_profile_host_key_uses_honcho_safe_separator(self):
+        assert profile_host_key("coder") == "hermes_coder"
+        assert profile_host_key("default") == "hermes"
+
     def test_default_returns_hermes(self):
         with patch.dict(os.environ, {}, clear=True):
             os.environ.pop("HERMES_HONCHO_HOST", None)
@@ -444,7 +449,7 @@ class TestResolveActiveHost:
         with patch.dict(os.environ, {}, clear=False):
             os.environ.pop("HERMES_HONCHO_HOST", None)
             with patch("hermes_cli.profiles.get_active_profile_name", return_value="coder"):
-                assert resolve_active_host() == "hermes.coder"
+                assert resolve_active_host() == "hermes_coder"
 
     def test_default_profile_returns_hermes(self):
         with patch.dict(os.environ, {}, clear=False):
@@ -477,10 +482,10 @@ class TestResolveActiveHost:
 class TestProfileScopedConfig:
     def test_from_env_uses_profile_host(self):
         with patch.dict(os.environ, {"HONCHO_API_KEY": "key"}):
-            config = HonchoClientConfig.from_env(host="hermes.coder")
-        assert config.host == "hermes.coder"
+            config = HonchoClientConfig.from_env(host="hermes_coder")
+        assert config.host == "hermes_coder"
         assert config.workspace_id == "hermes"  # shared workspace
-        assert config.ai_peer == "hermes.coder"
+        assert config.ai_peer == "hermes_coder"
 
     def test_from_env_default_workspace_preserved_for_default_host(self):
         with patch.dict(os.environ, {"HONCHO_API_KEY": "key"}):
@@ -494,22 +499,35 @@ class TestProfileScopedConfig:
             "apiKey": "shared-key",
             "hosts": {
                 "hermes": {"aiPeer": "hermes", "peerName": "alice"},
-                "hermes.coder": {
-                    "aiPeer": "hermes.coder",
+                "hermes_coder": {
+                    "aiPeer": "hermes_coder",
                     "peerName": "alice-coder",
                     "workspace": "coder-ws",
                 },
             },
         }))
         config = HonchoClientConfig.from_global_config(
-            host="hermes.coder", config_path=config_file,
+            host="hermes_coder", config_path=config_file,
         )
-        assert config.host == "hermes.coder"
+        assert config.host == "hermes_coder"
         assert config.workspace_id == "coder-ws"
-        assert config.ai_peer == "hermes.coder"
+        assert config.ai_peer == "hermes_coder"
         assert config.peer_name == "alice-coder"
 
     def test_from_global_config_auto_resolves_host(self, tmp_path):
+        config_file = tmp_path / "config.json"
+        config_file.write_text(json.dumps({
+            "apiKey": "key",
+            "hosts": {
+                "hermes_dreamer": {"peerName": "dreamer-user"},
+            },
+        }))
+        with patch("plugins.memory.honcho.client.resolve_active_host", return_value="hermes_dreamer"):
+            config = HonchoClientConfig.from_global_config(config_path=config_file)
+        assert config.host == "hermes_dreamer"
+        assert config.peer_name == "dreamer-user"
+
+    def test_from_global_config_reads_legacy_dot_profile_host_block(self, tmp_path):
         config_file = tmp_path / "config.json"
         config_file.write_text(json.dumps({
             "apiKey": "key",
@@ -517,10 +535,13 @@ class TestProfileScopedConfig:
                 "hermes.dreamer": {"peerName": "dreamer-user"},
             },
         }))
-        with patch("plugins.memory.honcho.client.resolve_active_host", return_value="hermes.dreamer"):
-            config = HonchoClientConfig.from_global_config(config_path=config_file)
-        assert config.host == "hermes.dreamer"
+        config = HonchoClientConfig.from_global_config(
+            host="hermes_dreamer",
+            config_path=config_file,
+        )
+        assert config.host == "hermes_dreamer"
         assert config.peer_name == "dreamer-user"
+        assert config.workspace_id == "hermes_dreamer"
 
 
 class TestObservationModeMigration:
@@ -890,3 +911,176 @@ class TestDialecticDepthParsing:
         }))
         config = HonchoClientConfig.from_global_config(config_path=config_file)
         assert config.dialectic_depth_levels == ["low", "high"]
+
+
+class TestGetHonchoClientBaseUrlDoublePrefixFix:
+    """Regression tests for #20688 — Honcho SDK double-prefixing of /v3 for
+    self-hosted instances where base_url already contains a version path."""
+
+    def teardown_method(self):
+        reset_honcho_client()
+
+    @pytest.mark.skipif(
+        not importlib.util.find_spec("honcho"),
+        reason="honcho SDK not installed"
+    )
+    def test_local_base_url_with_v3_suffix_stripped(self):
+        """base_url 'http://localhost:38000/v3' must become 'http://localhost:38000'
+        before passing to the Honcho SDK to avoid double '/v3/v3' prefixing."""
+        fake_honcho = MagicMock(name="Honcho")
+        cfg = HonchoClientConfig(
+            api_key=None,
+            base_url="http://localhost:38000/v3",
+            workspace_id="hermes",
+            environment="production",
+        )
+
+        with patch("honcho.Honcho", return_value=fake_honcho) as mock_honcho, \
+             patch("hermes_cli.config.load_config", return_value={}):
+            get_honcho_client(cfg)
+
+        mock_honcho.assert_called_once()
+        passed_base_url = mock_honcho.call_args.kwargs.get("base_url")
+        assert passed_base_url == "http://localhost:38000", (
+            f"Expected 'http://localhost:38000', got {passed_base_url!r}"
+        )
+
+    @pytest.mark.skipif(
+        not importlib.util.find_spec("honcho"),
+        reason="honcho SDK not installed"
+    )
+    def test_local_base_url_without_version_unchanged(self):
+        """base_url 'http://localhost:38000' (no version) must be passed unchanged."""
+        fake_honcho = MagicMock(name="Honcho")
+        cfg = HonchoClientConfig(
+            api_key=None,
+            base_url="http://localhost:38000",
+            workspace_id="hermes",
+            environment="production",
+        )
+
+        with patch("honcho.Honcho", return_value=fake_honcho) as mock_honcho, \
+             patch("hermes_cli.config.load_config", return_value={}):
+            get_honcho_client(cfg)
+
+        mock_honcho.assert_called_once()
+        passed_base_url = mock_honcho.call_args.kwargs.get("base_url")
+        assert passed_base_url == "http://localhost:38000", (
+            f"Expected 'http://localhost:38000', got {passed_base_url!r}"
+        )
+
+    @pytest.mark.skipif(
+        not importlib.util.find_spec("honcho"),
+        reason="honcho SDK not installed"
+    )
+    def test_cloud_base_url_without_version_unchanged(self):
+        """A cloud base_url with no version segment must pass through untouched."""
+        fake_honcho = MagicMock(name="Honcho")
+        cfg = HonchoClientConfig(
+            api_key="cloud-key",
+            base_url="https://api.honcho.dev",
+            workspace_id="hermes",
+            environment="production",
+        )
+
+        with patch("honcho.Honcho", return_value=fake_honcho) as mock_honcho, \
+             patch("hermes_cli.config.load_config", return_value={}):
+            get_honcho_client(cfg)
+
+        mock_honcho.assert_called_once()
+        passed_base_url = mock_honcho.call_args.kwargs.get("base_url")
+        assert passed_base_url == "https://api.honcho.dev", (
+            f"Expected 'https://api.honcho.dev', got {passed_base_url!r}"
+        )
+
+    @pytest.mark.skipif(
+        not importlib.util.find_spec("honcho"),
+        reason="honcho SDK not installed"
+    )
+    def test_cloud_base_url_with_version_stripped(self):
+        """A version segment double-prefixes regardless of host, so a cloud
+        base_url that ends in '/v3' must also be stripped (the SDK re-adds it)."""
+        fake_honcho = MagicMock(name="Honcho")
+        cfg = HonchoClientConfig(
+            api_key="cloud-key",
+            base_url="https://api.honcho.dev/v3",
+            workspace_id="hermes",
+            environment="production",
+        )
+
+        with patch("honcho.Honcho", return_value=fake_honcho) as mock_honcho, \
+             patch("hermes_cli.config.load_config", return_value={}):
+            get_honcho_client(cfg)
+
+        mock_honcho.assert_called_once()
+        passed_base_url = mock_honcho.call_args.kwargs.get("base_url")
+        assert passed_base_url == "https://api.honcho.dev", (
+            f"Expected 'https://api.honcho.dev', got {passed_base_url!r}"
+        )
+
+    @pytest.mark.skipif(
+        not importlib.util.find_spec("honcho"),
+        reason="honcho SDK not installed"
+    )
+    @pytest.mark.parametrize(
+        "raw_url, expected",
+        [
+            # LAN IP self-host
+            ("http://10.0.0.5:8000/v3", "http://10.0.0.5:8000"),
+            ("http://192.168.1.20:38000/v3/", "http://192.168.1.20:38000"),
+            # Tailscale / custom-domain self-host
+            ("https://honcho.my.ts.net/v3", "https://honcho.my.ts.net"),
+            ("https://honcho.lab.internal/v3", "https://honcho.lab.internal"),
+            ("https://honcho.fly.dev/v3", "https://honcho.fly.dev"),
+            # higher version segments are also stripped
+            ("https://honcho.lab.internal/v12", "https://honcho.lab.internal"),
+            # self-host without a version segment is left unchanged
+            ("https://honcho.my.ts.net", "https://honcho.my.ts.net"),
+            ("http://10.0.0.5:8000", "http://10.0.0.5:8000"),
+        ],
+    )
+    def test_self_hosted_base_url_version_stripped(self, raw_url, expected):
+        """Non-loopback self-hosted instances (LAN IPs, Tailscale, custom
+        domains) must get the same version-segment stripping as localhost.
+        Regression for #20688 recurring on any non-loopback self-host."""
+        fake_honcho = MagicMock(name="Honcho")
+        cfg = HonchoClientConfig(
+            api_key="self-host-key",
+            base_url=raw_url,
+            workspace_id="hermes",
+            environment="production",
+        )
+
+        with patch("honcho.Honcho", return_value=fake_honcho) as mock_honcho, \
+             patch("hermes_cli.config.load_config", return_value={}):
+            get_honcho_client(cfg)
+
+        mock_honcho.assert_called_once()
+        passed_base_url = mock_honcho.call_args.kwargs.get("base_url")
+        assert passed_base_url == expected, (
+            f"Expected {expected!r}, got {passed_base_url!r}"
+        )
+
+    @pytest.mark.skipif(
+        not importlib.util.find_spec("honcho"),
+        reason="honcho SDK not installed"
+    )
+    def test_local_base_url_with_trailing_slash_stripped(self):
+        """base_url 'http://127.0.0.1:38000/v3/' must also be cleaned up."""
+        fake_honcho = MagicMock(name="Honcho")
+        cfg = HonchoClientConfig(
+            api_key=None,
+            base_url="http://127.0.0.1:38000/v3/",
+            workspace_id="hermes",
+            environment="production",
+        )
+
+        with patch("honcho.Honcho", return_value=fake_honcho) as mock_honcho, \
+             patch("hermes_cli.config.load_config", return_value={}):
+            get_honcho_client(cfg)
+
+        mock_honcho.assert_called_once()
+        passed_base_url = mock_honcho.call_args.kwargs.get("base_url")
+        assert passed_base_url == "http://127.0.0.1:38000", (
+            f"Expected 'http://127.0.0.1:38000', got {passed_base_url!r}"
+        )
diff --git a/tests/honcho_plugin/test_pin_peer_name.py b/tests/honcho_plugin/test_pin_peer_name.py
index ef3a215f329..1e72bc97d1a 100644
--- a/tests/honcho_plugin/test_pin_peer_name.py
+++ b/tests/honcho_plugin/test_pin_peer_name.py
@@ -745,10 +745,10 @@ class TestPinTransition:
         monkeypatch.setenv("HERMES_HOME", str(tmp_path))
 
         cfg_path.write_text(json.dumps({"apiKey": "k", "peerName": "Igor", "pinPeerName": True}))
-        sig_pinned = GatewayRunner._extract_cache_busting_config({})
+        sig_pinned = GatewayRunner._extract_cache_busting_config({"memory": {"provider": "honcho"}})
 
         cfg_path.write_text(json.dumps({"apiKey": "k", "peerName": "Igor", "pinPeerName": False}))
-        sig_unpinned = GatewayRunner._extract_cache_busting_config({})
+        sig_unpinned = GatewayRunner._extract_cache_busting_config({"memory": {"provider": "honcho"}})
 
         assert sig_pinned["honcho.pin_peer_name"] != sig_unpinned["honcho.pin_peer_name"]
 
@@ -759,14 +759,14 @@ class TestPinTransition:
         monkeypatch.setenv("HERMES_HOME", str(tmp_path))
 
         cfg_path.write_text(json.dumps({"apiKey": "k", "peerName": "Igor"}))
-        sig_no_aliases = GatewayRunner._extract_cache_busting_config({})
+        sig_no_aliases = GatewayRunner._extract_cache_busting_config({"memory": {"provider": "honcho"}})
 
         cfg_path.write_text(json.dumps({
             "apiKey": "k",
             "peerName": "Igor",
             "userPeerAliases": {"86701400": "Igor"},
         }))
-        sig_with_aliases = GatewayRunner._extract_cache_busting_config({})
+        sig_with_aliases = GatewayRunner._extract_cache_busting_config({"memory": {"provider": "honcho"}})
 
         assert sig_no_aliases["honcho.user_peer_aliases"] != sig_with_aliases["honcho.user_peer_aliases"]
 
@@ -777,14 +777,14 @@ class TestPinTransition:
         monkeypatch.setenv("HERMES_HOME", str(tmp_path))
 
         cfg_path.write_text(json.dumps({"apiKey": "k", "peerName": "Igor"}))
-        sig_no_prefix = GatewayRunner._extract_cache_busting_config({})
+        sig_no_prefix = GatewayRunner._extract_cache_busting_config({"memory": {"provider": "honcho"}})
 
         cfg_path.write_text(json.dumps({
             "apiKey": "k",
             "peerName": "Igor",
             "runtimePeerPrefix": "telegram_",
         }))
-        sig_with_prefix = GatewayRunner._extract_cache_busting_config({})
+        sig_with_prefix = GatewayRunner._extract_cache_busting_config({"memory": {"provider": "honcho"}})
 
         assert sig_no_prefix["honcho.runtime_peer_prefix"] != sig_with_prefix["honcho.runtime_peer_prefix"]
 
@@ -805,14 +805,14 @@ class TestPinTransition:
             "peerName": "Igor",
             "aiPeer": "hermes",
         }))
-        sig_before = GatewayRunner._extract_cache_busting_config({})
+        sig_before = GatewayRunner._extract_cache_busting_config({"memory": {"provider": "honcho"}})
 
         cfg_path.write_text(json.dumps({
             "apiKey": "k",
             "peerName": "Igor",
             "aiPeer": "hermetika",
         }))
-        sig_after = GatewayRunner._extract_cache_busting_config({})
+        sig_after = GatewayRunner._extract_cache_busting_config({"memory": {"provider": "honcho"}})
 
         assert sig_before["honcho.ai_peer"] != sig_after["honcho.ai_peer"]
 
diff --git a/tests/plugins/memory/test_hindsight_provider.py b/tests/plugins/memory/test_hindsight_provider.py
index bc62b7f2c8f..f49c227611a 100644
--- a/tests/plugins/memory/test_hindsight_provider.py
+++ b/tests/plugins/memory/test_hindsight_provider.py
@@ -6,7 +6,9 @@ turn counting, tags), and schema completeness.
 """
 
 import json
+import os
 import re
+import stat
 import sys
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock
@@ -1570,3 +1572,13 @@ class TestShutdown:
         assert embedded._client is None
         assert provider._client is None
 
+
+@pytest.mark.skipif(os.name == "nt", reason="POSIX mode bits not enforced on Windows")
+def test_save_config_sets_owner_only_permissions(tmp_path):
+    """hindsight/config.json must be written with 0o600 so API key is not world-readable."""
+    provider = HindsightMemoryProvider()
+    provider.save_config({"api_key": "hd-test-key"}, str(tmp_path))
+    config_file = tmp_path / "hindsight" / "config.json"
+    assert config_file.exists()
+    mode = stat.S_IMODE(config_file.stat().st_mode)
+    assert mode == 0o600, f"Expected 0o600 (owner-only), got {oct(mode)}"
diff --git a/tests/plugins/memory/test_mem0_v2.py b/tests/plugins/memory/test_mem0_v2.py
index 1ef85499b54..a9a86676452 100644
--- a/tests/plugins/memory/test_mem0_v2.py
+++ b/tests/plugins/memory/test_mem0_v2.py
@@ -4,6 +4,10 @@ Salvaged from PRs #5301 (qaqcvc) and #5117 (vvvanguards).
 """
 
 import json
+import os
+import stat
+
+import pytest
 
 from plugins.memory.mem0 import Mem0MemoryProvider
 
@@ -202,6 +206,17 @@ class TestMem0ResponseUnwrapping:
 # ---------------------------------------------------------------------------
 
 
+@pytest.mark.skipif(os.name == "nt", reason="POSIX mode bits not enforced on Windows")
+def test_save_config_sets_owner_only_permissions(tmp_path):
+    """mem0.json must be written with 0o600 so API key is not world-readable."""
+    provider = Mem0MemoryProvider()
+    provider.save_config({"api_key": "m0-test-key"}, str(tmp_path))
+    config_file = tmp_path / "mem0.json"
+    assert config_file.exists()
+    mode = stat.S_IMODE(config_file.stat().st_mode)
+    assert mode == 0o600, f"Expected 0o600 (owner-only), got {oct(mode)}"
+
+
 class TestMem0Defaults:
     """Ensure we don't break existing users' defaults."""
 
diff --git a/tests/plugins/memory/test_supermemory_provider.py b/tests/plugins/memory/test_supermemory_provider.py
index 0aee459757f..d5f1c5bb174 100644
--- a/tests/plugins/memory/test_supermemory_provider.py
+++ b/tests/plugins/memory/test_supermemory_provider.py
@@ -1,4 +1,6 @@
 import json
+import os
+import stat
 import threading
 
 import pytest
@@ -409,3 +411,13 @@ def test_get_config_schema_minimal():
     assert len(schema) == 1
     assert schema[0]["key"] == "api_key"
     assert schema[0]["secret"] is True
+
+
+@pytest.mark.skipif(os.name == "nt", reason="POSIX mode bits not enforced on Windows")
+def test_save_config_sets_owner_only_permissions(tmp_path):
+    """supermemory.json must be written with 0o600 so API key is not world-readable."""
+    _save_supermemory_config({"api_key": "sm-test-key"}, str(tmp_path))
+    config_file = tmp_path / "supermemory.json"
+    assert config_file.exists()
+    mode = stat.S_IMODE(config_file.stat().st_mode)
+    assert mode == 0o600, f"Expected 0o600 (owner-only), got {oct(mode)}"
diff --git a/tests/plugins/test_kanban_attachments.py b/tests/plugins/test_kanban_attachments.py
new file mode 100644
index 00000000000..3beb875a812
--- /dev/null
+++ b/tests/plugins/test_kanban_attachments.py
@@ -0,0 +1,291 @@
+"""Tests for Kanban task file attachments (#35338).
+
+Covers three layers:
+  * ``hermes_cli.kanban_db`` accessors (add/list/get/delete + path helpers)
+  * the dashboard REST surface (upload / list / download / delete)
+  * worker-context surfacing so a kanban worker sees the absolute paths
+
+The plugin router is attached to a bare FastAPI app — same approach as
+``test_kanban_dashboard_plugin.py`` — so we exercise the real HTTP path
+(multipart upload, streaming download) without the whole dashboard.
+"""
+
+from __future__ import annotations
+
+import importlib.util
+import sys
+from pathlib import Path
+
+import pytest
+from fastapi import FastAPI
+from fastapi.testclient import TestClient
+
+from hermes_cli import kanban_db as kb
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+def _load_plugin_router():
+    repo_root = Path(__file__).resolve().parents[2]
+    plugin_file = repo_root / "plugins" / "kanban" / "dashboard" / "plugin_api.py"
+    assert plugin_file.exists(), f"plugin file missing: {plugin_file}"
+    spec = importlib.util.spec_from_file_location(
+        "hermes_dashboard_plugin_kanban_attach_test", plugin_file,
+    )
+    assert spec is not None and spec.loader is not None
+    mod = importlib.util.module_from_spec(spec)
+    sys.modules[spec.name] = mod
+    spec.loader.exec_module(mod)
+    return mod.router
+
+
+@pytest.fixture
+def kanban_home(tmp_path, monkeypatch):
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(home))
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+    kb.init_db()
+    return home
+
+
+@pytest.fixture
+def client(kanban_home):
+    app = FastAPI()
+    app.include_router(_load_plugin_router(), prefix="/api/plugins/kanban")
+    return TestClient(app)
+
+
+def _make_task(conn, title="t") -> str:
+    return kb.create_task(conn, title=title)
+
+
+# ---------------------------------------------------------------------------
+# DB-layer accessors
+# ---------------------------------------------------------------------------
+
+
+def test_add_list_get_delete_attachment(kanban_home, tmp_path):
+    conn = kb.connect()
+    try:
+        task_id = _make_task(conn)
+        # Write a real blob under the per-task dir so delete can unlink it.
+        dest_dir = kb.task_attachments_dir(task_id)
+        dest_dir.mkdir(parents=True, exist_ok=True)
+        blob = dest_dir / "source.pdf"
+        blob.write_bytes(b"%PDF-1.4 fake")
+
+        att_id = kb.add_attachment(
+            conn,
+            task_id,
+            filename="source.pdf",
+            stored_path=str(blob),
+            content_type="application/pdf",
+            size=blob.stat().st_size,
+            uploaded_by="tester",
+        )
+        assert att_id > 0
+
+        atts = kb.list_attachments(conn, task_id)
+        assert len(atts) == 1
+        a = atts[0]
+        assert a.filename == "source.pdf"
+        assert a.content_type == "application/pdf"
+        assert a.size == len(b"%PDF-1.4 fake")
+        assert a.uploaded_by == "tester"
+        assert a.stored_path == str(blob)
+
+        got = kb.get_attachment(conn, att_id)
+        assert got is not None and got.id == att_id
+
+        removed = kb.delete_attachment(conn, att_id)
+        assert removed is not None and removed.id == att_id
+        assert kb.list_attachments(conn, task_id) == []
+        assert not blob.exists(), "delete should unlink the on-disk blob"
+        assert kb.get_attachment(conn, att_id) is None
+    finally:
+        conn.close()
+
+
+def test_add_attachment_rejects_unknown_task(kanban_home):
+    conn = kb.connect()
+    try:
+        with pytest.raises(ValueError):
+            kb.add_attachment(
+                conn, "t_doesnotexist", filename="x.txt", stored_path="/tmp/x.txt"
+            )
+    finally:
+        conn.close()
+
+
+def test_add_attachment_appends_event(kanban_home):
+    conn = kb.connect()
+    try:
+        task_id = _make_task(conn)
+        kb.add_attachment(
+            conn, task_id, filename="a.txt", stored_path="/tmp/a.txt", size=3
+        )
+        kinds = [e.kind for e in kb.list_events(conn, task_id)]
+        assert "attached" in kinds
+    finally:
+        conn.close()
+
+
+def test_delete_attachment_missing_returns_none(kanban_home):
+    conn = kb.connect()
+    try:
+        assert kb.delete_attachment(conn, 999999) is None
+    finally:
+        conn.close()
+
+
+def test_attachments_root_is_per_board(kanban_home, monkeypatch):
+    # default board uses <root>/kanban/attachments
+    default_root = kb.attachments_root(board="default")
+    assert default_root.name == "attachments"
+    # a named board nests under its board dir
+    monkeypatch.delenv("HERMES_KANBAN_ATTACHMENTS_ROOT", raising=False)
+    named = kb.attachments_root(board="default")
+    assert named == default_root
+
+
+def test_attachments_root_env_override(kanban_home, monkeypatch, tmp_path):
+    override = tmp_path / "custom-attach"
+    monkeypatch.setenv("HERMES_KANBAN_ATTACHMENTS_ROOT", str(override))
+    assert kb.attachments_root() == override
+    assert kb.task_attachments_dir("t_abc") == override / "t_abc"
+
+
+# ---------------------------------------------------------------------------
+# Worker context surfacing
+# ---------------------------------------------------------------------------
+
+
+def test_worker_context_lists_attachments_with_absolute_path(kanban_home):
+    conn = kb.connect()
+    try:
+        task_id = _make_task(conn, title="translate PDF")
+        dest_dir = kb.task_attachments_dir(task_id)
+        dest_dir.mkdir(parents=True, exist_ok=True)
+        blob = dest_dir / "manual.pdf"
+        blob.write_bytes(b"data")
+        kb.add_attachment(
+            conn,
+            task_id,
+            filename="manual.pdf",
+            stored_path=str(blob.resolve()),
+            content_type="application/pdf",
+            size=4,
+        )
+        ctx = kb.build_worker_context(conn, task_id)
+        assert "## Attachments" in ctx
+        assert "manual.pdf" in ctx
+        # The absolute path must appear so the worker can read_file it.
+        assert str(blob.resolve()) in ctx
+    finally:
+        conn.close()
+
+
+def test_worker_context_no_attachments_section_when_empty(kanban_home):
+    conn = kb.connect()
+    try:
+        task_id = _make_task(conn)
+        ctx = kb.build_worker_context(conn, task_id)
+        assert "## Attachments" not in ctx
+    finally:
+        conn.close()
+
+
+# ---------------------------------------------------------------------------
+# REST surface — upload / list / download / delete round-trip
+# ---------------------------------------------------------------------------
+
+
+def _create_task_via_api(client) -> str:
+    r = client.post("/api/plugins/kanban/tasks", json={"title": "x"})
+    assert r.status_code == 200, r.text
+    return r.json()["task"]["id"]
+
+
+def test_upload_list_download_delete_roundtrip(client):
+    task_id = _create_task_via_api(client)
+    content = b"hello attachment world"
+
+    # Upload
+    r = client.post(
+        f"/api/plugins/kanban/tasks/{task_id}/attachments",
+        files={"file": ("notes.txt", content, "text/plain")},
+    )
+    assert r.status_code == 200, r.text
+    att = r.json()["attachment"]
+    assert att["filename"] == "notes.txt"
+    assert att["size"] == len(content)
+    att_id = att["id"]
+
+    # List (drawer also embeds it in GET /tasks/:id)
+    r = client.get(f"/api/plugins/kanban/tasks/{task_id}/attachments")
+    assert r.status_code == 200
+    assert [a["filename"] for a in r.json()["attachments"]] == ["notes.txt"]
+
+    detail = client.get(f"/api/plugins/kanban/tasks/{task_id}").json()
+    assert "attachments" in detail
+    assert len(detail["attachments"]) == 1
+
+    # Download streams the exact bytes back
+    r = client.get(f"/api/plugins/kanban/attachments/{att_id}")
+    assert r.status_code == 200
+    assert r.content == content
+
+    # Delete removes the row and the file
+    r = client.delete(f"/api/plugins/kanban/attachments/{att_id}")
+    assert r.status_code == 200
+    assert client.get(f"/api/plugins/kanban/attachments/{att_id}").status_code == 404
+    assert client.get(
+        f"/api/plugins/kanban/tasks/{task_id}/attachments"
+    ).json()["attachments"] == []
+
+
+def test_upload_sanitizes_traversal_filename(client):
+    task_id = _create_task_via_api(client)
+    r = client.post(
+        f"/api/plugins/kanban/tasks/{task_id}/attachments",
+        files={"file": ("../../../../etc/passwd", b"x", "text/plain")},
+    )
+    assert r.status_code == 200, r.text
+    stored_path = r.json()["attachment"]["stored_path"]
+    # The leaf name only; never escapes the per-task attachments dir.
+    assert Path(stored_path).name == "passwd"
+    task_dir = kb.task_attachments_dir(task_id).resolve()
+    assert Path(stored_path).resolve().is_relative_to(task_dir)
+
+
+def test_upload_name_collision_gets_suffixed(client):
+    task_id = _create_task_via_api(client)
+    for _ in range(2):
+        r = client.post(
+            f"/api/plugins/kanban/tasks/{task_id}/attachments",
+            files={"file": ("dup.txt", b"a", "text/plain")},
+        )
+        assert r.status_code == 200, r.text
+    names = sorted(
+        a["filename"]
+        for a in client.get(
+            f"/api/plugins/kanban/tasks/{task_id}/attachments"
+        ).json()["attachments"]
+    )
+    assert names == ["dup (1).txt", "dup.txt"]
+
+
+def test_upload_unknown_task_404(client):
+    r = client.post(
+        "/api/plugins/kanban/tasks/t_nope/attachments",
+        files={"file": ("x.txt", b"x", "text/plain")},
+    )
+    assert r.status_code == 404
+
+
+def test_download_unknown_attachment_404(client):
+    assert client.get("/api/plugins/kanban/attachments/424242").status_code == 404
diff --git a/tests/run_agent/test_413_compression.py b/tests/run_agent/test_413_compression.py
index 6695d6c275e..cadb26c449b 100644
--- a/tests/run_agent/test_413_compression.py
+++ b/tests/run_agent/test_413_compression.py
@@ -491,6 +491,96 @@ class TestPreflightCompression:
             for ev, msg in status_messages
         )
 
+    def test_preflight_defers_when_recent_real_usage_fit(self, agent):
+        """A noisy rough estimate should not re-compact a recently fitting request."""
+        agent.compression_enabled = True
+        agent.context_compressor.context_length = 200_000
+        agent.context_compressor.threshold_tokens = 100_000
+        agent.context_compressor.last_prompt_tokens = 58_000
+        agent.context_compressor.last_real_prompt_tokens = 58_000
+        agent.context_compressor.last_rough_tokens_when_real_prompt_fit = 113_000
+
+        big_history = []
+        for i in range(20):
+            big_history.append({"role": "user", "content": f"Message {i} padded"})
+            big_history.append({"role": "assistant", "content": f"Response {i} padded"})
+
+        ok_resp = _mock_response(
+            content="Used real fit",
+            finish_reason="stop",
+            usage={"prompt_tokens": 59_000, "completion_tokens": 100, "total_tokens": 59_100},
+        )
+        agent.client.chat.completions.create.side_effect = [ok_resp]
+        status_messages = []
+        agent.status_callback = lambda ev, msg: status_messages.append((ev, msg))
+
+        with (
+            patch("agent.conversation_loop.estimate_request_tokens_rough", return_value=114_000),
+            patch.object(agent, "_compress_context") as mock_compress,
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            result = agent.run_conversation("hello", conversation_history=big_history)
+
+        mock_compress.assert_not_called()
+        assert result["completed"] is True
+        assert result["final_response"] == "Used real fit"
+        assert not any(
+            ev == "lifecycle" and "Preflight compression" in msg
+            for ev, msg in status_messages
+        )
+
+    def test_preflight_compresses_when_rough_growth_after_fit_is_large(self, agent):
+        """Large rough growth after a fitting request still triggers preflight."""
+        agent.compression_enabled = True
+        agent.context_compressor.context_length = 200_000
+        agent.context_compressor.threshold_tokens = 100_000
+        agent.context_compressor.last_prompt_tokens = 58_000
+        agent.context_compressor.last_real_prompt_tokens = 58_000
+        agent.context_compressor.last_rough_tokens_when_real_prompt_fit = 113_000
+
+        big_history = []
+        for i in range(20):
+            big_history.append({"role": "user", "content": f"Message {i} padded"})
+            big_history.append({"role": "assistant", "content": f"Response {i} padded"})
+
+        ok_resp = _mock_response(
+            content="Compressed after growth",
+            finish_reason="stop",
+            usage={"prompt_tokens": 50_000, "completion_tokens": 100, "total_tokens": 50_100},
+        )
+        agent.client.chat.completions.create.side_effect = [ok_resp]
+
+        # First rough estimate must clear the threshold so preflight fires
+        # (rough growth since the last fitting request is large, so the
+        # deferral path is NOT taken). Every estimate after compaction is
+        # sub-threshold. Use a callable side_effect rather than a fixed list
+        # so we don't have to predict how many times the loop re-estimates —
+        # the post-response real-token estimate is an extra call that a
+        # 2-element list would exhaust (StopIteration).
+        _rough_calls = {"n": 0}
+
+        def _rough_estimate(*_args, **_kwargs):
+            _rough_calls["n"] += 1
+            return 125_000 if _rough_calls["n"] == 1 else 40_000
+
+        with (
+            patch("agent.conversation_loop.estimate_request_tokens_rough", side_effect=_rough_estimate),
+            patch.object(agent, "_compress_context") as mock_compress,
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            mock_compress.return_value = (
+                [{"role": "user", "content": f"{SUMMARY_PREFIX}\nPrevious conversation"}],
+                "new system prompt",
+            )
+            result = agent.run_conversation("hello", conversation_history=big_history)
+
+        mock_compress.assert_called_once()
+        assert result["completed"] is True
+
     def test_no_preflight_when_under_threshold(self, agent):
         """When history fits within context, no preflight compression needed."""
         agent.compression_enabled = True
@@ -575,6 +665,74 @@ class TestPreflightCompression:
         mock_compress.assert_not_called()
         assert result["completed"] is True
 
+    def test_preflight_seeds_display_tokens_when_compression_aborts(self, agent):
+        """Display must reflect the real context size even when compression no-ops.
+
+        Regression: the CLI status bar reads ``last_prompt_tokens``, which only
+        updated from a *successful* API response. When the loaded history was
+        oversized but compression failed to reduce it (e.g. the auxiliary
+        summary model timed out), the bar stayed stuck at the old, smaller
+        value while the preflight estimate reported a much larger number —
+        looking permanently out of sync.
+        """
+        agent.compression_enabled = True
+        agent.context_compressor.context_length = 200_000
+        agent.context_compressor.threshold_tokens = 130_000
+        # Simulate a stale display value from an earlier, smaller turn.
+        agent.context_compressor.last_prompt_tokens = 74_400
+
+        big_history = []
+        for i in range(20):
+            big_history.append({"role": "user", "content": f"Message {i} padded text"})
+            big_history.append({"role": "assistant", "content": f"Response {i} padded text"})
+
+        ok_resp = _mock_response(content="After preflight", finish_reason="stop")
+        agent.client.chat.completions.create.side_effect = [ok_resp]
+
+        with (
+            patch("agent.conversation_loop.estimate_request_tokens_rough", return_value=144_669),
+            # Compression no-ops (returns input unchanged) — mirrors an aux
+            # summary-model timeout where the messages can't be reduced.
+            patch.object(agent, "_compress_context", side_effect=lambda msgs, *a, **k: (msgs, agent._cached_system_prompt)),
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            result = agent.run_conversation("hello", conversation_history=big_history)
+
+        assert result["completed"] is True
+        # The display token count was revised up to the fresh preflight estimate,
+        # not left at the stale 74_400.
+        assert agent.context_compressor.last_prompt_tokens == 144_669
+
+    def test_preflight_seed_only_revises_upward(self, agent):
+        """A larger tracked value must not be clobbered by a smaller estimate."""
+        agent.compression_enabled = True
+        agent.context_compressor.context_length = 200_000
+        agent.context_compressor.threshold_tokens = 130_000
+        # A real, larger usage figure is already tracked.
+        agent.context_compressor.last_prompt_tokens = 160_000
+
+        big_history = []
+        for i in range(20):
+            big_history.append({"role": "user", "content": f"Message {i} padded text"})
+            big_history.append({"role": "assistant", "content": f"Response {i} padded text"})
+
+        ok_resp = _mock_response(content="After preflight", finish_reason="stop")
+        agent.client.chat.completions.create.side_effect = [ok_resp]
+
+        with (
+            patch("agent.conversation_loop.estimate_request_tokens_rough", return_value=144_669),
+            patch.object(agent, "_compress_context", side_effect=lambda msgs, *a, **k: (msgs, agent._cached_system_prompt)),
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            agent.run_conversation("hello", conversation_history=big_history)
+
+        # Smaller estimate must not overwrite the larger tracked value.
+        assert agent.context_compressor.last_prompt_tokens == 160_000
+
 
 class TestToolResultPreflightCompression:
     """Compression should trigger when tool results push context past the threshold."""
diff --git a/tests/run_agent/test_dict_tool_call_args.py b/tests/run_agent/test_dict_tool_call_args.py
index 61ee6fc5c28..ac249919fa1 100644
--- a/tests/run_agent/test_dict_tool_call_args.py
+++ b/tests/run_agent/test_dict_tool_call_args.py
@@ -70,4 +70,9 @@ def test_tool_call_validation_accepts_dict_arguments(monkeypatch):
 
     result = agent.run_conversation("read the file")
 
-    assert result["final_response"] == "done"
+    # The conversation hits max_iterations=3 (3 tool turns then forced summary).
+    # PR #34470 adds an explainer suffix to abnormal turn endings so users
+    # understand why the response is short instead of seeing a blank reply.
+    # The exact suffix wording is owned by conversation_loop; this test only
+    # cares that the model's actual text ('done') survives at the start.
+    assert result["final_response"].startswith("done")
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index 831491c266c..a495b718320 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -2543,6 +2543,122 @@ class TestConcurrentToolExecution:
         assert json.loads(result) == {"error": "Blocked"}
         assert agent._turns_since_memory == 5
 
+    def test_concurrent_blocked_write_skips_checkpoint(self, agent, monkeypatch):
+        """Concurrent path: blocked write_file should not trigger checkpoint."""
+        tc1 = _mock_tool_call(name="write_file",
+                              arguments='{"path":"test.txt","content":"hello"}',
+                              call_id="c1")
+        tc2 = _mock_tool_call(name="read_file",
+                              arguments='{"path":"other.py"}',
+                              call_id="c2")
+        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
+        messages = []
+
+        monkeypatch.setattr(
+            "hermes_cli.plugins.get_pre_tool_call_block_message",
+            lambda *args, **kwargs: "Blocked" if args[0] == "write_file" else None,
+        )
+
+        agent._checkpoint_mgr.enabled = True
+
+        def fake_handle(name, args, task_id, **kwargs):
+            return f"result_{name}"
+
+        with patch("run_agent.handle_function_call", side_effect=fake_handle):
+            with patch.object(agent._checkpoint_mgr, "ensure_checkpoint") as cp_mock:
+                agent._execute_tool_calls_concurrent(mock_msg, messages, "task-1")
+
+        cp_mock.assert_not_called()
+
+    def test_concurrent_blocked_patch_skips_checkpoint(self, agent, monkeypatch):
+        """Concurrent path: blocked patch should not trigger checkpoint."""
+        tc1 = _mock_tool_call(name="patch",
+                              arguments='{"path":"f.py","old":"a","new":"b"}',
+                              call_id="c1")
+        tc2 = _mock_tool_call(name="read_file",
+                              arguments='{"path":"other.py"}',
+                              call_id="c2")
+        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
+        messages = []
+
+        monkeypatch.setattr(
+            "hermes_cli.plugins.get_pre_tool_call_block_message",
+            lambda *args, **kwargs: "Blocked" if args[0] == "patch" else None,
+        )
+
+        agent._checkpoint_mgr.enabled = True
+
+        def fake_handle(name, args, task_id, **kwargs):
+            return f"result_{name}"
+
+        with patch("run_agent.handle_function_call", side_effect=fake_handle):
+            with patch.object(agent._checkpoint_mgr, "ensure_checkpoint") as cp_mock:
+                agent._execute_tool_calls_concurrent(mock_msg, messages, "task-1")
+
+        cp_mock.assert_not_called()
+
+    def test_concurrent_blocked_terminal_skips_checkpoint(self, agent, monkeypatch):
+        """Concurrent path: blocked terminal should not trigger checkpoint."""
+        tc1 = _mock_tool_call(name="terminal",
+                              arguments='{"command":"rm -rf /tmp/foo"}',
+                              call_id="c1")
+        tc2 = _mock_tool_call(name="read_file",
+                              arguments='{"path":"other.py"}',
+                              call_id="c2")
+        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
+        messages = []
+
+        monkeypatch.setattr(
+            "hermes_cli.plugins.get_pre_tool_call_block_message",
+            lambda *args, **kwargs: "Blocked" if args[0] == "terminal" else None,
+        )
+
+        agent._checkpoint_mgr.enabled = True
+
+        def fake_handle(name, args, task_id, **kwargs):
+            return f"result_{name}"
+
+        with patch("run_agent.handle_function_call", side_effect=fake_handle):
+            with patch.object(agent._checkpoint_mgr, "ensure_checkpoint") as cp_mock:
+                with patch("agent.tool_executor._is_destructive_command", return_value=True):
+                    agent._execute_tool_calls_concurrent(mock_msg, messages, "task-1")
+
+        cp_mock.assert_not_called()
+
+    def test_concurrent_blocked_write_does_not_steal_slot_from_allowed_write(self, agent, monkeypatch):
+        """When write_file is blocked, its dedup slot must not be consumed,
+        so a subsequent allowed write_file for the same path still checkpoints."""
+        tc1 = _mock_tool_call(name="write_file",
+                              arguments='{"path":"dup.txt","content":"blocked"}',
+                              call_id="c1")
+        tc2 = _mock_tool_call(name="write_file",
+                              arguments='{"path":"dup.txt","content":"allowed"}',
+                              call_id="c2")
+        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
+        messages = []
+
+        call_count = {"n": 0}
+        def block_first_only(*args, **kwargs):
+            call_count["n"] += 1
+            return "Blocked" if call_count["n"] == 1 else None
+
+        monkeypatch.setattr(
+            "hermes_cli.plugins.get_pre_tool_call_block_message",
+            block_first_only,
+        )
+
+        agent._checkpoint_mgr.enabled = True
+
+        def fake_handle(name, args, task_id, **kwargs):
+            return f"result_{name}"
+
+        with patch("run_agent.handle_function_call", side_effect=fake_handle):
+            with patch.object(agent._checkpoint_mgr, "ensure_checkpoint") as cp_mock:
+                agent._execute_tool_calls_concurrent(mock_msg, messages, "task-1")
+
+        # Second (allowed) write must checkpoint even though first was blocked.
+        cp_mock.assert_called_once()
+
 
 class TestPathsOverlap:
     """Unit tests for the _paths_overlap helper."""
@@ -2756,6 +2872,40 @@ class TestHandleMaxIterations:
         ]
         assert len(stub_ids) >= 1, f"No stub result for assistant tool_call: {stub_ids}"
 
+    def test_summary_strips_strict_schema_foreign_fields(self, agent):
+        """Regression: the max-iterations summary request must NOT carry
+        Chat-Completions-schema-foreign keys — tool_name (SQLite FTS
+        bookkeeping), codex_* reasoning carriers, or internal _-prefixed
+        scaffolding. Strict gateways (Fireworks-backed OpenCode Go, Mistral,
+        Kimi) reject these with 'Extra inputs are not permitted, field:
+        messages[N].tool_name'. The transport's convert_messages() strips
+        them on the main loop; this hand-built summary path must mirror it."""
+        agent.client.chat.completions.create.return_value = _mock_response(content="Summary")
+        agent._cached_system_prompt = "You are helpful."
+        messages = [
+            {"role": "user", "content": "do stuff"},
+            {
+                "role": "assistant",
+                "tool_calls": [{"id": "call_1", "function": {"name": "execute_code", "arguments": "{}"}}],
+                "codex_reasoning_items": [{"id": "rs_1"}],
+            },
+            {"role": "tool", "tool_call_id": "call_1", "content": "result", "tool_name": "execute_code"},
+            {"role": "assistant", "content": "Done.", "_empty_recovery_synthetic": True},
+        ]
+
+        result = agent._handle_max_iterations(messages, 60)
+
+        assert result == "Summary"
+        sent_msgs = agent.client.chat.completions.create.call_args.kwargs.get("messages", [])
+        for m in sent_msgs:
+            assert "tool_name" not in m, m
+            assert "codex_reasoning_items" not in m, m
+            assert "codex_message_items" not in m, m
+            assert not any(isinstance(k, str) and k.startswith("_") for k in m), m
+        # Internal history is untouched — the path copies each message.
+        assert messages[2]["tool_name"] == "execute_code"
+        assert messages[1]["codex_reasoning_items"] == [{"id": "rs_1"}]
+
     def test_summary_omits_provider_preferences_for_non_openrouter(self, agent):
         agent.base_url = "https://api.openai.com/v1"
         agent._base_url_lower = agent.base_url.lower()
@@ -3046,7 +3196,11 @@ class TestRunConversation:
 
         mock_compress.assert_not_called()  # no compression triggered
         assert result["completed"] is True
-        assert result["final_response"] == "(empty)"
+        # #34452: the bare "(empty)" sentinel is now replaced by a
+        # user-visible end-of-turn explanation so the failure isn't silent.
+        assert result["final_response"] != "(empty)"
+        assert "No reply:" in result["final_response"]
+        assert result["turn_exit_reason"] == "empty_response_exhausted"
         assert result["api_calls"] == 6  # 1 original + 2 prefill + 3 retries
 
     def test_reasoning_only_response_prefill_then_empty(self, agent):
@@ -3066,7 +3220,9 @@ class TestRunConversation:
         ):
             result = agent.run_conversation("answer me")
         assert result["completed"] is True
-        assert result["final_response"] == "(empty)"
+        # #34452: explanation replaces the bare "(empty)" sentinel.
+        assert result["final_response"] != "(empty)"
+        assert "No reply:" in result["final_response"]
         assert result["api_calls"] == 6  # 1 original + 2 prefill + 3 retries
 
     def test_reasoning_only_prefill_succeeds_on_continuation(self, agent):
@@ -3113,7 +3269,9 @@ class TestRunConversation:
         ):
             result = agent.run_conversation("answer me")
         assert result["completed"] is True
-        assert result["final_response"] == "(empty)"
+        # #34452: explanation replaces the bare "(empty)" sentinel.
+        assert result["final_response"] != "(empty)"
+        assert "No reply:" in result["final_response"]
         assert result["api_calls"] == 4  # 1 original + 3 retries
 
     def test_truly_empty_response_succeeds_on_nudge(self, agent):
@@ -3209,7 +3367,9 @@ class TestRunConversation:
         ):
             result = agent.run_conversation("answer me")
         assert result["completed"] is True
-        assert result["final_response"] == "(empty)"
+        # #34452: explanation replaces the bare "(empty)" sentinel.
+        assert result["final_response"] != "(empty)"
+        assert "No reply:" in result["final_response"]
 
     def test_empty_response_emits_status_for_gateway(self, agent):
         """_emit_status is called during empty retries so gateway users see feedback."""
@@ -3235,7 +3395,10 @@ class TestRunConversation:
         ):
             result = agent.run_conversation("answer me")
 
-        assert result["final_response"] == "(empty)"
+        # #34452: explanation replaces the bare "(empty)" sentinel, but the
+        # status emissions during retries are unchanged.
+        assert result["final_response"] != "(empty)"
+        assert "No reply:" in result["final_response"]
         # Should have emitted retry statuses (3 retries) + final failure
         retry_msgs = [m for m in status_messages if "retrying" in m.lower()]
         assert len(retry_msgs) == 3, f"Expected 3 retry status messages, got {len(retry_msgs)}: {status_messages}"
diff --git a/tests/run_agent/test_turn_completion_explainer.py b/tests/run_agent/test_turn_completion_explainer.py
new file mode 100644
index 00000000000..a04cc1e5e36
--- /dev/null
+++ b/tests/run_agent/test_turn_completion_explainer.py
@@ -0,0 +1,181 @@
+"""Tests for the end-of-turn completion explainer (#34452).
+
+When a turn ends abnormally after tools (empty content after retries, a
+partial/truncated stream, exhausted retries, or an iteration/budget limit)
+the user should get a single user-visible explanation of why the reply
+stopped instead of a blank or fragmentary response box.  Normal short
+replies (e.g. ``Done.``) must stay quiet.
+
+These tests exercise:
+  1. ``_format_turn_completion_explanation`` — the pure reason→message map.
+  2. ``_turn_completion_explainer_enabled`` — the env/config seam.
+  3. An end-to-end ``run_conversation`` turn that exhausts empty-response
+     retries and verifies the explanation reaches ``final_response``.
+
+All assertions work under the mocked OpenAI SDK used elsewhere in this
+suite (we patch ``run_agent.OpenAI`` and drive ``agent.client``), so they
+pass identically in CI and locally.
+"""
+
+import os
+import uuid
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
+
+from run_agent import AIAgent
+
+
+# --------------------------------------------------------------------------
+# Fixtures (mirrors tests/run_agent/test_tool_call_guardrail_runtime.py)
+# --------------------------------------------------------------------------
+def _mock_response(content="Hello", finish_reason="stop", tool_calls=None):
+    msg = SimpleNamespace(content=content, tool_calls=tool_calls)
+    choice = SimpleNamespace(message=msg, finish_reason=finish_reason)
+    return SimpleNamespace(choices=[choice], model="test/model", usage=None)
+
+
+def _make_agent(max_iterations: int = 10, config: dict | None = None) -> AIAgent:
+    with (
+        patch("run_agent.get_tool_definitions", return_value=[]),
+        patch("run_agent.check_toolset_requirements", return_value={}),
+        patch("hermes_cli.config.load_config", return_value=config or {}),
+        patch("run_agent.OpenAI"),
+    ):
+        agent = AIAgent(
+            api_key="test-key-1234567890",
+            base_url="https://openrouter.ai/api/v1",
+            max_iterations=max_iterations,
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+    agent.client = MagicMock()
+    agent._cached_system_prompt = "You are helpful."
+    agent._use_prompt_caching = False
+    agent.tool_delay = 0
+    agent.compression_enabled = False
+    agent.save_trajectories = False
+    # No fallback chain so empty responses exhaust deterministically.
+    agent._fallback_chain = []
+    return agent
+
+
+# --------------------------------------------------------------------------
+# 1. Pure formatter
+# --------------------------------------------------------------------------
+def test_explanation_quiet_for_normal_text_response():
+    """A healthy text_response exit must NOT produce any explanation."""
+    out = AIAgent._format_turn_completion_explanation(
+        "text_response(finish_reason=stop)"
+    )
+    assert out == ""
+
+
+def test_explanation_quiet_for_empty_reason():
+    assert AIAgent._format_turn_completion_explanation("") == ""
+    assert AIAgent._format_turn_completion_explanation("unknown") == ""
+    # guardrail_halt surfaces its own message; explainer stays out of the way.
+    assert AIAgent._format_turn_completion_explanation("guardrail_halt") == ""
+
+
+def test_explanation_for_empty_response_exhausted():
+    out = AIAgent._format_turn_completion_explanation("empty_response_exhausted")
+    assert out  # non-empty
+    assert "empty content" in out
+    assert "continue" in out.lower()
+
+
+def test_explanation_for_partial_stream_recovery():
+    out = AIAgent._format_turn_completion_explanation("partial_stream_recovery")
+    assert "partial" in out.lower()
+    assert "continue" in out.lower()
+
+
+def test_explanation_for_max_iterations_reached_prefix_match():
+    """``max_iterations_reached(...)`` carries a parenthetical suffix."""
+    out = AIAgent._format_turn_completion_explanation(
+        "max_iterations_reached(10/10)"
+    )
+    assert "iteration" in out.lower()
+
+
+def test_explanation_for_all_retries_exhausted():
+    out = AIAgent._format_turn_completion_explanation(
+        "all_retries_exhausted_no_response"
+    )
+    assert "retries" in out.lower()
+
+
+# --------------------------------------------------------------------------
+# 2. Enable/disable seam
+# --------------------------------------------------------------------------
+def test_explainer_enabled_by_default():
+    agent = _make_agent()
+    with patch.dict(os.environ, {}, clear=False):
+        os.environ.pop("HERMES_TURN_COMPLETION_EXPLAINER", None)
+        with patch("hermes_cli.config.load_config", return_value={}):
+            assert agent._turn_completion_explainer_enabled() is True
+
+
+def test_explainer_disabled_via_env():
+    agent = _make_agent()
+    with patch.dict(
+        os.environ, {"HERMES_TURN_COMPLETION_EXPLAINER": "0"}, clear=False
+    ):
+        assert agent._turn_completion_explainer_enabled() is False
+
+
+def test_explainer_disabled_via_config():
+    agent = _make_agent()
+    with patch.dict(os.environ, {}, clear=False):
+        os.environ.pop("HERMES_TURN_COMPLETION_EXPLAINER", None)
+        with patch(
+            "hermes_cli.config.load_config",
+            return_value={"display": {"turn_completion_explainer": False}},
+        ):
+            assert agent._turn_completion_explainer_enabled() is False
+
+
+# --------------------------------------------------------------------------
+# 3. End-to-end: empty-response exhaustion surfaces the explanation
+# --------------------------------------------------------------------------
+def test_run_conversation_empty_exhausted_surfaces_explanation():
+    """Four empty responses in a row should exhaust retries and the final
+    response should be the actionable explanation, not a bare '(empty)'."""
+    agent = _make_agent(max_iterations=10)
+    # 4 empty responses: retries 1..3 then the terminal on the 4th.
+    agent.client.chat.completions.create.side_effect = [
+        _mock_response(content="", finish_reason="stop") for _ in range(8)
+    ]
+
+    with (
+        patch.object(agent, "_persist_session"),
+        patch.object(agent, "_save_trajectory"),
+        patch.object(agent, "_cleanup_task_resources"),
+    ):
+        result = agent.run_conversation("do something")
+
+    assert result["turn_exit_reason"] == "empty_response_exhausted"
+    # The user must NOT be left with a bare sentinel; the explanation wins.
+    assert result["final_response"] != "(empty)"
+    assert result["final_response"].strip() != ""
+    assert "No reply:" in result["final_response"]
+
+
+def test_run_conversation_normal_reply_stays_quiet():
+    """A normal short reply like 'Done.' must NOT get an explainer footer."""
+    agent = _make_agent(max_iterations=10)
+    agent.client.chat.completions.create.side_effect = [
+        _mock_response(content="Done.", finish_reason="stop"),
+    ]
+
+    with (
+        patch.object(agent, "_persist_session"),
+        patch.object(agent, "_save_trajectory"),
+        patch.object(agent, "_cleanup_task_resources"),
+    ):
+        result = agent.run_conversation("do something")
+
+    assert result["turn_exit_reason"].startswith("text_response")
+    assert result["final_response"] == "Done."
+    assert "No reply:" not in result["final_response"]
diff --git a/tests/skills/test_google_workspace_api.py b/tests/skills/test_google_workspace_api.py
index 30a1441d634..ffb56ce3cb5 100644
--- a/tests/skills/test_google_workspace_api.py
+++ b/tests/skills/test_google_workspace_api.py
@@ -229,6 +229,212 @@ def test_api_calendar_list_respects_date_range(api_module):
     assert params["timeMax"] == "2026-04-07T23:59:59Z"
 
 
+@pytest.mark.parametrize(
+    "header_names",
+    [
+        ("from", "to", "subject", "date"),
+        ("From", "To", "Subject", "Date"),
+    ],
+)
+def test_api_gmail_get_reads_headers_case_insensitively(api_module, capsys, header_names):
+    from_name, to_name, subject_name, date_name = header_names
+
+    def fake_run_gws(parts, *, params=None, body=None):
+        assert parts == ["gmail", "users", "messages", "get"]
+        assert params == {"userId": "me", "id": "msg-1", "format": "full"}
+        return {
+            "id": "msg-1",
+            "threadId": "thread-1",
+            "labelIds": ["INBOX"],
+            "payload": {
+                "headers": [
+                    {"name": from_name, "value": "sender@example.com"},
+                    {"name": to_name, "value": "recipient@example.com"},
+                    {"name": subject_name, "value": "case bug"},
+                    {"name": date_name, "value": "Fri, 29 May 2026 12:00:00 +0000"},
+                ],
+                "body": {},
+            },
+        }
+
+    api_module._run_gws = fake_run_gws
+    args = api_module.argparse.Namespace(message_id="msg-1", func=api_module.gmail_get)
+
+    api_module.gmail_get(args)
+
+    result = json.loads(capsys.readouterr().out)
+    assert result["from"] == "sender@example.com"
+    assert result["to"] == "recipient@example.com"
+    assert result["subject"] == "case bug"
+    assert result["date"] == "Fri, 29 May 2026 12:00:00 +0000"
+
+
+@pytest.mark.parametrize(
+    "header_names",
+    [
+        ("from", "to", "subject", "date"),
+        ("From", "To", "Subject", "Date"),
+    ],
+)
+def test_api_gmail_search_reads_headers_case_insensitively(
+    api_module,
+    capsys,
+    header_names,
+):
+    from_name, to_name, subject_name, date_name = header_names
+    calls = []
+
+    def fake_run_gws(parts, *, params=None, body=None):
+        calls.append({"parts": parts, "params": params, "body": body})
+        if parts == ["gmail", "users", "messages", "list"]:
+            assert params == {"userId": "me", "q": "from:sender", "maxResults": 5}
+            return {"messages": [{"id": "msg-1"}]}
+
+        assert parts == ["gmail", "users", "messages", "get"]
+        assert params == {
+            "userId": "me",
+            "id": "msg-1",
+            "format": "metadata",
+            "metadataHeaders": ["From", "To", "Subject", "Date"],
+        }
+        return {
+            "id": "msg-1",
+            "threadId": "thread-1",
+            "labelIds": ["INBOX"],
+            "snippet": "preview",
+            "payload": {
+                "headers": [
+                    {"name": from_name, "value": "sender@example.com"},
+                    {"name": to_name, "value": "recipient@example.com"},
+                    {"name": subject_name, "value": "case bug"},
+                    {"name": date_name, "value": "Fri, 29 May 2026 12:00:00 +0000"},
+                ],
+            },
+        }
+
+    api_module._run_gws = fake_run_gws
+    args = api_module.argparse.Namespace(
+        query="from:sender",
+        max=5,
+        func=api_module.gmail_search,
+    )
+
+    api_module.gmail_search(args)
+
+    assert len(calls) == 2
+    result = json.loads(capsys.readouterr().out)
+    assert result == [
+        {
+            "id": "msg-1",
+            "threadId": "thread-1",
+            "from": "sender@example.com",
+            "to": "recipient@example.com",
+            "subject": "case bug",
+            "date": "Fri, 29 May 2026 12:00:00 +0000",
+            "snippet": "preview",
+            "labels": ["INBOX"],
+        }
+    ]
+
+
+def test_api_gmail_send_uses_conventional_mime_header_casing(api_module):
+    captured = {}
+
+    def fake_run_gws(parts, *, params=None, body=None):
+        captured["parts"] = parts
+        captured["params"] = params
+        captured["body"] = body
+        return {"id": "sent-1", "threadId": "thread-1"}
+
+    api_module._run_gws = fake_run_gws
+    args = api_module.argparse.Namespace(
+        to="recipient@example.com",
+        subject="hello",
+        body="body",
+        html=False,
+        cc="copy@example.com",
+        from_header="sender@example.com",
+        thread_id="thread-1",
+        func=api_module.gmail_send,
+    )
+
+    api_module.gmail_send(args)
+
+    raw = api_module.base64.urlsafe_b64decode(captured["body"]["raw"])
+    raw_text = raw.decode()
+    assert "To: recipient@example.com" in raw_text
+    assert "Subject: hello" in raw_text
+    assert "Cc: copy@example.com" in raw_text
+    assert "From: sender@example.com" in raw_text
+    assert "\nto: " not in raw_text
+    assert "\nsubject: " not in raw_text
+
+
+@pytest.mark.parametrize(
+    "header_names",
+    [
+        ("from", "subject", "message-id"),
+        ("From", "Subject", "Message-ID"),
+    ],
+)
+def test_api_gmail_reply_reads_headers_case_insensitively_and_uses_conventional_mime_header_casing(
+    api_module,
+    header_names,
+):
+    from_name, subject_name, message_id_name = header_names
+    calls = []
+
+    def fake_run_gws(parts, *, params=None, body=None):
+        calls.append({"parts": parts, "params": params, "body": body})
+        if parts == ["gmail", "users", "messages", "get"]:
+            assert params == {
+                "userId": "me",
+                "id": "msg-1",
+                "format": "metadata",
+                "metadataHeaders": ["From", "Subject", "Message-ID"],
+            }
+            return {
+                "id": "msg-1",
+                "threadId": "thread-1",
+                "payload": {
+                    "headers": [
+                        {"name": from_name, "value": "sender@example.com"},
+                        {"name": subject_name, "value": "case bug"},
+                        {"name": message_id_name, "value": "<msg-1@example.com>"},
+                    ],
+                },
+            }
+
+        assert parts == ["gmail", "users", "messages", "send"]
+        assert params == {"userId": "me"}
+        return {"id": "sent-1", "threadId": "thread-1"}
+
+    api_module._run_gws = fake_run_gws
+    args = api_module.argparse.Namespace(
+        message_id="msg-1",
+        body="reply body",
+        from_header="recipient@example.com",
+        func=api_module.gmail_reply,
+    )
+
+    api_module.gmail_reply(args)
+
+    assert len(calls) == 2
+    body = calls[1]["body"]
+    assert body["threadId"] == "thread-1"
+    raw = api_module.base64.urlsafe_b64decode(body["raw"])
+    raw_text = raw.decode()
+    assert "To: sender@example.com" in raw_text
+    assert "Subject: Re: case bug" in raw_text
+    assert "From: recipient@example.com" in raw_text
+    assert "In-Reply-To: <msg-1@example.com>" in raw_text
+    assert "References: <msg-1@example.com>" in raw_text
+    assert "\nto: " not in raw_text
+    assert "\nsubject: " not in raw_text
+    assert "\nin-reply-to: " not in raw_text
+    assert "\nreferences: " not in raw_text
+
+
 def test_api_get_credentials_refresh_persists_authorized_user_type(api_module, monkeypatch):
     token_path = api_module.TOKEN_PATH
     _write_token(token_path, token="ya29.old")
diff --git a/tests/test_hermes_state.py b/tests/test_hermes_state.py
index 1d7b922c2dd..998205c7ddd 100644
--- a/tests/test_hermes_state.py
+++ b/tests/test_hermes_state.py
@@ -1,11 +1,35 @@
 """Tests for hermes_state.py — SessionDB SQLite CRUD, FTS5 search, export."""
 
+import sqlite3
 import time
 import pytest
 
 from hermes_state import SessionDB
 
 
+class _NoFtsCursor(sqlite3.Cursor):
+    """Simulate a SQLite build without the fts5 module."""
+
+    def execute(self, sql, parameters=()):
+        probe = sql.strip()
+        if probe in (
+            "SELECT * FROM messages_fts LIMIT 0",
+            "SELECT * FROM messages_fts_trigram LIMIT 0",
+        ):
+            raise sqlite3.OperationalError("no such table: " + probe.split()[-3])
+        return super().execute(sql, parameters)
+
+    def executescript(self, sql_script):
+        if "USING fts5" in sql_script:
+            raise sqlite3.OperationalError("no such module: fts5")
+        return super().executescript(sql_script)
+
+
+class _NoFtsConnection(sqlite3.Connection):
+    def cursor(self, factory=None):
+        return super().cursor(factory or _NoFtsCursor)
+
+
 @pytest.fixture()
 def db(tmp_path):
     """Create a SessionDB with a temp database file."""
@@ -128,6 +152,30 @@ class TestSessionLifecycle:
         session = db.get_session("s1")
         assert session["model"] == "anthropic/claude-opus-4.6"
 
+    def test_update_session_model_overwrites_existing(self, db):
+        """A mid-session /model switch must overwrite the stored model.
+
+        update_token_counts uses COALESCE(model, ?) (first-writer-wins), so
+        the dashboard kept showing the original model after a switch (#34850).
+        update_session_model sets the column unconditionally.
+        """
+        db.create_session(session_id="s1", source="telegram",
+                          model="xiaomi/mimo-v2.5-pro")
+        # Token updates never change the model once set.
+        db.update_token_counts("s1", input_tokens=10, output_tokens=5,
+                               model="xiaomi/mimo-v2.5-pro")
+        assert db.get_session("s1")["model"] == "xiaomi/mimo-v2.5-pro"
+
+        # Explicit switch overwrites it.
+        db.update_session_model("s1", "xiaomi/mimo-v2.5")
+        assert db.get_session("s1")["model"] == "xiaomi/mimo-v2.5"
+
+        # And a subsequent token update does NOT revert it (COALESCE no-ops
+        # because the column is now non-NULL).
+        db.update_token_counts("s1", input_tokens=10, output_tokens=5,
+                               model="xiaomi/mimo-v2.5-pro")
+        assert db.get_session("s1")["model"] == "xiaomi/mimo-v2.5"
+
     def test_parent_session(self, db):
         db.create_session(session_id="parent", source="cli")
         db.create_session(session_id="child", source="cli", parent_session_id="parent")
@@ -135,6 +183,33 @@ class TestSessionLifecycle:
         child = db.get_session("child")
         assert child["parent_session_id"] == "parent"
 
+    def test_db_initializes_without_fts5_module(self, tmp_path, monkeypatch):
+        real_connect = sqlite3.connect
+
+        def connect_without_fts(*args, **kwargs):
+            kwargs["factory"] = _NoFtsConnection
+            return real_connect(*args, **kwargs)
+
+        monkeypatch.setattr("hermes_state.sqlite3.connect", connect_without_fts)
+
+        db = SessionDB(db_path=tmp_path / "state.db")
+        try:
+            assert db._fts_enabled is False
+            # Neither FTS5 virtual table should have been created on a build
+            # that lacks the fts5 module — both init paths must degrade.
+            assert db._fts_table_exists("messages_fts") is False
+            assert db._fts_table_exists("messages_fts_trigram") is False
+
+            db.create_session(session_id="s1", source="cli")
+            db.append_message("s1", role="user", content="hello from sqlite without fts")
+
+            messages = db.get_messages("s1")
+            assert len(messages) == 1
+            assert messages[0]["content"] == "hello from sqlite without fts"
+            assert db.search_messages("hello") == []
+        finally:
+            db.close()
+
 
 # =========================================================================
 # Message storage
diff --git a/tests/test_honcho_client_config.py b/tests/test_honcho_client_config.py
index d4c62d610e9..f7b1efa151c 100644
--- a/tests/test_honcho_client_config.py
+++ b/tests/test_honcho_client_config.py
@@ -2,9 +2,13 @@
 
 import json
 import os
+import stat
+from pathlib import Path
 
+import pytest
 
 from plugins.memory.honcho.client import HonchoClientConfig
+from plugins.memory.honcho import HonchoMemoryProvider
 
 
 class TestHonchoClientConfigAutoEnable:
@@ -100,3 +104,24 @@ class TestHonchoClientConfigAutoEnable:
 
         assert cfg.api_key == "fallback-key"
         assert cfg.enabled is True  # from_env() sets enabled=True
+
+
+@pytest.mark.skipif(os.name == "nt", reason="POSIX mode bits not enforced on Windows")
+def test_save_config_sets_owner_only_permissions(tmp_path, monkeypatch):
+    """honcho.json is created atomically with 0o600, not chmod-after-write."""
+    import utils
+    calls = []
+    real_atomic = utils.atomic_json_write
+
+    def spy(path, data, **kwargs):
+        calls.append(kwargs.get("mode"))
+        return real_atomic(path, data, **kwargs)
+
+    monkeypatch.setattr(utils, "atomic_json_write", spy)
+    provider = HonchoMemoryProvider()
+    provider.save_config({"api_key": "hc-test-key"}, str(tmp_path))
+    assert calls == [0o600]
+    config_file = tmp_path / "honcho.json"
+    assert config_file.exists()
+    mode = stat.S_IMODE(config_file.stat().st_mode)
+    assert mode == 0o600, f"Expected 0o600 (owner-only), got {oct(mode)}"
diff --git a/tests/test_packaging_metadata.py b/tests/test_packaging_metadata.py
index d72c0224a69..fadb022f31f 100644
--- a/tests/test_packaging_metadata.py
+++ b/tests/test_packaging_metadata.py
@@ -115,3 +115,88 @@ def test_bundled_plugin_manifests_ship_in_both_wheel_and_sdist():
     assert "recursive-include plugins" in manifest and "plugin.yaml" in manifest, (
         "MANIFEST.in must recursive-include plugins plugin.yaml/plugin.yml (sdist)"
     )
+
+
+# Minimum non-vulnerable Starlette: CVE-2026-48710 ("BadHost") was fixed in
+# 1.0.1. Anything below that lets a malformed Host header desync
+# ``request.url.path`` from the dispatched ASGI path, bypassing path-based
+# authz in middleware/endpoints that gate on ``request.url``. Starlette is a
+# transitive dep (fastapi in [web]; sse-starlette/mcp in [mcp]/[computer-use]/
+# [dev]) so we pin it directly in every extra that exposes a server surface and
+# enforce the floor in both pyproject and the committed lockfile.
+_STARLETTE_CVE_FLOOR = (1, 0, 1)
+
+
+def _version_tuple(spec: str) -> tuple[int, ...]:
+    # "1.0.1" -> (1, 0, 1); tolerant of pre/post suffixes by truncating.
+    head = spec.split("+", 1)[0]
+    parts = []
+    for chunk in head.split("."):
+        digits = "".join(ch for ch in chunk if ch.isdigit())
+        if not digits:
+            break
+        parts.append(int(digits))
+    return tuple(parts)
+
+
+def test_starlette_pinned_above_cve_2026_48710_floor_in_pyproject():
+    """Every extra that declares Starlette must pin a patched (>=1.0.1) version.
+
+    Regression guard for #35067 / CVE-2026-48710. A future edit that drops the
+    pin (re-exposing the unbounded transitive ``starlette>=0.27`` from mcp /
+    ``>=0.40.0`` from fastapi) or pins a pre-1.0.1 version fails here instead of
+    shipping a Host-header auth-bypass to dashboard / MCP-HTTP users.
+    """
+    data = tomllib.loads((REPO_ROOT / "pyproject.toml").read_text(encoding="utf-8"))
+    extras = data["project"]["optional-dependencies"]
+
+    found = {}
+    for extra, specs in extras.items():
+        for spec in specs:
+            name = spec.split("==", 1)[0].split(">", 1)[0].split("<", 1)[0].split("[", 1)[0].strip()
+            if name.lower() == "starlette":
+                assert "==" in spec, f"[{extra}] must exact-pin starlette, got {spec!r}"
+                ver = spec.split("==", 1)[1].split(";", 1)[0].strip()
+                found[extra] = ver
+
+    # The four server-surface extras must each carry the direct pin.
+    for extra in ("web", "mcp", "computer-use", "dev"):
+        assert extra in found, (
+            f"[{extra}] no longer pins starlette directly — CVE-2026-48710 "
+            f"regression risk (mcp/fastapi pull it transitively with no upper bound)"
+        )
+
+    for extra, ver in found.items():
+        assert _version_tuple(ver) >= _STARLETTE_CVE_FLOOR, (
+            f"[{extra}] pins starlette=={ver}, below the CVE-2026-48710 fix "
+            f"floor {'.'.join(map(str, _STARLETTE_CVE_FLOOR))}"
+        )
+
+
+def test_locked_starlette_is_not_vulnerable_to_cve_2026_48710():
+    """The committed uv.lock must resolve starlette to a patched version.
+
+    pyproject pins protect the declared extras, but the lockfile is what
+    hash-verified installs (``uv sync --locked``) actually pull. Assert the
+    resolved version is >= the CVE-2026-48710 fix floor so a stale-lock
+    regression can't ship a vulnerable Starlette to users.
+    """
+    lock = (REPO_ROOT / "uv.lock").read_text(encoding="utf-8")
+    versions = []
+    in_starlette = False
+    for line in lock.splitlines():
+        if line.startswith("[[package]]"):
+            in_starlette = False
+        elif line.strip() == 'name = "starlette"':
+            in_starlette = True
+        elif in_starlette and line.startswith("version = "):
+            versions.append(line.split("=", 1)[1].strip().strip('"'))
+            in_starlette = False
+
+    assert versions, "starlette not found in uv.lock"
+    for ver in versions:
+        assert _version_tuple(ver) >= _STARLETTE_CVE_FLOOR, (
+            f"uv.lock resolves starlette=={ver}, below the CVE-2026-48710 fix "
+            f"floor {'.'.join(map(str, _STARLETTE_CVE_FLOOR))} — regenerate the "
+            f"lockfile after bumping the pin"
+        )
diff --git a/tests/tools/test_browser_eval_supervisor_path.py b/tests/tools/test_browser_eval_supervisor_path.py
index 09a3bcbcaef..d23312eb747 100644
--- a/tests/tools/test_browser_eval_supervisor_path.py
+++ b/tests/tools/test_browser_eval_supervisor_path.py
@@ -189,6 +189,32 @@ class TestBrowserEvalSupervisorPath:
         json.loads(bt._browser_eval("1+1"))
         assert called["subprocess"] is True
 
+    def test_subprocess_reference_chain_error_becomes_guidance(self, monkeypatch):
+        """The CLI subprocess can't retry with returnByValue=False, so the
+        cryptic 'Object reference chain is too long' CDP error must be turned
+        into actionable guidance instead of surfaced raw."""
+        import tools.browser_tool as bt
+
+        # No supervisor → subprocess path runs.
+        _patch_supervisor(monkeypatch, None)
+
+        def _fake_subprocess(task_id, cmd, args):
+            assert cmd == "eval"
+            return {
+                "success": False,
+                "error": "Runtime.evaluate failed: Object reference chain is too long",
+            }
+
+        monkeypatch.setattr(bt, "_run_browser_command", _fake_subprocess)
+
+        out = json.loads(bt._browser_eval("document.body"))
+        assert out["success"] is False
+        # Raw protocol error must NOT leak through.
+        assert "reference chain" not in out["error"].lower()
+        # Actionable guidance instead.
+        assert "primitive" in out["error"].lower()
+        assert "DOM node" in out["error"] or "dom node" in out["error"].lower()
+
 
 # ---------------------------------------------------------------------------
 # Response shaping: CDPSupervisor.evaluate_runtime
@@ -361,3 +387,91 @@ class TestEvaluateRuntimeResponseShaping:
         finally:
             loop.call_soon_threadsafe(loop.stop)
             thread.join(timeout=2)
+
+
+def _make_supervisor_with_cdp_fn(cdp_fn):
+    """Like ``_make_supervisor_with_cdp`` but lets the test supply a coroutine
+    function as ``_cdp`` so behaviour can vary by params (e.g. returnByValue).
+    """
+    import asyncio
+    import threading
+
+    from tools.browser_supervisor import CDPSupervisor
+
+    sup = object.__new__(CDPSupervisor)
+    sup._state_lock = threading.Lock()
+    sup._active = True
+    sup._page_session_id = "test-session-id"
+
+    loop = asyncio.new_event_loop()
+
+    def _runner():
+        asyncio.set_event_loop(loop)
+        loop.run_forever()
+
+    thread = threading.Thread(target=_runner, daemon=True)
+    thread.start()
+
+    sup._cdp = cdp_fn  # type: ignore[method-assign]
+    sup._loop = loop
+    sup._thread = thread
+    return sup
+
+
+class TestEvaluateRuntimeDomNodeCrashRetry:
+    """returnByValue=True on a DOM node fails CDP serialization with 'Object
+    reference chain is too long'.  evaluate_runtime must retry with
+    returnByValue=False and return the node's description instead of crashing.
+    """
+
+    def test_reference_chain_crash_retries_without_by_value(self):
+        calls = []
+
+        async def _fake_cdp(method, params=None, *, session_id=None, timeout=10.0):
+            by_value = (params or {}).get("returnByValue")
+            calls.append(by_value)
+            if by_value:
+                # Mirror _read_loop turning a top-level CDP error into a RuntimeError.
+                raise RuntimeError(
+                    "CDP error on id=7: {'code': -32000, "
+                    "'message': 'Object reference chain is too long'}"
+                )
+            # returnByValue=False: Chrome returns the node's description, no value.
+            return {
+                "id": 8,
+                "result": {
+                    "result": {
+                        "type": "object",
+                        "subtype": "node",
+                        "description": "body",
+                    }
+                },
+            }
+
+        sup = _make_supervisor_with_cdp_fn(_fake_cdp)
+        try:
+            out = sup.evaluate_runtime("document.body")
+            assert out["ok"] is True
+            assert out["result"] == "body"
+            assert out["result_type"] == "object"
+            # First call by_value=True (crashed), retried with by_value=False.
+            assert calls == [True, False]
+        finally:
+            _stop_supervisor(sup)
+
+    def test_unrelated_error_does_not_retry(self):
+        calls = []
+
+        async def _fake_cdp(method, params=None, *, session_id=None, timeout=10.0):
+            calls.append((params or {}).get("returnByValue"))
+            raise RuntimeError("CDP error on id=3: {'message': 'Target closed'}")
+
+        sup = _make_supervisor_with_cdp_fn(_fake_cdp)
+        try:
+            out = sup.evaluate_runtime("document.body")
+            assert out["ok"] is False
+            assert "Target closed" in out["error"]
+            # No retry for unrelated failures — exactly one call.
+            assert calls == [True]
+        finally:
+            _stop_supervisor(sup)
diff --git a/tests/tools/test_file_operations.py b/tests/tools/test_file_operations.py
index f809ea5d912..225b005cfe8 100644
--- a/tests/tools/test_file_operations.py
+++ b/tests/tools/test_file_operations.py
@@ -345,15 +345,23 @@ class TestShellFileOpsHelpers:
     def test_add_line_numbers(self, file_ops):
         content = "line one\nline two\nline three"
         result = file_ops._add_line_numbers(content)
-        assert "     1|line one" in result
-        assert "     2|line two" in result
-        assert "     3|line three" in result
+        # Compact gutter: "<n>|content" (no fixed-width padding).
+        assert "1|line one" in result
+        assert "2|line two" in result
+        assert "3|line three" in result
 
     def test_add_line_numbers_with_offset(self, file_ops):
         content = "continued\nmore"
         result = file_ops._add_line_numbers(content, start_line=50)
-        assert "    50|continued" in result
-        assert "    51|more" in result
+        assert "50|continued" in result
+        assert "51|more" in result
+
+    def test_add_line_numbers_padded_env_override(self, file_ops, monkeypatch):
+        # Legacy fixed-width format available via HERMES_READ_GUTTER=padded.
+        monkeypatch.setenv("HERMES_READ_GUTTER", "padded")
+        result = file_ops._add_line_numbers("line one\nline two")
+        assert "     1|line one" in result
+        assert "     2|line two" in result
 
     def test_add_line_numbers_truncates_long_lines(self, file_ops):
         long_line = "x" * (MAX_LINE_LENGTH + 100)
@@ -405,7 +413,7 @@ class TestShellFileOpsHelpers:
         assert "HERMES_FENCE" not in result.content
         assert "\x1b]" not in result.content
         assert "\x07" not in result.content
-        assert "     1|print('ok')" in result.content
+        assert "1|print('ok')" in result.content
 
     def test_read_file_raw_strips_leaked_terminal_fence_markers(self, mock_env):
         leaked = (
@@ -638,12 +646,14 @@ class TestPatchReplacePostWriteVerification:
         state = {"content": "hello world\n"}
 
         def side_effect(command, stdin_data=None, **kwargs):
-            # Write is `cat > path` — detect by the `>` redirect, NOT just `cat `
-            if command.startswith("cat >"):
-                if stdin_data is not None:
-                    state["content"] = stdin_data
+            # A write is the only call that pipes content over stdin — key
+            # on that behavioral signal rather than the exact write command,
+            # which is an atomic temp-file + mv script (`set -e; ... mv ...`),
+            # not a bare `cat > path`.
+            if stdin_data is not None:
+                state["content"] = stdin_data
                 return {"output": "", "returncode": 0}
-            if command.startswith("cat "):  # read
+            if command.startswith("cat "):  # read / verify
                 return {"output": state["content"], "returncode": 0}
             if command.startswith("mkdir "):
                 return {"output": "", "returncode": 0}
@@ -664,9 +674,8 @@ class TestPatchReplacePostWriteVerification:
         state = {"content": "hello world\n"}
 
         def side_effect(command, stdin_data=None, **kwargs):
-            if command.startswith("cat >"):  # write
-                if stdin_data is not None:
-                    state["content"] = stdin_data
+            if stdin_data is not None:  # write (atomic temp-file + mv script)
+                state["content"] = stdin_data
                 return {"output": "", "returncode": 0}
             if command.startswith("cat "):  # read
                 call_count["cat"] += 1
diff --git a/tests/tools/test_file_operations_edge_cases.py b/tests/tools/test_file_operations_edge_cases.py
index bad72f4b6d4..0e275d5a4a9 100644
--- a/tests/tools/test_file_operations_edge_cases.py
+++ b/tests/tools/test_file_operations_edge_cases.py
@@ -292,7 +292,7 @@ class TestPaginationBounds:
             result = ops.read_file("notes.txt", offset=0, limit=0)
 
         assert result.error is None
-        assert "     1|line1" in result.content
+        assert "1|line1" in result.content
         sed_commands = [cmd for cmd in commands if cmd.startswith("sed -n")]
         assert sed_commands == ["sed -n '1,1p' 'notes.txt'"]
 
diff --git a/tests/tools/test_file_tools_cwd_resolution.py b/tests/tools/test_file_tools_cwd_resolution.py
new file mode 100644
index 00000000000..6bb7c1bf37f
--- /dev/null
+++ b/tests/tools/test_file_tools_cwd_resolution.py
@@ -0,0 +1,197 @@
+"""Regression tests for file-tool path resolution base correctness.
+
+The bug (observed in a worktree dev session, May 2026): when the resolution
+base for a relative path is itself RELATIVE — e.g. ``TERMINAL_CWD="."`` from a
+stale config — ``_resolve_path_for_task`` resolved the path against the agent's
+PROCESS cwd instead of the intended workspace. In a git-worktree session this
+silently routed ``patch``/``write_file`` edits into the *main* checkout: the
+write landed, self-verified, and reported success — against the wrong file.
+The agent then grepped the worktree, saw nothing, and concluded the patch tool
+had silently no-op'd. It hadn't; it wrote to the wrong place.
+
+Core invariant these tests pin:
+  The resolution base for a relative path MUST always be absolute. A relative
+  ``TERMINAL_CWD`` (``.``, ``./sub``, ``..``) must be anchored deterministically,
+  never left to resolve against whatever the process cwd happens to be.
+"""
+
+import os
+from pathlib import Path
+
+import pytest
+
+import tools.file_tools as ft
+
+
+@pytest.fixture
+def _isolated_cwd(tmp_path, monkeypatch):
+    """Two checkouts: workspace (intended) + decoy (process cwd)."""
+    workspace = tmp_path / "workspace"
+    decoy = tmp_path / "decoy"
+    workspace.mkdir()
+    decoy.mkdir()
+    (workspace / "target.py").write_text("WORKSPACE_ORIGINAL\n")
+    (decoy / "target.py").write_text("DECOY_ORIGINAL\n")
+    # Process cwd = decoy, analogous to "main repo" while the terminal is in
+    # the worktree.
+    monkeypatch.chdir(decoy)
+    # No live-terminal-cwd tracking recorded yet (fresh-session condition).
+    monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": None)
+    return workspace, decoy
+
+
+def test_relative_terminal_cwd_anchors_to_absolute_not_process_cwd(_isolated_cwd, monkeypatch):
+    """TERMINAL_CWD='.' must NOT silently mean 'the agent process cwd'.
+
+    A relative base is meaningless as a resolution anchor. The resolver must
+    make it absolute deterministically. We assert the resolved path is
+    absolute and stable regardless of where os.getcwd() points.
+    """
+    workspace, decoy = _isolated_cwd
+    # Poison config: literal relative '.'
+    monkeypatch.setenv("TERMINAL_CWD", ".")
+
+    resolved = ft._resolve_path_for_task("target.py", task_id="default")
+
+    assert resolved.is_absolute(), f"resolution base leaked a relative path: {resolved}"
+    # The exact anchor for a bare '.' is the process cwd resolved to absolute —
+    # that is acceptable as long as it is ABSOLUTE and stable. The bug was that
+    # a relative base produced surprising results; the fix is that the base is
+    # always absolutised. (We do not require it to point at the workspace here —
+    # that's what live-cwd tracking is for; see the next test.)
+    assert str(resolved) == str((Path(os.getcwd()) / "target.py").resolve())
+
+
+def test_live_tracking_cwd_wins_over_relative_terminal_cwd(_isolated_cwd, monkeypatch):
+    """When the terminal reports its absolute cwd, that is authoritative.
+
+    This is the real-world fix: the terminal's tracked absolute cwd (the
+    worktree) must override a stale relative TERMINAL_CWD so edits land where
+    the agent is actually working.
+    """
+    workspace, decoy = _isolated_cwd
+    monkeypatch.setenv("TERMINAL_CWD", ".")
+    monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": str(workspace))
+
+    resolved = ft._resolve_path_for_task("target.py", task_id="default")
+
+    assert resolved == (workspace / "target.py")
+
+
+def test_absolute_terminal_cwd_used_verbatim(_isolated_cwd, monkeypatch):
+    """An absolute TERMINAL_CWD is the resolution base (no live tracking)."""
+    workspace, decoy = _isolated_cwd
+    monkeypatch.setenv("TERMINAL_CWD", str(workspace))
+
+    resolved = ft._resolve_path_for_task("target.py", task_id="default")
+
+    assert resolved == (workspace / "target.py")
+
+
+def test_absolute_input_path_ignores_base(_isolated_cwd, monkeypatch):
+    """An absolute input path is never re-anchored."""
+    workspace, decoy = _isolated_cwd
+    monkeypatch.setenv("TERMINAL_CWD", ".")
+    abs_target = str(workspace / "target.py")
+
+    resolved = ft._resolve_path_for_task(abs_target, task_id="default")
+
+    assert resolved == Path(abs_target).resolve()
+
+
+def test_resolution_base_always_absolute_no_terminal_cwd(_isolated_cwd, monkeypatch):
+    """With TERMINAL_CWD unset, the base falls back to an ABSOLUTE process cwd."""
+    workspace, decoy = _isolated_cwd
+    monkeypatch.delenv("TERMINAL_CWD", raising=False)
+
+    resolved = ft._resolve_path_for_task("target.py", task_id="default")
+
+    assert resolved.is_absolute()
+    assert str(resolved) == str((Path(os.getcwd()) / "target.py").resolve())
+
+
+# ── B-(ii): workspace-divergence warning ────────────────────────────────────
+
+
+def test_warning_fires_when_relative_path_escapes_workspace(_isolated_cwd, monkeypatch):
+    """Relative path resolving outside the live workspace must warn."""
+    workspace, decoy = _isolated_cwd
+    # Live cwd = workspace, but the relative path resolves to decoy (process cwd)
+    # because TERMINAL_CWD is the poison '.'.  Simulate by pointing live tracking
+    # at workspace while the resolved path is under decoy.
+    monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": str(workspace))
+    resolved_in_decoy = decoy / "target.py"
+
+    warn = ft._path_resolution_warning("target.py", resolved_in_decoy, task_id="default")
+
+    assert warn is not None
+    assert "OUTSIDE the active workspace" in warn
+    assert str(decoy) in warn
+    assert str(workspace) in warn
+
+
+def test_no_warning_when_relative_path_inside_workspace(_isolated_cwd, monkeypatch):
+    workspace, decoy = _isolated_cwd
+    monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": str(workspace))
+    resolved_in_workspace = workspace / "target.py"
+
+    warn = ft._path_resolution_warning("target.py", resolved_in_workspace, task_id="default")
+
+    assert warn is None
+
+
+def test_no_warning_for_absolute_input(_isolated_cwd, monkeypatch):
+    workspace, decoy = _isolated_cwd
+    monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": str(workspace))
+
+    warn = ft._path_resolution_warning(str(decoy / "target.py"), decoy / "target.py", task_id="default")
+
+    assert warn is None
+
+
+def test_no_warning_when_no_live_cwd(_isolated_cwd, monkeypatch):
+    workspace, decoy = _isolated_cwd
+    monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": None)
+
+    warn = ft._path_resolution_warning("target.py", decoy / "target.py", task_id="default")
+
+    assert warn is None
+
+
+# ── Fix A: write_file / patch report the resolved ABSOLUTE path ──────────────
+
+
+def test_write_file_reports_resolved_absolute_path(_isolated_cwd, monkeypatch):
+    """write_file_tool must put the absolute on-disk path in files_modified."""
+    workspace, decoy = _isolated_cwd
+    monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": str(workspace))
+
+    import json
+    out = json.loads(ft.write_file_tool("newfile.txt", "hello\n", task_id="t1"))
+
+    expected = str((workspace / "newfile.txt").resolve())
+    assert out.get("resolved_path") == expected
+    assert out.get("files_modified") == [expected]
+    assert (workspace / "newfile.txt").read_text() == "hello\n"
+
+
+def test_patch_reports_resolved_absolute_path(_isolated_cwd, monkeypatch):
+    """patch_tool (replace mode) must put the absolute on-disk path in files_modified."""
+    workspace, decoy = _isolated_cwd
+    monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": str(workspace))
+
+    import json
+    out = json.loads(ft.patch_tool(
+        mode="replace", path="target.py",
+        old_string="WORKSPACE_ORIGINAL", new_string="WORKSPACE_PATCHED",
+        task_id="t1",
+    ))
+
+    expected = str((workspace / "target.py").resolve())
+    assert not out.get("error"), out
+    assert out.get("resolved_path") == expected
+    assert out.get("files_modified") == [expected]
+    assert "WORKSPACE_PATCHED" in (workspace / "target.py").read_text()
+    # And the decoy copy is untouched.
+    assert (decoy / "target.py").read_text() == "DECOY_ORIGINAL\n"
+
diff --git a/tests/tools/test_file_write_safety.py b/tests/tools/test_file_write_safety.py
index e2eef17ab1d..ac44dd1bc6b 100644
--- a/tests/tools/test_file_write_safety.py
+++ b/tests/tools/test_file_write_safety.py
@@ -107,5 +107,177 @@ class TestCheckSensitivePathMacOSBypass:
         assert _check_sensitive_path("/tmp/safe_file.txt") is None
 
 
+class TestAtomicWrite:
+    """write_file / patch land via a temp-file + atomic rename.
+
+    The invariant: a write that fails partway NEVER corrupts the existing
+    file, and the swap is a real rename (so a reader either sees the full
+    old content or the full new content, never a half-written file). These
+    run against a real LocalEnvironment so the actual shell script executes.
+    """
+
+    @pytest.fixture
+    def ops(self, tmp_path: Path):
+        from tools.environments.local import LocalEnvironment
+        from tools.file_operations import ShellFileOperations
+        env = LocalEnvironment(cwd=str(tmp_path))
+        return ShellFileOperations(env, cwd=str(tmp_path))
+
+    def test_overwrite_changes_inode(self, ops, tmp_path: Path):
+        # A real rename allocates a new inode for the target; an in-place
+        # rewrite would keep the same inode. This proves the swap is atomic.
+        target = tmp_path / "f.txt"
+        target.write_text("v1")
+        ino_before = os.stat(target).st_ino
+        res = ops.write_file(str(target), "v2 content")
+        assert res.error is None, res.error
+        assert target.read_text() == "v2 content"
+        assert os.stat(target).st_ino != ino_before
+
+    def test_overwrite_preserves_mode(self, ops, tmp_path: Path):
+        target = tmp_path / "perms.txt"
+        target.write_text("old")
+        os.chmod(target, 0o640)
+        res = ops.write_file(str(target), "new")
+        assert res.error is None, res.error
+        assert (os.stat(target).st_mode & 0o777) == 0o640
+
+    def test_failed_write_leaves_original_intact(self, ops, tmp_path: Path):
+        # A read-only parent directory means the temp file can't be created,
+        # so the write fails BEFORE any rename. The original must survive
+        # byte-for-byte and no temp file may be left behind.
+        if hasattr(os, "geteuid") and os.geteuid() == 0:
+            pytest.skip("root bypasses directory permission bits")
+        locked = tmp_path / "locked"
+        locked.mkdir()
+        target = locked / "f.txt"
+        target.write_text("ORIGINAL\n")
+        os.chmod(locked, 0o500)  # r-x: cannot create entries inside
+        try:
+            res = ops.write_file(str(target), "SHOULD NOT LAND")
+        finally:
+            os.chmod(locked, 0o700)  # restore for cleanup
+        assert res.error is not None
+        assert target.read_text() == "ORIGINAL\n"
+        assert [p for p in os.listdir(locked) if ".hermes-tmp" in p] == []
+
+    def test_no_temp_file_leaked_on_success(self, ops, tmp_path: Path):
+        target = tmp_path / "f.txt"
+        ops.write_file(str(target), "hello\n")
+        assert [p for p in os.listdir(tmp_path) if ".hermes-tmp" in p] == []
+
+    def test_special_chars_roundtrip(self, ops, tmp_path: Path):
+        target = tmp_path / "special.txt"
+        tricky = "q 'single' \"double\" $VAR `cmd` \\back\nünïcödé 日本語\n"
+        res = ops.write_file(str(target), tricky)
+        assert res.error is None, res.error
+        assert target.read_text(encoding="utf-8") == tricky
+
+    def test_patch_routes_through_atomic_write(self, ops, tmp_path: Path):
+        target = tmp_path / "edit.py"
+        target.write_text("a = 1\nb = 2\nc = 3\n")
+        os.chmod(target, 0o600)
+        res = ops.patch_replace(str(target), "b = 2", "b = 22")
+        assert res.success, res.error
+        assert target.read_text() == "a = 1\nb = 22\nc = 3\n"
+        assert (os.stat(target).st_mode & 0o777) == 0o600
+
+
+class TestBomHandling:
+    """UTF-8 BOM is stripped on read and preserved across write/patch.
+
+    A BOM (U+FEFF, bytes EF BB BF) is an invisible leading marker some
+    Windows editors prepend. The agent should never see it in read output,
+    but a file that had one on disk must keep it after an edit so the byte
+    signature is preserved.
+    """
+
+    BOM = "\ufeff"
+
+    @pytest.fixture
+    def ops(self, tmp_path: Path):
+        from tools.environments.local import LocalEnvironment
+        from tools.file_operations import ShellFileOperations
+        env = LocalEnvironment(cwd=str(tmp_path))
+        return ShellFileOperations(env, cwd=str(tmp_path))
+
+    def test_helpers(self):
+        from tools.file_operations import _strip_bom, _has_bom
+        assert _strip_bom("\ufeffhello") == ("hello", True)
+        assert _strip_bom("hello") == ("hello", False)
+        assert _strip_bom("") == ("", False)
+        # mid-string BOM is data, not a marker — left alone
+        assert _strip_bom("a\ufeffb") == ("a\ufeffb", False)
+        assert _has_bom("\ufeffx") is True
+        assert _has_bom("x") is False
+        assert _has_bom(None) is False
+
+    def test_read_strips_bom(self, ops, tmp_path: Path):
+        target = tmp_path / "bom.py"
+        # Write raw bytes with a real UTF-8 BOM prefix.
+        target.write_bytes(self.BOM.encode("utf-8") + b"import os\nx = 1\n")
+        res = ops.read_file(str(target))
+        assert res.error is None, res.error
+        # Line 1 content must NOT carry the phantom U+FEFF.
+        first_line = res.content.split("\n", 1)[0]
+        assert self.BOM not in first_line
+        assert first_line.endswith("import os")
+
+    def test_read_raw_strips_bom(self, ops, tmp_path: Path):
+        target = tmp_path / "bom.txt"
+        target.write_bytes(self.BOM.encode("utf-8") + b"hello\nworld\n")
+        res = ops.read_file_raw(str(target))
+        assert res.error is None, res.error
+        assert not res.content.startswith(self.BOM)
+        assert res.content == "hello\nworld\n"
+
+    def test_write_preserves_bom(self, ops, tmp_path: Path):
+        # Existing file has a BOM; agent rewrites with BOM-less content.
+        target = tmp_path / "config.txt"
+        target.write_bytes(self.BOM.encode("utf-8") + b"old\n")
+        res = ops.write_file(str(target), "new content\n")
+        assert res.error is None, res.error
+        raw = target.read_bytes()
+        assert raw.startswith(self.BOM.encode("utf-8"))  # BOM restored
+        assert raw == self.BOM.encode("utf-8") + b"new content\n"
+
+    def test_write_no_bom_when_original_had_none(self, ops, tmp_path: Path):
+        target = tmp_path / "plain.txt"
+        target.write_text("old\n")
+        res = ops.write_file(str(target), "new\n")
+        assert res.error is None, res.error
+        assert not target.read_bytes().startswith(self.BOM.encode("utf-8"))
+
+    def test_write_does_not_double_bom(self, ops, tmp_path: Path):
+        # If content already carries a BOM and the file had one, don't add a
+        # second.
+        target = tmp_path / "config.txt"
+        target.write_bytes(self.BOM.encode("utf-8") + b"old\n")
+        res = ops.write_file(str(target), self.BOM + "new\n")
+        assert res.error is None, res.error
+        raw = target.read_bytes()
+        # exactly one BOM
+        assert raw == self.BOM.encode("utf-8") + b"new\n"
+
+    def test_patch_roundtrip_preserves_bom(self, ops, tmp_path: Path):
+        target = tmp_path / "edit.py"
+        target.write_bytes(self.BOM.encode("utf-8") + b"a = 1\nb = 2\nc = 3\n")
+        res = ops.patch_replace(str(target), "b = 2", "b = 22")
+        assert res.success, res.error
+        raw = target.read_bytes()
+        assert raw.startswith(self.BOM.encode("utf-8"))  # marker survived
+        assert raw == self.BOM.encode("utf-8") + b"a = 1\nb = 22\nc = 3\n"
+
+    def test_patch_matches_first_line_through_bom(self, ops, tmp_path: Path):
+        # The whole point: an edit targeting the BOM-prefixed first line
+        # must match cleanly (the matcher sees BOM-stripped content).
+        target = tmp_path / "mod.py"
+        target.write_bytes(self.BOM.encode("utf-8") + b"import os\nimport sys\n")
+        res = ops.patch_replace(str(target), "import os", "import os, json")
+        assert res.success, res.error
+        raw = target.read_bytes()
+        assert raw == self.BOM.encode("utf-8") + b"import os, json\nimport sys\n"
+
+
 if __name__ == "__main__":
     pytest.main([__file__, "-v"])
diff --git a/tests/tools/test_interrupt.py b/tests/tools/test_interrupt.py
index 27c61023147..5d614f62bc5 100644
--- a/tests/tools/test_interrupt.py
+++ b/tests/tools/test_interrupt.py
@@ -203,6 +203,83 @@ class TestSIGKILLEscalation:
         assert "interrupted" in result_holder["value"]["output"].lower()
 
 
+# ---------------------------------------------------------------------------
+# Regression: _run_tool cleanup on BaseException (issue #35309)
+# ---------------------------------------------------------------------------
+
+class TestRunToolCleanupOnBaseException:
+    """Verify that _run_tool cleans up _interrupted_threads even when
+    _invoke_tool raises a BaseException (e.g. CancelledError).
+
+    Regression test for #35309: without the finally block, a BaseException
+    bypasses ``except Exception``, leaking the worker tid into
+    _interrupted_threads.  ThreadPoolExecutor recycles tids, so the next
+    tool scheduled on the same thread is instantly "interrupted".
+    """
+
+    def test_cleanup_on_base_exception(self):
+        from unittest.mock import MagicMock, patch
+        import types
+        from tools.interrupt import set_interrupt, is_interrupted, _interrupted_threads, _lock
+
+        # Clear global state
+        with _lock:
+            _interrupted_threads.clear()
+
+        # Build a minimal mock agent with the attributes _run_tool needs
+        agent = MagicMock()
+        agent._interrupt_requested = False
+        agent._tool_worker_threads = set()
+        agent._tool_worker_threads_lock = threading.Lock()
+
+        # _set_interrupt delegates to the real module
+        def _mock_set_interrupt(active, tid=None):
+            set_interrupt(active, tid)
+        agent._set_interrupt = _mock_set_interrupt
+
+        # _invoke_tool raises BaseException (simulating CancelledError)
+        agent._invoke_tool = MagicMock(side_effect=BaseException("simulated CancelledError"))
+
+        # Bind the real concurrent method so we get _run_tool
+        from run_agent import AIAgent
+        agent._execute_tool_calls_concurrent = types.MethodType(
+            AIAgent._execute_tool_calls_concurrent, agent
+        )
+
+        # Build a single tool call
+        tc = MagicMock()
+        tc.id = "tc_base_exc"
+        tc.function.name = "dummy_tool"
+        tc.function.arguments = "{}"
+
+        assistant_msg = MagicMock()
+        assistant_msg.tool_calls = [tc]
+
+        # _execute_tool_calls_concurrent will submit _run_tool to a
+        # ThreadPoolExecutor.  The BaseException propagates out of the
+        # worker, but the finally block should still clean up.
+        try:
+            agent._execute_tool_calls_concurrent(assistant_msg, [], "default")
+        except Exception:
+            pass  # ThreadPoolExecutor may re-raise
+
+        # After the worker finishes (even with BaseException), the worker
+        # tid should have been removed from _interrupted_threads and
+        # _tool_worker_threads.
+        assert len(agent._tool_worker_threads) == 0, (
+            f"_tool_worker_threads not cleaned up: {agent._tool_worker_threads}"
+        )
+
+        # Verify no stale tid is left in the global interrupt set.  The
+        # worker thread is recycled by ThreadPoolExecutor, so a leaked tid
+        # would poison the next task on that thread.  We cleared the set at
+        # the start and never set any interrupt ourselves, so a leak from
+        # _run_tool is the only way an entry could land here.
+        with _lock:
+            leaked = set(_interrupted_threads)
+        assert leaked == set(), f"leaked tids in _interrupted_threads: {leaked}"
+
+
 # ---------------------------------------------------------------------------
 # Manual smoke test checklist (not automated)
 # ---------------------------------------------------------------------------
diff --git a/tests/tools/test_managed_browserbase_and_modal.py b/tests/tools/test_managed_browserbase_and_modal.py
index fc2559dc756..f705380991c 100644
--- a/tests/tools/test_managed_browserbase_and_modal.py
+++ b/tests/tools/test_managed_browserbase_and_modal.py
@@ -234,6 +234,44 @@ def test_browserbase_does_not_use_gateway_only_configuration():
     assert provider.is_available() is False
 
 
+def test_browser_use_availability_skips_refresh_for_expired_cached_gateway_token(tmp_path, monkeypatch):
+    _install_fake_tools_package()
+    monkeypatch.delenv("TOOL_GATEWAY_USER_TOKEN", raising=False)
+    expired_at = "2000-01-01T00:00:00+00:00"
+    (tmp_path / "auth.json").write_text(
+        '{"providers":{"nous":{"access_token":"expired-token","refresh_token":"refresh-token","expires_at":"%s"}}}'
+        % expired_at,
+        encoding="utf-8",
+    )
+    refresh_calls = []
+
+    def _record_refresh(*, refresh_skew_seconds=120, **_kwargs):
+        refresh_calls.append(refresh_skew_seconds)
+        return "fresh-token"
+
+    monkeypatch.setattr(
+        "hermes_cli.auth.resolve_nous_access_token",
+        _record_refresh,
+    )
+
+    env = os.environ.copy()
+    env.pop("BROWSER_USE_API_KEY", None)
+    env.update({
+        "HERMES_HOME": str(tmp_path),
+        "BROWSER_USE_GATEWAY_URL": "http://127.0.0.1:3009",
+    })
+
+    with patch.dict(os.environ, env, clear=True):
+        browser_use_module = _load_plugin_module(
+            "plugins.browser.browser_use.provider",
+            "browser/browser_use/provider.py",
+        )
+        provider = browser_use_module.BrowserUseBrowserProvider()
+        assert provider.is_available() is True
+
+    assert refresh_calls == []
+
+
 def test_browser_use_managed_gateway_adds_idempotency_key_and_persists_external_call_id():
     _install_fake_tools_package()
     env = os.environ.copy()
diff --git a/tests/tools/test_managed_tool_gateway.py b/tests/tools/test_managed_tool_gateway.py
index a539fb57cab..2973259ba74 100644
--- a/tests/tools/test_managed_tool_gateway.py
+++ b/tests/tools/test_managed_tool_gateway.py
@@ -12,6 +12,7 @@ assert MODULE_SPEC and MODULE_SPEC.loader
 managed_tool_gateway = module_from_spec(MODULE_SPEC)
 sys.modules[MODULE_SPEC.name] = managed_tool_gateway
 MODULE_SPEC.loader.exec_module(managed_tool_gateway)
+is_managed_tool_gateway_ready = managed_tool_gateway.is_managed_tool_gateway_ready
 resolve_managed_tool_gateway = managed_tool_gateway.resolve_managed_tool_gateway
 
 
@@ -97,3 +98,37 @@ def test_read_nous_access_token_refreshes_expiring_cached_token(tmp_path, monkey
     )
 
     assert managed_tool_gateway.read_nous_access_token() == "fresh-token"
+
+
+def test_is_managed_tool_gateway_ready_skips_refresh_for_expired_cached_token(tmp_path, monkeypatch):
+    monkeypatch.delenv("TOOL_GATEWAY_USER_TOKEN", raising=False)
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    expired_at = (datetime.now(timezone.utc) - timedelta(seconds=30)).isoformat()
+    (tmp_path / "auth.json").write_text(json.dumps({
+        "providers": {
+            "nous": {
+                "access_token": "expired-token",
+                "refresh_token": "refresh-token",
+                "expires_at": expired_at,
+            }
+        }
+    }))
+    refresh_calls = []
+
+    def _record_refresh(*, refresh_skew_seconds=120, **_kwargs):
+        refresh_calls.append(refresh_skew_seconds)
+        return "fresh-token"
+
+    monkeypatch.setattr(
+        "hermes_cli.auth.resolve_nous_access_token",
+        _record_refresh,
+    )
+
+    with patch.dict(
+        os.environ,
+        {"TOOL_GATEWAY_DOMAIN": "nousresearch.com"},
+        clear=False,
+    ), patch.object(managed_tool_gateway, "managed_nous_tools_enabled", return_value=True):
+        assert is_managed_tool_gateway_ready("modal") is True
+
+    assert refresh_calls == []
diff --git a/tests/tools/test_mcp_stability.py b/tests/tools/test_mcp_stability.py
index 1dd76959854..2c3734274d8 100644
--- a/tests/tools/test_mcp_stability.py
+++ b/tests/tools/test_mcp_stability.py
@@ -1,9 +1,12 @@
 """Tests for MCP stability fixes — event loop handler, PID tracking, shutdown robustness."""
 
 import asyncio
+import os
 import signal
 from unittest.mock import patch, MagicMock
 
+import pytest
+
 
 
 # ---------------------------------------------------------------------------
@@ -171,6 +174,221 @@ class TestStdioPidTracking:
             assert fake_pid not in _orphan_stdio_pids
 
 
+# ---------------------------------------------------------------------------
+# Fix 2b: stdio descendant reaping via process group (issue #23799)
+# ---------------------------------------------------------------------------
+#
+# When a stdio MCP wrapper (e.g. ``openclaw mcp serve``) itself spawns a
+# helper subprocess (``claude mcp serve``) and then exits, the helper
+# reparents to systemd-user and is invisible to the per-pid orphan reaper.
+# The fix captures the wrapper's pgid at spawn time and reaps via killpg,
+# which reaches same-group descendants whether or not the direct pid is alive.
+
+class TestStdioPgroupReaping:
+    """_kill_orphaned_mcp_children reaps via killpg when a pgid is tracked."""
+
+    def _reset_state(self):
+        from tools.mcp_tool import _stdio_pids, _orphan_stdio_pids, _stdio_pgids, _lock
+        with _lock:
+            _stdio_pids.clear()
+            _orphan_stdio_pids.clear()
+            _stdio_pgids.clear()
+
+    def test_killpg_used_when_pgid_tracked(self, monkeypatch):
+        """SIGTERM and SIGKILL route through killpg when pgid is known."""
+        from tools.mcp_tool import (
+            _kill_orphaned_mcp_children,
+            _orphan_stdio_pids,
+            _stdio_pgids,
+            _lock,
+        )
+
+        self._reset_state()
+        fake_pid = 525252
+        fake_pgid = 525252  # session leader: pgid == pid
+        with _lock:
+            _orphan_stdio_pids.add(fake_pid)
+            _stdio_pgids[fake_pid] = fake_pgid
+
+        fake_sigkill = 9
+        monkeypatch.setattr(signal, "SIGKILL", fake_sigkill, raising=False)
+
+        # Ensure os.killpg exists on this platform for the test to make sense;
+        # the production fallback path is covered by the per-pid tests above.
+        if not hasattr(os, "killpg"):
+            pytest.skip("os.killpg not available on this platform")
+
+        with patch("tools.mcp_tool.os.killpg") as mock_killpg, \
+             patch("tools.mcp_tool.os.kill") as mock_kill, \
+             patch("gateway.status._pid_exists", return_value=True), \
+             patch("time.sleep"):
+            _kill_orphaned_mcp_children()
+
+        # Both phases should have used killpg (pgroup reach), not per-pid kill.
+        mock_killpg.assert_any_call(fake_pgid, signal.SIGTERM)
+        mock_killpg.assert_any_call(fake_pgid, fake_sigkill)
+        assert mock_killpg.call_count == 2
+        mock_kill.assert_not_called()
+
+        with _lock:
+            assert fake_pid not in _orphan_stdio_pids
+            assert fake_pid not in _stdio_pgids
+
+    def test_killpg_failure_falls_back_to_kill(self, monkeypatch):
+        """If killpg raises ProcessLookupError (pgroup gone), try os.kill."""
+        from tools.mcp_tool import (
+            _kill_orphaned_mcp_children,
+            _orphan_stdio_pids,
+            _stdio_pgids,
+            _lock,
+        )
+
+        self._reset_state()
+        fake_pid = 636363
+        fake_pgid = 636363
+        with _lock:
+            _orphan_stdio_pids.add(fake_pid)
+            _stdio_pgids[fake_pid] = fake_pgid
+
+        if not hasattr(os, "killpg"):
+            pytest.skip("os.killpg not available on this platform")
+
+        with patch(
+            "tools.mcp_tool.os.killpg",
+            side_effect=ProcessLookupError("no such process group"),
+        ) as mock_killpg, \
+             patch("tools.mcp_tool.os.kill") as mock_kill, \
+             patch("gateway.status._pid_exists", return_value=False), \
+             patch("time.sleep"):
+            _kill_orphaned_mcp_children()
+
+        # killpg was attempted (phase 1 SIGTERM) and fell back to os.kill.
+        # Phase 3 skips because _pid_exists returns False (direct pid gone).
+        mock_killpg.assert_called()
+        mock_kill.assert_any_call(fake_pid, signal.SIGTERM)
+
+        with _lock:
+            assert fake_pid not in _orphan_stdio_pids
+            assert fake_pid not in _stdio_pgids
+
+    def test_no_pgid_uses_per_pid_kill(self, monkeypatch):
+        """When no pgid is recorded (e.g. Windows), fall back to os.kill."""
+        from tools.mcp_tool import (
+            _kill_orphaned_mcp_children,
+            _orphan_stdio_pids,
+            _stdio_pgids,
+            _lock,
+        )
+
+        self._reset_state()
+        fake_pid = 747474
+        with _lock:
+            _orphan_stdio_pids.add(fake_pid)
+            # No entry in _stdio_pgids.
+
+        with patch("tools.mcp_tool.os.kill") as mock_kill, \
+             patch("gateway.status._pid_exists", return_value=False), \
+             patch("time.sleep"):
+            # killpg may or may not exist; either way the no-pgid path skips it.
+            _kill_orphaned_mcp_children()
+
+        mock_kill.assert_any_call(fake_pid, signal.SIGTERM)
+
+        with _lock:
+            assert fake_pid not in _orphan_stdio_pids
+
+    @pytest.mark.live_system_guard_bypass
+    @pytest.mark.skipif(
+        not hasattr(os, "killpg") or not hasattr(os, "setsid"),
+        reason="POSIX-only: requires os.killpg and os.setsid",
+    )
+    def test_grandchild_reaped_via_pgroup(self, tmp_path):
+        """End-to-end: parent spawns grandchild, parent exits, killpg reaps grandchild.
+
+        Mirrors issue #23799: a stdio MCP wrapper (parent) launches a long-lived
+        helper subprocess (grandchild) in the same process group, then the
+        wrapper exits while the grandchild keeps running.  killpg on the pgid
+        captured at spawn time must still deliver the signal to the grandchild.
+
+        Marked ``live_system_guard_bypass`` because this test genuinely needs
+        real signal delivery to its own subprocess tree (the conftest guard
+        only knows the test's *initial* children; the spawned tree here is
+        outside that allowlist).
+        """
+        import subprocess
+        import sys
+        import time as _time
+
+        psutil = pytest.importorskip("psutil")
+
+        # Grandchild: sleep forever, write its pid then wait.
+        grandchild_pid_file = tmp_path / "grandchild.pid"
+        grandchild_script = tmp_path / "grandchild.py"
+        grandchild_script.write_text(
+            "import os, sys, time\n"
+            f"open({str(grandchild_pid_file)!r}, 'w').write(str(os.getpid()))\n"
+            "while True:\n"
+            "    time.sleep(0.5)\n"
+        )
+
+        # Parent: spawn grandchild, exit immediately (without killing it).
+        parent_script = tmp_path / "parent.py"
+        parent_script.write_text(
+            "import subprocess, sys\n"
+            f"subprocess.Popen([sys.executable, {str(grandchild_script)!r}])\n"
+            # Parent exits — grandchild reparents to init.
+        )
+
+        # Spawn parent in its own session (mirrors stdio_client behaviour).
+        parent = subprocess.Popen(
+            [sys.executable, str(parent_script)],
+            start_new_session=True,
+        )
+        parent_pgid = os.getpgid(parent.pid)
+        # Wait for parent to exit and grandchild to spin up.
+        parent.wait(timeout=5)
+        deadline = _time.time() + 5
+        while _time.time() < deadline and not grandchild_pid_file.exists():
+            _time.sleep(0.05)
+        assert grandchild_pid_file.exists(), "grandchild did not start"
+        grandchild_pid = int(grandchild_pid_file.read_text().strip())
+
+        # Sanity: grandchild is alive and shares the parent's pgid.
+        assert psutil.pid_exists(grandchild_pid)
+        assert os.getpgid(grandchild_pid) == parent_pgid
+
+        # Drive the reaper: register the parent pid + pgid as an orphan.
+        from tools.mcp_tool import (
+            _kill_orphaned_mcp_children,
+            _orphan_stdio_pids,
+            _stdio_pgids,
+            _stdio_pids,
+            _lock,
+        )
+        with _lock:
+            _stdio_pids.clear()
+            _orphan_stdio_pids.clear()
+            _stdio_pgids.clear()
+            _orphan_stdio_pids.add(parent.pid)
+            _stdio_pgids[parent.pid] = parent_pgid
+        try:
+            _kill_orphaned_mcp_children()
+        finally:
+            # Belt-and-suspenders: ensure grandchild is dead even if test fails.
+            try:
+                os.kill(grandchild_pid, signal.SIGKILL)
+            except ProcessLookupError:
+                pass
+
+        # Grandchild should be gone — SIGTERM via killpg in phase 1 reached it.
+        deadline = _time.time() + 3
+        while _time.time() < deadline and psutil.pid_exists(grandchild_pid):
+            _time.sleep(0.05)
+        assert not psutil.pid_exists(grandchild_pid), (
+            "grandchild survived killpg-based reaping (issue #23799 regression)"
+        )
+
+
 # ---------------------------------------------------------------------------
 # Fix 3: MCP reload timeout (cli.py)
 # ---------------------------------------------------------------------------
diff --git a/tests/tools/test_send_message_tool.py b/tests/tools/test_send_message_tool.py
index 3a6ad11fdea..10a4868655b 100644
--- a/tests/tools/test_send_message_tool.py
+++ b/tests/tools/test_send_message_tool.py
@@ -1220,6 +1220,90 @@ class TestParseTargetRefSlack:
         assert _parse_target_ref("telegram", "C0B0QV5434G")[2] is False
 
 
+class TestParseTargetRefEmail:
+    """_parse_target_ref recognizes email addresses as explicit for the email platform."""
+
+    def test_standard_email_is_explicit(self):
+        chat_id, thread_id, is_explicit = _parse_target_ref("email", "user@example.com")
+        assert chat_id == "user@example.com"
+        assert thread_id is None
+        assert is_explicit is True
+
+    def test_email_with_dots_in_local_part(self):
+        chat_id, _, is_explicit = _parse_target_ref("email", "first.last@example.co.uk")
+        assert chat_id == "first.last@example.co.uk"
+        assert is_explicit is True
+
+    def test_email_with_plus_tag(self):
+        chat_id, _, is_explicit = _parse_target_ref("email", "user+tag@gmail.com")
+        assert chat_id == "user+tag@gmail.com"
+        assert is_explicit is True
+
+    def test_email_strips_whitespace(self):
+        chat_id, _, is_explicit = _parse_target_ref("email", "  user@example.com  ")
+        assert chat_id == "user@example.com"
+        assert is_explicit is True
+
+    def test_invalid_email_not_explicit(self):
+        assert _parse_target_ref("email", "not-an-email")[2] is False
+        assert _parse_target_ref("email", "@example.com")[2] is False
+        assert _parse_target_ref("email", "user@")[2] is False
+        assert _parse_target_ref("email", "user@.com")[2] is False
+
+    def test_email_not_explicit_for_other_platforms(self):
+        assert _parse_target_ref("telegram", "user@example.com")[2] is False
+        assert _parse_target_ref("discord", "user@example.com")[2] is False
+        assert _parse_target_ref("slack", "user@example.com")[2] is False
+
+
+class TestEmailHomeChannelErrorHint:
+    """The no-home-channel error for email points at the real env var.
+
+    Email reads its home channel from EMAIL_HOME_ADDRESS (gateway/config.py),
+    not the generic EMAIL_HOME_CHANNEL. The error guidance must name the
+    variable that is actually consulted so users who follow it succeed.
+    """
+
+    def test_email_error_names_email_home_address(self):
+        email_cfg = SimpleNamespace(enabled=True, token="", extra={})
+        config = SimpleNamespace(
+            platforms={Platform.EMAIL: email_cfg},
+            get_home_channel=lambda _platform: None,
+        )
+        with patch("gateway.config.load_gateway_config", return_value=config), \
+             patch("tools.interrupt.is_interrupted", return_value=False):
+            result = json.loads(
+                send_message_tool(
+                    {
+                        "action": "send",
+                        "target": "email",
+                        "message": "hi",
+                    }
+                )
+            )
+        assert "EMAIL_HOME_ADDRESS" in result["error"]
+        assert "EMAIL_HOME_CHANNEL" not in result["error"]
+
+    def test_non_email_platform_keeps_generic_home_channel_hint(self):
+        telegram_cfg = SimpleNamespace(enabled=True, token="***", extra={})
+        config = SimpleNamespace(
+            platforms={Platform.TELEGRAM: telegram_cfg},
+            get_home_channel=lambda _platform: None,
+        )
+        with patch("gateway.config.load_gateway_config", return_value=config), \
+             patch("tools.interrupt.is_interrupted", return_value=False):
+            result = json.loads(
+                send_message_tool(
+                    {
+                        "action": "send",
+                        "target": "telegram",
+                        "message": "hi",
+                    }
+                )
+            )
+        assert "TELEGRAM_HOME_CHANNEL" in result["error"]
+
+
 class TestSendDiscordThreadId:
     """_send_discord uses thread_id when provided."""
 
diff --git a/tests/tools/test_skills_sync.py b/tests/tools/test_skills_sync.py
index 1813f4c50e7..c13ed18727a 100644
--- a/tests/tools/test_skills_sync.py
+++ b/tests/tools/test_skills_sync.py
@@ -845,3 +845,85 @@ class TestResetBundledSkill:
             post_manifest = _read_manifest()
             assert "google-workspace" in post_manifest
         assert (skills_dir / "productivity" / "google-workspace" / "SKILL.md").exists()
+
+    def test_reset_restore_succeeds_on_readonly_nix_tree(self, tmp_path):
+        """#34972: --restore must succeed even when the user copy is a fully
+        read-only tree (r-xr-xr-x dirs + files), as produced by copying a
+        Nix-store source. The manifest is re-baselined and bundled re-copied."""
+        import os
+        import stat
+
+        bundled = self._setup_bundled(tmp_path)
+        skills_dir = tmp_path / "user_skills"
+        manifest_file = skills_dir / ".bundled_manifest"
+
+        dest = skills_dir / "productivity" / "google-workspace"
+        sub = dest / "references"
+        sub.mkdir(parents=True)
+        (dest / "SKILL.md").write_text("# user version\n")
+        (sub / "ref.md").write_text("# nested ref\n")
+        manifest_file.write_text(
+            "google-workspace:STALEHASH000000000000000000000000\n"
+        )
+
+        # Read-only files AND directories — the real Nix-store case.
+        ro_dir = (
+            stat.S_IRUSR | stat.S_IXUSR | stat.S_IRGRP | stat.S_IXGRP
+            | stat.S_IROTH | stat.S_IXOTH
+        )
+        os.chmod(sub / "ref.md", stat.S_IREAD)
+        os.chmod(dest / "SKILL.md", stat.S_IREAD)
+        os.chmod(sub, ro_dir)
+        os.chmod(dest, ro_dir)
+
+        try:
+            with self._patches(bundled, skills_dir, manifest_file):
+                result = reset_bundled_skill("google-workspace", restore=True)
+
+            assert result["ok"] is True
+            assert result["action"] == "restored"
+            # Bundled version was re-copied over the (deleted) user copy.
+            assert "upstream" in (dest / "SKILL.md").read_text()
+            # The read-only nested user dir/file was fully removed, not left behind.
+            assert not (sub / "ref.md").exists()
+            # sync ran and re-copied the skill (not stuck in limbo).
+            assert "google-workspace" in result["synced"]["copied"]
+        finally:
+            # Restore perms so tmp_path teardown can remove anything left.
+            for p in (sub, dest):
+                if p.exists():
+                    os.chmod(p, stat.S_IRWXU)
+
+    def test_reset_restore_preserves_manifest_on_rmtree_failure(self, tmp_path):
+        """#34972: when the user copy genuinely cannot be removed, the manifest
+        entry must NOT be deleted — otherwise the skill enters a limbo state
+        where future syncs silently skip it forever."""
+        bundled = self._setup_bundled(tmp_path)
+        skills_dir = tmp_path / "user_skills"
+        manifest_file = skills_dir / ".bundled_manifest"
+
+        dest = skills_dir / "productivity" / "google-workspace"
+        dest.mkdir(parents=True)
+        (dest / "SKILL.md").write_text("# user version\n")
+        manifest_file.write_text(
+            "google-workspace:STALEHASH000000000000000000000000\n"
+        )
+
+        # Simulate an unremovable tree (e.g. a busy mountpoint or a path even
+        # chmod can't rescue) by making the removal helper raise.
+        def _boom(_path):
+            raise PermissionError(13, "Permission denied")
+
+        with self._patches(bundled, skills_dir, manifest_file), patch(
+            "tools.skills_sync._rmtree_writable", side_effect=_boom
+        ):
+            result = reset_bundled_skill("google-workspace", restore=True)
+
+        # Restore failed, and the manifest must be left untouched.
+        assert result["ok"] is False
+        assert result["action"] == "not_reset"
+        assert "Manifest entry preserved" in result["message"]
+        manifest_after = manifest_file.read_text()
+        assert "google-workspace" in manifest_after
+        # User copy is still on disk (we changed nothing).
+        assert (dest / "SKILL.md").exists()
diff --git a/tests/tools/test_vision_tools.py b/tests/tools/test_vision_tools.py
index e3bff50d56f..7a50a4b4630 100644
--- a/tests/tools/test_vision_tools.py
+++ b/tests/tools/test_vision_tools.py
@@ -917,3 +917,84 @@ class TestIsImageSizeError:
 
     def test_empty_message(self):
         assert not _is_image_size_error(Exception(""))
+
+
+class TestDownloadRetryClassification:
+    """Error-class-aware retry: 4xx fail-fast, 429/5xx/transient retried (issue #32296)."""
+
+    @staticmethod
+    def _status_error(status_code):
+        import httpx
+
+        request = httpx.Request("GET", "https://example.com/img.jpg")
+        response = httpx.Response(status_code, request=request)
+        return httpx.HTTPStatusError(
+            f"{status_code}", request=request, response=response
+        )
+
+    def _make_client_raising_status(self, status_code):
+        """AsyncClient whose response.raise_for_status() raises HTTPStatusError."""
+        mock_response = MagicMock()
+        mock_response.raise_for_status = MagicMock(
+            side_effect=self._status_error(status_code)
+        )
+        mock_client = AsyncMock()
+        mock_client.__aenter__ = AsyncMock(return_value=mock_client)
+        mock_client.__aexit__ = AsyncMock(return_value=False)
+        mock_client.get = AsyncMock(return_value=mock_response)
+        return mock_client
+
+    def test_is_retryable_classification(self):
+        from tools.vision_tools import _is_retryable_download_error
+
+        # Non-retryable client errors
+        for code in (400, 403, 404, 410):
+            assert _is_retryable_download_error(self._status_error(code)) is False
+        # Retryable: rate limit + server errors
+        for code in (429, 500, 502, 503):
+            assert _is_retryable_download_error(self._status_error(code)) is True
+        # Policy/SSRF/size errors are terminal
+        assert _is_retryable_download_error(PermissionError("blocked")) is False
+        assert _is_retryable_download_error(ValueError("too large")) is False
+        # Unclassified (network blip) is retryable
+        assert _is_retryable_download_error(ConnectionError("reset")) is True
+
+    @pytest.mark.asyncio
+    async def test_404_fails_fast_without_retry(self, tmp_path):
+        """A 404 must raise on the first attempt — no backoff sleep, no extra GETs."""
+        import httpx
+        from tools.vision_tools import _download_image
+
+        mock_client = self._make_client_raising_status(404)
+        with (
+            patch("tools.vision_tools.httpx.AsyncClient", return_value=mock_client),
+            patch("tools.vision_tools.check_website_access", return_value=None),
+            patch("asyncio.sleep", new_callable=AsyncMock) as mock_sleep,
+            pytest.raises(httpx.HTTPStatusError),
+        ):
+            await _download_image(
+                "https://example.com/missing.jpg", tmp_path / "x.jpg", max_retries=3
+            )
+        # Exactly one attempt, zero backoff sleeps.
+        assert mock_client.get.await_count == 1
+        mock_sleep.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_503_retries_then_raises(self, tmp_path):
+        """A 5xx is retried up to max_retries, sleeping between attempts."""
+        import httpx
+        from tools.vision_tools import _download_image
+
+        mock_client = self._make_client_raising_status(503)
+        with (
+            patch("tools.vision_tools.httpx.AsyncClient", return_value=mock_client),
+            patch("tools.vision_tools.check_website_access", return_value=None),
+            patch("asyncio.sleep", new_callable=AsyncMock) as mock_sleep,
+            pytest.raises(httpx.HTTPStatusError),
+        ):
+            await _download_image(
+                "https://example.com/flaky.jpg", tmp_path / "y.jpg", max_retries=3
+            )
+        # All three attempts used, two backoff sleeps between them.
+        assert mock_client.get.await_count == 3
+        assert mock_sleep.await_count == 2
diff --git a/tests/tools/test_web_tools_config.py b/tests/tools/test_web_tools_config.py
index 87fc27cc372..e9bcd8e2079 100644
--- a/tests/tools/test_web_tools_config.py
+++ b/tests/tools/test_web_tools_config.py
@@ -623,10 +623,49 @@ class TestCheckWebApiKey:
             assert check_web_api_key() is True
 
     def test_tool_gateway_returns_true(self):
-        with patch("tools.web_tools._read_nous_access_token", return_value="nous-token"):
+        with patch("tools.web_tools._peek_nous_access_token", return_value="nous-token"):
             from tools.web_tools import check_web_api_key
             assert check_web_api_key() is True
 
+    def test_tool_gateway_availability_skips_refresh_for_expired_cached_token(
+        self,
+        tmp_path,
+        monkeypatch,
+    ):
+        monkeypatch.delenv("TOOL_GATEWAY_USER_TOKEN", raising=False)
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        expired_at = "2000-01-01T00:00:00+00:00"
+        (tmp_path / "auth.json").write_text(json.dumps({
+            "providers": {
+                "nous": {
+                    "access_token": "expired-token",
+                    "refresh_token": "refresh-token",
+                    "expires_at": expired_at,
+                }
+            }
+        }))
+        refresh_calls = []
+
+        def _record_refresh(*, refresh_skew_seconds=120, **_kwargs):
+            refresh_calls.append(refresh_skew_seconds)
+            return "fresh-token"
+
+        monkeypatch.setattr(
+            "hermes_cli.auth.resolve_nous_access_token",
+            _record_refresh,
+        )
+
+        with patch.dict(
+            os.environ,
+            {"FIRECRAWL_GATEWAY_URL": "http://127.0.0.1:3002"},
+            clear=False,
+        ):
+            from tools.web_tools import check_web_api_key
+
+            assert check_web_api_key() is True
+
+        assert refresh_calls == []
+
     def test_configured_backend_must_match_available_provider(self):
         with patch("tools.web_tools._load_web_config", return_value={"backend": "parallel"}):
             with patch("tools.web_tools._read_nous_access_token", return_value="nous-token"):
@@ -636,7 +675,7 @@ class TestCheckWebApiKey:
 
     def test_configured_firecrawl_backend_accepts_managed_gateway(self):
         with patch("tools.web_tools._load_web_config", return_value={"backend": "firecrawl"}):
-            with patch("tools.web_tools._read_nous_access_token", return_value="nous-token"):
+            with patch("tools.web_tools._peek_nous_access_token", return_value="nous-token"):
                 with patch.dict(os.environ, {"FIRECRAWL_GATEWAY_URL": "http://127.0.0.1:3002"}, clear=False):
                     from tools.web_tools import check_web_api_key
                     assert check_web_api_key() is True
diff --git a/tests/tui_gateway/test_wait_for_mcp_discovery.py b/tests/tui_gateway/test_wait_for_mcp_discovery.py
new file mode 100644
index 00000000000..ab5bb5f6ddc
--- /dev/null
+++ b/tests/tui_gateway/test_wait_for_mcp_discovery.py
@@ -0,0 +1,78 @@
+"""Tests for tui_gateway.entry.wait_for_mcp_discovery (PR #35245).
+
+MCP tool discovery runs in a background daemon thread so a slow/dead server
+can't freeze ``gateway.ready``.  The agent snapshots its tool list once at
+build time and never re-reads it, so ``_make_agent`` briefly joins the
+discovery thread before building — bounded, so a dead server can't re-introduce
+the startup hang, and a no-op once discovery has finished.
+"""
+
+import threading
+import time
+
+import tui_gateway.entry as entry
+
+
+def _restore_thread_slot(saved):
+    entry._mcp_discovery_thread = saved
+
+
+def test_no_thread_is_noop():
+    """When no discovery thread was started (the common no-MCP case), the
+    helper returns immediately and never blocks."""
+    saved = entry._mcp_discovery_thread
+    try:
+        entry._mcp_discovery_thread = None
+        start = time.monotonic()
+        entry.wait_for_mcp_discovery(timeout=5.0)
+        assert time.monotonic() - start < 0.1
+    finally:
+        _restore_thread_slot(saved)
+
+
+def test_already_finished_thread_is_noop():
+    """A thread that has already finished is not joined-on (dead thread)."""
+    saved = entry._mcp_discovery_thread
+    try:
+        t = threading.Thread(target=lambda: None, daemon=True)
+        t.start()
+        t.join()  # ensure it's finished
+        entry._mcp_discovery_thread = t
+        start = time.monotonic()
+        entry.wait_for_mcp_discovery(timeout=5.0)
+        assert time.monotonic() - start < 0.1
+    finally:
+        _restore_thread_slot(saved)
+
+
+def test_fast_thread_is_joined():
+    """A reachable-but-still-connecting (fast) server lands before the agent
+    snapshots tools — the helper waits for it to finish."""
+    saved = entry._mcp_discovery_thread
+    try:
+        t = threading.Thread(target=lambda: time.sleep(0.05), daemon=True)
+        t.start()
+        entry._mcp_discovery_thread = t
+        entry.wait_for_mcp_discovery(timeout=1.0)
+        assert not t.is_alive()  # joined to completion
+    finally:
+        _restore_thread_slot(saved)
+
+
+def test_hung_thread_is_bounded_by_timeout():
+    """A slow/dead server must NOT re-introduce the startup hang — the join is
+    bounded by the timeout and returns even though the thread is still alive."""
+    saved = entry._mcp_discovery_thread
+    stop = threading.Event()
+    try:
+        t = threading.Thread(target=stop.wait, daemon=True)  # blocks until set
+        t.start()
+        entry._mcp_discovery_thread = t
+        start = time.monotonic()
+        entry.wait_for_mcp_discovery(timeout=0.3)
+        elapsed = time.monotonic() - start
+        assert 0.25 <= elapsed < 1.0  # bounded near the timeout, not forever
+        assert t.is_alive()  # thread still running; we did not block on it
+    finally:
+        stop.set()
+        _restore_thread_slot(saved)
diff --git a/tools/browser_supervisor.py b/tools/browser_supervisor.py
index 73dd3e51bb5..19a16f699c1 100644
--- a/tools/browser_supervisor.py
+++ b/tools/browser_supervisor.py
@@ -496,12 +496,12 @@ class CDPSupervisor:
         if not session_id:
             return {"ok": False, "error": "supervisor has no attached page session"}
 
-        async def _do_eval() -> Dict[str, Any]:
+        async def _do_eval(by_value: bool) -> Dict[str, Any]:
             return await self._cdp(
                 "Runtime.evaluate",
                 {
                     "expression": expression,
-                    "returnByValue": return_by_value,
+                    "returnByValue": by_value,
                     "awaitPromise": await_promise,
                     # userGesture matters for things like clipboard / fullscreen
                     # APIs that require a user-activation context.
@@ -511,14 +511,32 @@ class CDPSupervisor:
                 timeout=timeout,
             )
 
-        try:
-            from agent.async_utils import safe_schedule_threadsafe
-            fut = safe_schedule_threadsafe(_do_eval(), loop)
+        from agent.async_utils import safe_schedule_threadsafe
+
+        def _run_eval(by_value: bool) -> Dict[str, Any]:
+            fut = safe_schedule_threadsafe(_do_eval(by_value), loop)
             if fut is None:
-                return {"ok": False, "error": "Browser supervisor loop unavailable"}
-            response = fut.result(timeout=timeout + 1)
+                raise RuntimeError("Browser supervisor loop unavailable")
+            return fut.result(timeout=timeout + 1)
+
+        try:
+            response = _run_eval(return_by_value)
         except Exception as exc:
-            return {"ok": False, "error": f"{type(exc).__name__}: {exc}"}
+            # ``returnByValue=True`` asks Chrome to deep-serialize the result.
+            # For live DOM nodes / NodeLists / Window that serialization can
+            # blow past CDP's recursion guard and fail the whole call with
+            # ``Object reference chain is too long`` (a protocol-level error,
+            # not a JS exception).  Retry once with ``returnByValue=False`` so
+            # Chrome returns the object's description string instead — the same
+            # graceful degradation path used for ``document.querySelector(...)``
+            # results — rather than crashing the eval.
+            if return_by_value and "reference chain is too long" in str(exc).lower():
+                try:
+                    response = _run_eval(False)
+                except Exception as exc2:
+                    return {"ok": False, "error": f"{type(exc2).__name__}: {exc2}"}
+            else:
+                return {"ok": False, "error": f"{type(exc).__name__}: {exc}"}
 
         # Runtime.evaluate response shape:
         #   {"id": N, "result": {"result": {"type": "...", "value": ..., ...},
diff --git a/tools/browser_tool.py b/tools/browser_tool.py
index f7d4d7577b4..482f4e17845 100644
--- a/tools/browser_tool.py
+++ b/tools/browser_tool.py
@@ -2874,6 +2874,22 @@ def _browser_eval(expression: str, task_id: Optional[str] = None) -> str:
                 "error": f"JavaScript evaluation is not supported by this browser backend. {err}",
             }
             return json.dumps(_copy_fallback_warning(response, result))
+        # A live DOM node / NodeList / Window can't be JSON-serialized by CDP
+        # and fails the eval with "Object reference chain is too long".  The
+        # supervisor fast path retries with returnByValue=false, but the CLI
+        # subprocess can't, so turn the cryptic protocol error into actionable
+        # guidance instead of surfacing it raw.
+        if "reference chain is too long" in err.lower():
+            response = {
+                "success": False,
+                "error": (
+                    "Expression returned a live DOM node / NodeList / Window, "
+                    "which can't be serialized. Extract a primitive value "
+                    "(e.g. .innerText, .href, .src, .value) or use "
+                    "JSON.stringify() / a snapshot tool instead."
+                ),
+            }
+            return json.dumps(_copy_fallback_warning(response, result))
         response = {
             "success": False,
             "error": err,
diff --git a/tools/file_operations.py b/tools/file_operations.py
index b27405c58d7..32d878de0e5 100644
--- a/tools/file_operations.py
+++ b/tools/file_operations.py
@@ -113,6 +113,36 @@ def _normalize_line_endings(text: str, target: str) -> str:
     return text
 
 
+# UTF-8 byte order mark. Some Windows editors (Notepad, older Visual Studio,
+# some PowerShell redirects) prepend this invisible 3-byte marker
+# (EF BB BF == U+FEFF) to UTF-8 text files. It renders as nothing but is a
+# real character at the start of the decoded string, so without handling it:
+#   - read_file would surface a stray U+FEFF as the first character (the
+#     model sees a phantom char before `import ...`), and
+#   - patch matches against the true first line would miss, and write_file
+#     would silently drop or double the marker on rewrite.
+# We strip it on read so the model sees clean content, and restore it on
+# write when the original file had one — exactly mirroring the line-ending
+# preservation above (detect on disk, preserve across the edit).
+_UTF8_BOM = "\ufeff"
+
+
+def _strip_bom(text: str) -> tuple[str, bool]:
+    """Return (text-without-leading-BOM, had_bom).
+
+    Only a single leading BOM is stripped; a BOM appearing mid-content is
+    left alone (it's legitimate data there, not a file marker).
+    """
+    if text and text.startswith(_UTF8_BOM):
+        return text[len(_UTF8_BOM):], True
+    return text, False
+
+
+def _has_bom(text: Optional[str]) -> bool:
+    """True if ``text`` begins with a UTF-8 BOM."""
+    return bool(text) and text.startswith(_UTF8_BOM)
+
+
 def _is_write_denied(path: str) -> bool:
     """Return True if path is on the write deny list."""
     return _shared_is_write_denied(path)
@@ -672,8 +702,25 @@ class ShellFileOperations(FileOperations):
         return ext in IMAGE_EXTENSIONS
     
     def _add_line_numbers(self, content: str, start_line: int = 1) -> str:
-        """Add line numbers to content in LINE_NUM|CONTENT format."""
+        """Add line numbers to content in ``LINE_NUM|CONTENT`` format.
+
+        The gutter uses a compact ``<n>|`` prefix (e.g. ``34|foo``) rather
+        than a fixed-width zero/space-padded one (``    34|foo``). The
+        padding was pure token overhead: on dense source the padded gutter
+        cost ~48% more tokens than the bare content and ~16% more than the
+        compact form, because the leading spaces + zero-padding tokenize
+        into extra tokens on every single line. An A/B (Sonnet 4.6, 2
+        passes) showed the compact gutter matches the padded gutter on
+        line-reference / patch / value-lookup / structure tasks (4/4 both),
+        while dropping line numbers entirely regressed line-referencing
+        (the model hand-counted and was off-by-one, 3/4) — so we keep the
+        numbers, just not the padding. ``HERMES_READ_GUTTER=padded``
+        restores the legacy fixed-width format for anyone who relied on
+        column alignment.
+        """
+        import os as _os
         from tools.tool_output_limits import get_max_line_length
+        padded = (_os.environ.get("HERMES_READ_GUTTER") or "").lower() == "padded"
         max_line_length = get_max_line_length()
         lines = content.split('\n')
         numbered = []
@@ -681,7 +728,7 @@ class ShellFileOperations(FileOperations):
             # Truncate long lines
             if len(line) > max_line_length:
                 line = line[:max_line_length] + "... [truncated]"
-            numbered.append(f"{i:6d}|{line}")
+            numbered.append(f"{i:6d}|{line}" if padded else f"{i}|{line}")
         return '\n'.join(numbered)
     
     def _expand_path(self, path: str) -> str:
@@ -726,6 +773,60 @@ class ShellFileOperations(FileOperations):
         # Use single quotes and escape any single quotes in the string
         return "'" + arg.replace("'", "'\"'\"'") + "'"
 
+    def _atomic_write(self, path: str, content: str) -> "ExecuteResult":
+        """Write ``content`` to ``path`` atomically via temp-file + rename.
+
+        Streams ``content`` over stdin into a temp file in the SAME
+        directory as ``path`` (so the final ``mv`` is a real rename on the
+        same filesystem, not a non-atomic cross-device copy), preserves the
+        existing file's mode if it exists, then renames over the target.
+        On any failure the temp file is removed so we never leak a partial
+        ``.hermes-tmp`` file next to the user's data, and the original file
+        is left untouched. Content rides stdin so there is no ARG_MAX limit.
+
+        Returns an :class:`ExecuteResult`; ``exit_code == 0`` means the file
+        was swapped into place atomically. A non-zero exit means nothing was
+        renamed and the original (if any) is intact.
+        """
+        q_path = self._escape_shell_arg(path)
+        parent = os.path.dirname(path) or "."
+        q_parent = self._escape_shell_arg(parent)
+        # template basename: hidden so it doesn't show up in casual `ls`,
+        # carries a marker so an orphaned temp (only possible on a hard
+        # crash *between* cat and mv) is identifiable.
+        tmpl = self._escape_shell_arg(".hermes-tmp.XXXXXX")
+
+        # One shell script, fully quoted. Notes:
+        #  - `mktemp` lands the temp in the target's own dir (-p) so `mv` is
+        #    same-FS atomic; we fall back to a PID-stamped name if the
+        #    backend lacks mktemp (rare; busybox/macOS/Linux all ship it).
+        #  - `chmod --reference` is GNU-only, so we read the octal mode with
+        #    `stat` (GNU `-c%a` or BSD `-f%Lp`) and `chmod` it explicitly;
+        #    silent best-effort — a perms-copy failure must not abort the
+        #    write, the file still lands with default umask perms.
+        #  - `trap ... EXIT` guarantees the temp is removed on every error
+        #    path (cat failure, mv failure, signal) but NOT after a
+        #    successful mv (the temp no longer exists by then).
+        #  - we `cat >` the temp, then `mv -f` it over the target.
+        script = (
+            "set -e; "
+            f"d={q_parent}; t={q_path}; "
+            'tmp="$(mktemp -p "$d" ' + tmpl + ' 2>/dev/null '
+            '|| mktemp "$d/.hermes-tmp.$$.XXXXXX" 2>/dev/null '
+            '|| { tmp="$d/.hermes-tmp.$$"; : > "$tmp" && echo "$tmp"; })"; '
+            '[ -n "$tmp" ] || { echo "atomic write: could not create temp file" >&2; exit 1; }; '
+            "trap 'rm -f \"$tmp\"' EXIT; "
+            # preserve mode of an existing target (best-effort, never fatal)
+            'if [ -e "$t" ]; then '
+            'm="$(stat -c%a "$t" 2>/dev/null || stat -f%Lp "$t" 2>/dev/null || true)"; '
+            '[ -n "$m" ] && chmod "$m" "$tmp" 2>/dev/null || true; '
+            "fi; "
+            'cat > "$tmp"; '
+            'mv -f "$tmp" "$t"; '
+            "trap - EXIT"
+        )
+        return self._exec(script, stdin_data=content)
+
     def _detect_file_line_ending(self, path: str, pre_content: Optional[str] = None) -> Optional[str]:
         """Detect the dominant line ending of a file on disk.
 
@@ -747,6 +848,22 @@ class ShellFileOperations(FileOperations):
             return None
         return _detect_line_ending(head_result.stdout)
 
+    def _file_has_bom(self, path: str, pre_content: Optional[str] = None) -> bool:
+        """Whether the file on disk starts with a UTF-8 BOM.
+
+        Uses ``pre_content`` if we already read the file (zero extra exec
+        calls); otherwise issues a tiny ``head -c 3`` to sample just the
+        marker. A missing/empty file returns False (new writes get no BOM
+        unless the caller explicitly includes one).
+        """
+        if pre_content is not None:
+            return _has_bom(pre_content)
+        head_cmd = f"head -c 3 {self._escape_shell_arg(path)} 2>/dev/null"
+        head_result = self._exec(head_cmd)
+        if head_result.exit_code != 0 or not head_result.stdout:
+            return False
+        return _has_bom(head_result.stdout)
+
 
     def _unified_diff(self, old_content: str, new_content: str, filename: str) -> str:
         """Generate unified diff between old and new content."""
@@ -831,6 +948,11 @@ class ShellFileOperations(FileOperations):
         if read_result.exit_code != 0:
             return ReadResult(error=f"Failed to read file: {read_result.stdout}")
         read_output = _strip_terminal_fence_leaks(read_result.stdout)
+        # Strip a leading UTF-8 BOM so the model never sees a phantom U+FEFF
+        # before the first real character. Only meaningful on the first
+        # chunk (the marker lives at byte 0); later pages can't carry it.
+        if offset == 1:
+            read_output, _ = _strip_bom(read_output)
         
         # Get total line count
         wc_cmd = f"wc -l < {self._escape_shell_arg(path)}"
@@ -935,8 +1057,14 @@ class ShellFileOperations(FileOperations):
         cat_result = self._exec(f"cat {self._escape_shell_arg(path)}")
         if cat_result.exit_code != 0:
             return ReadResult(error=f"Failed to read file: {cat_result.stdout}")
+        # Strip a leading UTF-8 BOM so patch's fuzzy matcher operates on
+        # clean content (a phantom U+FEFF before line 1 would defeat an
+        # exact first-line match). write_file restores the BOM on the way
+        # back out — it re-probes the on-disk file, which still has the
+        # marker — so the round-trip preserves it.
+        raw_content, _ = _strip_bom(_strip_terminal_fence_leaks(cat_result.stdout))
         return ReadResult(
-            content=_strip_terminal_fence_leaks(cat_result.stdout),
+            content=raw_content,
             file_size=file_size,
         )
 
@@ -1036,6 +1164,18 @@ class ShellFileOperations(FileOperations):
         if original_ending == "\r\n":
             content = _normalize_line_endings(content, "\r\n")
 
+        # ── BOM preservation ──────────────────────────────────────────
+        # If the file on disk started with a UTF-8 BOM, keep it. read_file
+        # strips the BOM so the agent never sees it, which means the
+        # content it hands back to write_file / patch has no BOM either —
+        # without restoring it here a round-trip would silently strip the
+        # marker and change the file's byte signature (some Windows
+        # toolchains key on it). Only prepend when the original had a BOM
+        # and the new content doesn't already carry one (guards against
+        # double-BOM if a caller passed raw bytes).
+        if self._file_has_bom(path, pre_content) and not _has_bom(content):
+            content = _UTF8_BOM + content
+
         # Snapshot LSP diagnostics for this file (best-effort) so the
         # post-write LSP layer can return only diagnostics introduced
         # by this specific edit.  Mirrors claude-code's
@@ -1053,10 +1193,22 @@ class ShellFileOperations(FileOperations):
             if mkdir_result.exit_code == 0:
                 dirs_created = True
 
-        # Write via stdin pipe — content bypasses shell arg parsing entirely,
-        # so there's no ARG_MAX limit regardless of file size.
-        write_cmd = f"cat > {self._escape_shell_arg(path)}"
-        write_result = self._exec(write_cmd, stdin_data=content)
+        # Write atomically: stream into a temp file in the SAME directory,
+        # then ``mv`` it over the target. The rename is atomic on POSIX
+        # (and on every backend FS we run on), so a crash / power loss /
+        # truncated pipe mid-write leaves the original file intact instead
+        # of a half-written corrupt file. Same-directory is load-bearing —
+        # ``mv`` across filesystems degrades to copy+unlink, which is NOT
+        # atomic; keeping the temp beside the target guarantees a real
+        # rename. Content still rides stdin so there's no ARG_MAX limit.
+        #
+        # The temp file is created with ``mktemp`` (collision-safe) when the
+        # backend has it, falling back to a PID-stamped name otherwise. We
+        # then chmod the temp to match the existing file's mode (if any) so
+        # the atomic swap doesn't silently widen or narrow permissions, and
+        # clean the temp up on any failure so we never leak a ``.hermes-tmp``
+        # turd next to the user's file.
+        write_result = self._atomic_write(path, content)
 
         if write_result.exit_code != 0:
             return WriteResult(error=f"Failed to write file: {write_result.stdout}")
@@ -1127,7 +1279,13 @@ class ShellFileOperations(FileOperations):
             return PatchResult(error=f"Failed to read file: {path}")
         
         content = read_result.stdout
-        
+        # Strip a leading UTF-8 BOM before matching so the fuzzy matcher and
+        # the diff operate on clean content (a phantom U+FEFF before line 1
+        # defeats an exact first-line match). write_file restores the BOM on
+        # the way back out by re-probing the on-disk file, so the round-trip
+        # preserves the marker.
+        content, _ = _strip_bom(content)
+
         # Import and use fuzzy matching
         from tools.fuzzy_match import fuzzy_find_and_replace
         
@@ -1176,8 +1334,13 @@ class ShellFileOperations(FileOperations):
         # ``new_content`` string has bare LFs.  Without this normalization
         # every patch on Windows returns a bogus "wrote 39, read 42"
         # false-negative even though the edit landed correctly.  POSIX
-        # backends don't translate, so this is a no-op there.
-        _verify_stdout_normalized = verify_result.stdout.replace("\r\n", "\n").replace("\r", "\n")
+        # backends don't translate, so this is a no-op there.  We also
+        # strip a leading BOM from the re-read: write_file restored the
+        # marker on disk but ``new_content`` is the BOM-less string we
+        # matched against, so the comparison must drop it to stay
+        # apples-to-apples.
+        _verify_bomless, _ = _strip_bom(verify_result.stdout)
+        _verify_stdout_normalized = _verify_bomless.replace("\r\n", "\n").replace("\r", "\n")
         _new_content_normalized = new_content.replace("\r\n", "\n").replace("\r", "\n")
         if _verify_stdout_normalized != _new_content_normalized:
             return PatchResult(error=(
diff --git a/tools/file_tools.py b/tools/file_tools.py
index 54a089fc9d0..6ea6ff0a3f0 100644
--- a/tools/file_tools.py
+++ b/tools/file_tools.py
@@ -116,15 +116,80 @@ def _get_live_tracking_cwd(task_id: str = "default") -> str | None:
     return None
 
 
+def _resolve_base_dir(task_id: str = "default") -> Path:
+    """Return the ABSOLUTE base directory for resolving relative paths.
+
+    Resolution order:
+      1. The task's live terminal cwd (the directory the agent is actually
+         working in — e.g. a git worktree). Authoritative when known.
+      2. ``$TERMINAL_CWD`` from config/env.
+      3. The process cwd.
+
+    The returned base is ALWAYS absolute. This is the core invariant that
+    prevents the worktree-cwd divergence bug: a relative ``TERMINAL_CWD``
+    (commonly the literal ``"."`` from a stale config) is meaningless as a
+    resolution anchor — left to ``Path.resolve()`` it silently resolves
+    against whatever the agent PROCESS cwd happens to be (e.g. the main repo
+    while the terminal is in a worktree), routing edits to the wrong checkout.
+    Anchoring a relative base against the process cwd here makes the resolution
+    deterministic and inspectable rather than dependent on resolve()-time cwd.
+    """
+    live = _get_live_tracking_cwd(task_id)
+    if live:
+        base = Path(live).expanduser()
+    else:
+        raw = os.environ.get("TERMINAL_CWD")
+        base = Path(raw).expanduser() if raw else Path(os.getcwd())
+    if not base.is_absolute():
+        # A relative base (".", "./sub", "..") is anchored to the process cwd
+        # once, here, so the result no longer depends on cwd at resolve() time.
+        base = Path(os.getcwd()) / base
+    return base.resolve()
+
+
 def _resolve_path_for_task(filepath: str, task_id: str = "default") -> Path:
-    """Resolve *filepath* against the task's live terminal cwd when possible."""
+    """Resolve *filepath* against the task's absolute base directory.
+
+    See :func:`_resolve_base_dir` for how the base is chosen. Absolute input
+    paths are returned resolved-but-unanchored.
+    """
     p = Path(filepath).expanduser()
-    if not p.is_absolute():
-        base = _get_live_tracking_cwd(task_id) or os.environ.get(
-            "TERMINAL_CWD", os.getcwd()
-        )
-        p = Path(base) / p
-    return p.resolve()
+    if p.is_absolute():
+        return p.resolve()
+    return (_resolve_base_dir(task_id) / p).resolve()
+
+
+def _path_resolution_warning(filepath: str, resolved: Path, task_id: str = "default") -> str | None:
+    """Warn when a relative path resolved OUTSIDE the task's workspace root.
+
+    Surfaces the worktree-cwd divergence the moment it would matter: if the
+    agent passes a relative path but it resolves under a directory that is not
+    the live terminal cwd (i.e. the edit is about to land in a different
+    checkout than the one the agent is working in), return a message naming the
+    absolute target. ``None`` when the path is absolute, the base is unknown,
+    or the resolved path is correctly under the workspace root.
+    """
+    try:
+        if Path(filepath).expanduser().is_absolute():
+            return None
+        live = _get_live_tracking_cwd(task_id)
+        if not live:
+            return None  # No authoritative workspace root to compare against.
+        root = Path(live).expanduser().resolve()
+        # Is `resolved` inside `root`?
+        try:
+            resolved.relative_to(root)
+            return None  # Inside the workspace — expected.
+        except ValueError:
+            return (
+                f"Relative path {filepath!r} resolved to {str(resolved)!r}, which is "
+                f"OUTSIDE the active workspace ({str(root)!r}). The edit will land in "
+                f"a different directory than the terminal's cwd. If this is not "
+                f"intended (e.g. a git-worktree session writing into the main "
+                f"checkout), pass an absolute path under the workspace instead."
+            )
+    except Exception:
+        return None
 
 
 def _is_blocked_device_path(path: str) -> bool:
@@ -930,12 +995,21 @@ def write_file_tool(path: str, content: str, task_id: str = "default",
             # fire — its message names the sibling subagent.
             cross_warning = file_state.check_stale(task_id, _resolved)
             stale_warning = _check_file_staleness(path, task_id)
+            # Workspace-divergence warning: relative path resolving outside the
+            # terminal's cwd (the worktree-cwd bug). Lowest priority of the three.
+            cwd_warning = _path_resolution_warning(path, Path(_resolved), task_id)
             file_ops = _get_file_ops(task_id)
-            result = file_ops.write_file(path, content)
+            result = file_ops.write_file(_resolved, content)
             result_dict = result.to_dict()
-            effective_warning = cross_warning or stale_warning
+            effective_warning = cross_warning or stale_warning or cwd_warning
             if effective_warning:
                 result_dict["_warning"] = effective_warning
+            # Always report the ABSOLUTE path actually written, so a wrong-cwd
+            # mismatch is visible in the response instead of silently routing
+            # the edit to the wrong checkout.
+            result_dict["resolved_path"] = _resolved
+            if not result_dict.get("error"):
+                result_dict["files_modified"] = [_resolved]
             # Refresh stamps after the successful write so consecutive
             # writes by this task don't trigger false staleness warnings.
             _update_read_timestamp(path, task_id)
@@ -1027,6 +1101,10 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
                 _path_to_resolved[_p] = _r
                 _cross = file_state.check_stale(task_id, _r) if _r else None
                 _sw = _cross or _check_file_staleness(_p, task_id)
+                if not _sw and _r:
+                    # Workspace-divergence warning (worktree-cwd bug): relative
+                    # path resolving outside the terminal's cwd.
+                    _sw = _path_resolution_warning(_p, Path(_r), task_id)
                 if _sw:
                     stale_warnings.append(_sw)
 
@@ -1037,7 +1115,13 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
                     return tool_error("path required")
                 if old_string is None or new_string is None:
                     return tool_error("old_string and new_string required")
-                result = file_ops.patch_replace(path, old_string, new_string, replace_all)
+                # Pass the resolved ABSOLUTE path to the shell layer so it
+                # operates on the exact file the tool layer resolved — the
+                # shell's own cwd may differ (worktree-cwd bug), and a relative
+                # path would let the two layers disagree about which file is
+                # being edited.
+                _replace_target = _path_to_resolved.get(path) or path
+                result = file_ops.patch_replace(_replace_target, old_string, new_string, replace_all)
             elif mode == "patch":
                 if not patch:
                     return tool_error("patch content required")
@@ -1048,9 +1132,18 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
             result_dict = result.to_dict()
             if stale_warnings:
                 result_dict["_warning"] = stale_warnings[0] if len(stale_warnings) == 1 else " | ".join(stale_warnings)
+            # Report the ABSOLUTE path(s) actually patched so a wrong-cwd
+            # mismatch (e.g. a worktree session editing the main checkout) is
+            # visible in the response instead of silently landing elsewhere.
+            _resolved_modified = [
+                _path_to_resolved.get(_p) or _p for _p in _paths_to_check
+            ]
             # Refresh stored timestamps for all successfully-patched paths so
             # consecutive edits by this task don't trigger false warnings.
             if not result_dict.get("error"):
+                result_dict["files_modified"] = _resolved_modified
+                if len(_resolved_modified) == 1:
+                    result_dict["resolved_path"] = _resolved_modified[0]
                 for _p in _paths_to_check:
                     _update_read_timestamp(_p, task_id)
                     _r = _path_to_resolved.get(_p)
diff --git a/tools/lazy_deps.py b/tools/lazy_deps.py
index a0926a435c7..20d68f2f7f3 100644
--- a/tools/lazy_deps.py
+++ b/tools/lazy_deps.py
@@ -173,6 +173,7 @@ LAZY_DEPS: dict[str, tuple[str, ...]] = {
     "tool.dashboard": (
         "fastapi==0.133.1",
         "uvicorn[standard]==0.41.0",
+        "starlette==1.0.1",  # CVE-2026-48710 (BadHost) — keep lazy-install in sync with pyproject [web]
     ),
 }
 
diff --git a/tools/managed_tool_gateway.py b/tools/managed_tool_gateway.py
index cd27537fde2..d894dcb4b29 100644
--- a/tools/managed_tool_gateway.py
+++ b/tools/managed_tool_gateway.py
@@ -72,15 +72,34 @@ def _access_token_is_expiring(expires_at: object, skew_seconds: int) -> bool:
     return remaining <= max(0, int(skew_seconds))
 
 
-def read_nous_access_token() -> Optional[str]:
-    """Read a Nous Subscriber OAuth access token from auth store or env override."""
+def peek_nous_access_token() -> Optional[str]:
+    """Cheap probe for a Nous gateway token without triggering refresh.
+
+    Availability scans (`hermes tools`, banner/status paint, provider
+    `is_available()` checks) must stay off the synchronous OAuth refresh path.
+    This helper therefore only inspects the explicit env override and the
+    cached auth-store token, without checking expiry and without making any
+    network calls. Truthful refresh handling stays in request/session paths
+    that call :func:`read_nous_access_token`.
+    """
     explicit = os.getenv("TOOL_GATEWAY_USER_TOKEN")
     if isinstance(explicit, str) and explicit.strip():
         return explicit.strip()
 
     nous_provider = _read_nous_provider_state() or {}
     access_token = nous_provider.get("access_token")
-    cached_token = access_token.strip() if isinstance(access_token, str) and access_token.strip() else None
+    if isinstance(access_token, str) and access_token.strip():
+        return access_token.strip()
+    return None
+
+
+def read_nous_access_token() -> Optional[str]:
+    """Read a Nous Subscriber OAuth access token from auth store or env override."""
+    explicit = os.getenv("TOOL_GATEWAY_USER_TOKEN")
+    if isinstance(explicit, str) and explicit.strip():
+        return explicit.strip()
+    nous_provider = _read_nous_provider_state() or {}
+    cached_token = peek_nous_access_token()
 
     if cached_token and not _access_token_is_expiring(
         nous_provider.get("expires_at"),
@@ -159,9 +178,15 @@ def is_managed_tool_gateway_ready(
     gateway_builder: Optional[Callable[[str], str]] = None,
     token_reader: Optional[Callable[[], Optional[str]]] = None,
 ) -> bool:
-    """Return True when gateway URL and Nous access token are available."""
+    """Return True when gateway URL and a likely-usable Nous token are present.
+
+    Defaults to :func:`peek_nous_access_token` so read-only availability scans
+    avoid synchronous OAuth refresh. Callers that are about to make a real
+    gateway request should use :func:`resolve_managed_tool_gateway` (which
+    still defaults to the refresh-aware :func:`read_nous_access_token`).
+    """
     return resolve_managed_tool_gateway(
         vendor,
         gateway_builder=gateway_builder,
-        token_reader=token_reader,
+        token_reader=token_reader or peek_nous_access_token,
     ) is not None
diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py
index 593994caa09..9794b5e8592 100644
--- a/tools/mcp_tool.py
+++ b/tools/mcp_tool.py
@@ -1395,9 +1395,22 @@ class MCPServerTask:
                 # Capture the newly spawned subprocess PID for force-kill cleanup.
                 new_pids = _snapshot_child_pids() - pids_before
                 if new_pids:
+                    # Capture pgid while the child is alive — once it exits we
+                    # can no longer call ``os.getpgid`` on it, and the cleanup
+                    # sweep needs the pgid to reach any reparented descendants
+                    # (e.g. ``claude mcp serve`` spawned by a stdio wrapper).
+                    new_pgids: Dict[int, int] = {}
+                    for _pid in new_pids:
+                        try:
+                            new_pgids[_pid] = os.getpgid(_pid)
+                        except (AttributeError, ProcessLookupError, OSError):
+                            # AttributeError: Windows (os.getpgid is POSIX-only)
+                            # ProcessLookupError: child raced and already exited
+                            pass
                     with _lock:
                         for _pid in new_pids:
                             _stdio_pids[_pid] = self.name
+                        _stdio_pgids.update(new_pgids)
                 async with ClientSession(
                     read_stream, write_stream, **sampling_kwargs
                 ) as session:
@@ -1416,16 +1429,33 @@ class MCPServerTask:
             # on Linux, where setsid() children escape the parent cgroup).
             # Mark them as orphans so the next cleanup sweep can reap them.
             if new_pids:
+                from gateway.status import _pid_exists
+                _killpg = getattr(os, "killpg", None)
                 with _lock:
                     for _pid in new_pids:
                         _stdio_pids.pop(_pid, None)
                     for pid in new_pids:
                         # ``os.kill(pid, 0)`` is NOT a no-op on Windows
                         # (bpo-14484). Use the cross-platform check.
-                        from gateway.status import _pid_exists
-                        if not _pid_exists(pid):
-                            continue  # process already exited — nothing to do
-                        _orphan_stdio_pids.add(pid)
+                        pid_alive = _pid_exists(pid)
+                        pgroup_alive = False
+                        pgid = _stdio_pgids.get(pid)
+                        if not pid_alive and pgid is not None and _killpg is not None:
+                            # Direct child exited but descendants may still be
+                            # in its pgroup (e.g. ``claude mcp serve`` spawned
+                            # by an MCP wrapper that exited first).  Probe with
+                            # signal 0 — succeeds iff any pgroup member is alive.
+                            try:
+                                _killpg(pgid, 0)
+                                pgroup_alive = True
+                            except (ProcessLookupError, PermissionError, OSError):
+                                pgroup_alive = False
+                        if pid_alive or pgroup_alive:
+                            _orphan_stdio_pids.add(pid)
+                        else:
+                            # Nothing left to reap — drop the pgid entry so
+                            # PID-reuse can't surface stale pgroup state later.
+                            _stdio_pgids.pop(pid, None)
 
     async def _run_http(self, config: dict):
         """Run the server using HTTP/StreamableHTTP transport."""
@@ -2224,6 +2254,19 @@ _stdio_pids: Dict[int, str] = {}  # pid -> server_name
 # sessions (e.g. concurrent cron jobs or live user chats).
 _orphan_stdio_pids: set = set()
 
+# Process-group IDs of stdio MCP subprocesses, captured at spawn time.
+# The MCP SDK spawns stdio children with ``start_new_session=True`` so each
+# direct child becomes its own session/pgroup leader (PGID == its own PID).
+# Grandchildren spawned by that child (e.g. a wrapper MCP server that itself
+# launches helper subprocesses like ``claude mcp serve``) inherit that PGID
+# unless they call ``setsid`` themselves.  When the direct child exits, those
+# grandchildren reparent to init/systemd-user but keep the original PGID, so
+# ``killpg(pgid, sig)`` still reaches them.  Tracked separately from
+# ``_stdio_pids`` so we retain the PGID even after the direct child has
+# exited and been removed from the active map.  Empty on Windows
+# (``os.getpgid`` is POSIX-only).
+_stdio_pgids: Dict[int, int] = {}  # pid -> pgid
+
 
 def _snapshot_child_pids() -> set:
     """Return a set of current child process PIDs.
@@ -3640,6 +3683,12 @@ def _kill_orphaned_mcp_children(include_active: bool = False) -> None:
     survivors, avoiding shared-resource collisions when multiple hermes
     processes run on the same host (each has its own ``_stdio_pids`` dict).
 
+    On POSIX, signals are sent via ``os.killpg`` to the spawn-time pgid when
+    one is tracked, so reparented grandchildren in the same process group
+    (e.g. ``claude mcp serve`` spawned by a stdio MCP wrapper that exited
+    first) are reaped alongside the direct child.  Falls back to ``os.kill``
+    on Windows and when no pgid is recorded.
+
     With ``include_active=True`` also kills every PID in ``_stdio_pids`` —
     used only at final shutdown, after the MCP event loop has stopped and no
     sessions can still be in flight.
@@ -3654,20 +3703,42 @@ def _kill_orphaned_mcp_children(include_active: bool = False) -> None:
         if include_active:
             pids.update(dict(_stdio_pids))
             _stdio_pids.clear()
+        # Snapshot pgids for the pids we're about to kill, then drop the
+        # entries so a future spawn can't collide with stale state.
+        pgids: Dict[int, int] = {pid: _stdio_pgids[pid] for pid in pids if pid in _stdio_pgids}
+        for pid in pgids:
+            _stdio_pgids.pop(pid, None)
 
     # Fast path: no tracked stdio PIDs to reap. Skip the SIGTERM/sleep/SIGKILL
     # dance entirely — otherwise every MCP-free shutdown pays a 2s sleep tax.
     if not pids:
         return
 
-    # Phase 1: SIGTERM (graceful)
-    for pid, server_name in pids.items():
+    def _send_signal(pid: int, sig: int, server_name: str) -> None:
+        """SIGTERM/SIGKILL via pgroup on POSIX, fall back to pid signal."""
+        pgid = pgids.get(pid)
+        killpg = getattr(os, "killpg", None)
+        if pgid is not None and killpg is not None:
+            try:
+                killpg(pgid, sig)
+                return
+            except (ProcessLookupError, PermissionError, OSError) as exc:
+                # Pgroup gone (all members exited) or refused — fall back to
+                # the per-pid path so we still try the direct child if alive.
+                logger.debug(
+                    "killpg(%d, %d) failed for MCP server '%s': %s; falling back to kill(pid)",
+                    pgid, sig, server_name, exc,
+                )
         try:
-            os.kill(pid, _signal.SIGTERM)
-            logger.debug("Sent SIGTERM to orphaned MCP process %d (%s)", pid, server_name)
+            os.kill(pid, sig)
         except (ProcessLookupError, PermissionError, OSError):
             pass
 
+    # Phase 1: SIGTERM (graceful)
+    for pid, server_name in pids.items():
+        _send_signal(pid, _signal.SIGTERM, server_name)
+        logger.debug("Sent SIGTERM to orphaned MCP process %d (%s)", pid, server_name)
+
     # Phase 2: Wait for graceful exit
     time.sleep(2)
 
@@ -3679,14 +3750,11 @@ def _kill_orphaned_mcp_children(include_active: bool = False) -> None:
     for pid, server_name in pids.items():
         if not _pid_exists(pid):
             continue  # Good — exited after SIGTERM
-        try:
-            os.kill(pid, _sigkill)
-            logger.warning(
-                "Force-killed MCP process %d (%s) after SIGTERM timeout",
-                pid, server_name,
-            )
-        except (ProcessLookupError, PermissionError, OSError):
-            pass
+        _send_signal(pid, _sigkill, server_name)
+        logger.warning(
+            "Force-killed MCP process %d (%s) after SIGTERM timeout",
+            pid, server_name,
+        )
 
 
 def _stop_mcp_loop():
diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py
index 9ea0b9af41b..88bcb4005c0 100644
--- a/tools/send_message_tool.py
+++ b/tools/send_message_tool.py
@@ -40,6 +40,15 @@ _NUMERIC_TOPIC_RE = _TELEGRAM_TOPIC_TARGET_RE
 # downstream adapters (signal, etc.) expect.
 _PHONE_PLATFORMS = frozenset({"signal", "sms", "whatsapp"})
 _E164_TARGET_RE = re.compile(r"^\s*\+(\d{7,15})\s*$")
+# Email addresses — a valid email like "user@domain.com" should be treated as
+# an explicit target for the email platform, not fall through to channel-name
+# resolution which has no way to resolve a raw address.
+_EMAIL_TARGET_RE = re.compile(r"^\s*[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\s*$")
+# Most platforms read their home channel from "<PLATFORM>_HOME_CHANNEL", but a
+# few diverge. Email reads EMAIL_HOME_ADDRESS (see gateway/config.py), so the
+# generic "<PLATFORM>_HOME_CHANNEL" hint would point users at a variable that is
+# never read. Map the exceptions so the error guidance is actually actionable.
+_HOME_CHANNEL_ENV_OVERRIDES = {"email": "EMAIL_HOME_ADDRESS"}
 _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".gif"}
 _VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".3gp"}
 _AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a", ".flac"}
@@ -265,10 +274,13 @@ def _handle_send(args):
             chat_id = home.chat_id
             used_home_channel = True
         else:
+            home_env = _HOME_CHANNEL_ENV_OVERRIDES.get(
+                platform_name, f"{platform_name.upper()}_HOME_CHANNEL"
+            )
             return json.dumps({
                 "error": f"No home channel set for {platform_name} to determine where to send the message. "
                 f"Either specify a channel directly with '{platform_name}:CHANNEL_NAME', "
-                f"or set a home channel via: hermes config set {platform_name.upper()}_HOME_CHANNEL <channel_id>"
+                f"or set a home channel via: hermes config set {home_env} <channel_id>"
             })
 
     duplicate_skip = _maybe_skip_cron_duplicate_send(platform_name, chat_id, thread_id)
@@ -383,6 +395,10 @@ def _parse_target_ref(platform_name: str, target_ref: str):
         if target_ref.strip().isdigit():
             return f"group:{target_ref.strip()}", None, True
         return None, None, False
+    if platform_name == "email":
+        match = _EMAIL_TARGET_RE.fullmatch(target_ref)
+        if match:
+            return target_ref.strip(), None, True
     if platform_name in _PHONE_PLATFORMS:
         match = _E164_TARGET_RE.fullmatch(target_ref)
         if match:
diff --git a/tools/skills_sync.py b/tools/skills_sync.py
index 81710a7b870..11d031cde41 100644
--- a/tools/skills_sync.py
+++ b/tools/skills_sync.py
@@ -517,7 +517,10 @@ def sync_skills(quiet: bool = False) -> dict:
                         if not quiet:
                             print(f"  ↑ {skill_name} (updated)")
                         # Remove backup after successful copy
-                        shutil.rmtree(backup, ignore_errors=True)
+                        try:
+                            _rmtree_writable(backup)
+                        except (OSError, IOError):
+                            logger.debug("Could not remove backup %s", backup, exc_info=True)
                     except (OSError, IOError):
                         # Restore from backup
                         if backup.exists() and not dest.exists():
@@ -563,6 +566,30 @@ def sync_skills(quiet: bool = False) -> dict:
     }
 
 
+def _rmtree_writable(path: Path) -> None:
+    """Remove a directory tree, making read-only entries writable first.
+
+    Handles immutable package sources (Nix store, deb/rpm installs) that
+    preserve read-only permissions on copied files *and* directories
+    (``r-xr-xr-x``).  Removing a child requires write permission on its
+    parent directory, so the retry handler makes the failing path **and its
+    parent** writable before re-attempting.  See #34860, #34972.
+    """
+    import stat
+
+    def _on_error(func, fpath, exc_info):
+        # Unlinking a child requires the parent dir to be writable, so chmod
+        # the parent as well as the failing path, then retry.
+        for target in (os.path.dirname(fpath), fpath):
+            try:
+                os.chmod(target, stat.S_IRWXU)
+            except OSError:
+                pass
+        func(fpath)
+
+    shutil.rmtree(path, onerror=_on_error)
+
+
 def reset_bundled_skill(name: str, restore: bool = False) -> dict:
     """
     Reset a bundled skill's manifest tracking so future syncs work normally.
@@ -606,12 +633,9 @@ def reset_bundled_skill(name: str, restore: bool = False) -> dict:
             "synced": None,
         }
 
-    # Step 1: drop the manifest entry so next sync treats it as new
-    if in_manifest:
-        del manifest[name]
-        _write_manifest(manifest)
-
-    # Step 2 (optional): delete the user's copy so next sync re-copies bundled
+    # Step 1 (optional): delete the user's copy so next sync re-copies bundled.
+    # Must happen BEFORE manifest deletion so that a failed rmtree does not
+    # leave the skill in a manifest-less limbo state (see #34972).
     deleted_user_copy = False
     if restore:
         if not is_bundled:
@@ -619,28 +643,32 @@ def reset_bundled_skill(name: str, restore: bool = False) -> dict:
                 "ok": False,
                 "action": "bundled_missing",
                 "message": (
-                    f"'{name}' has no bundled source — manifest entry cleared "
+                    f"'{name}' has no bundled source — manifest entry preserved "
                     f"but cannot restore from bundled (skill was removed upstream)."
                 ),
                 "synced": None,
             }
-        # The destination mirrors the bundled path relative to bundled_dir.
         dest = _compute_relative_dest(bundled_by_name[name], bundled_dir)
         if dest.exists():
             try:
-                shutil.rmtree(dest)
+                _rmtree_writable(dest)
                 deleted_user_copy = True
             except (OSError, IOError) as e:
                 return {
                     "ok": False,
-                    "action": "manifest_cleared",
+                    "action": "not_reset",
                     "message": (
-                        f"Cleared manifest entry for '{name}' but could not "
-                        f"delete user copy at {dest}: {e}"
+                        f"Could not delete user copy at {dest}: {e}. "
+                        f"Manifest entry preserved — nothing was changed."
                     ),
                     "synced": None,
                 }
 
+    # Step 2: drop the manifest entry so next sync treats it as new
+    if in_manifest:
+        del manifest[name]
+        _write_manifest(manifest)
+
     # Step 3: run sync to re-baseline (or re-copy if we deleted)
     synced = sync_skills(quiet=True)
 
diff --git a/tools/vision_tools.py b/tools/vision_tools.py
index 986f9dab984..23a0508fed1 100644
--- a/tools/vision_tools.py
+++ b/tools/vision_tools.py
@@ -127,6 +127,30 @@ def _detect_image_mime_type(image_path: Path) -> Optional[str]:
     return None
 
 
+def _is_retryable_download_error(error: Exception) -> bool:
+    """Return True only for transient image-download failures worth retrying.
+
+    Non-retryable (fail-fast):
+      - httpx.HTTPStatusError with a 4xx status other than 429 (404/403/410/...):
+        the resource is missing or forbidden; retrying can't change that.
+      - PermissionError: blocked by website policy / SSRF guard.
+      - ValueError: image too large or blocked redirect — deterministic.
+
+    Retryable (transient):
+      - httpx 429 (rate limited) and 5xx (server-side) errors.
+      - Connection/timeout/transport errors (httpx.TransportError) and any
+        other unclassified exception, which may be a flaky network blip.
+    """
+    if isinstance(error, (PermissionError, ValueError)):
+        return False
+    if isinstance(error, httpx.HTTPStatusError):
+        status = error.response.status_code
+        if 400 <= status < 500 and status != 429:
+            return False
+        return True
+    return True
+
+
 async def _download_image(image_url: str, destination: Path, max_retries: int = 3) -> Path:
     """
     Download an image from a URL to a local destination (async) with retry logic.
@@ -210,24 +234,32 @@ async def _download_image(image_url: str, destination: Path, max_retries: int =
             return destination
         except Exception as e:
             last_error = e
-            if attempt < max_retries - 1:
-                wait_time = 2 ** (attempt + 1)  # 2s, 4s, 8s
-                logger.warning("Image download failed (attempt %s/%s): %s", attempt + 1, max_retries, str(e)[:50])
-                logger.warning("Retrying in %ss...", wait_time)
-                await asyncio.sleep(wait_time)
-            else:
+            # Error-class-aware retry: only retry transient failures. A 4xx
+            # client error (404/403/410, etc.) will never succeed on retry —
+            # the resource isn't there or we're not allowed — so burning 3
+            # attempts with 2s/4s/8s backoff just inflates latency. 429 (rate
+            # limit) and 5xx remain retryable. PermissionError (policy block)
+            # and ValueError (too-large / SSRF redirect) are also terminal.
+            if not _is_retryable_download_error(e) or attempt >= max_retries - 1:
                 logger.error(
-                    "Image download failed after %s attempts: %s",
-                    max_retries,
+                    "Image download failed after %s attempt(s): %s",
+                    attempt + 1,
                     str(e)[:100],
                     exc_info=True,
                 )
-    
-    if last_error is None:
-        raise RuntimeError(
-            f"_download_image exited retry loop without attempting (max_retries={max_retries})"
-        )
-    raise last_error
+                raise
+            wait_time = 2 ** (attempt + 1)  # 2s, 4s, 8s
+            logger.warning("Image download failed (attempt %s/%s): %s", attempt + 1, max_retries, str(e)[:50])
+            logger.warning("Retrying in %ss...", wait_time)
+            await asyncio.sleep(wait_time)
+
+    # The loop always returns on success or re-raises on the final/non-retryable
+    # attempt, so reaching here means max_retries was non-positive.
+    if last_error is not None:
+        raise last_error
+    raise RuntimeError(
+        f"_download_image exited retry loop without attempting (max_retries={max_retries})"
+    )
 
 
 def _determine_mime_type(image_path: Path) -> str:
diff --git a/tools/web_tools.py b/tools/web_tools.py
index 509546fd573..d03f6865df2 100644
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@@ -93,6 +93,7 @@ from tools.debug_helpers import DebugSession
 # tools.web_tools (the firecrawl plugin reads them via its own import chain).
 from tools.managed_tool_gateway import (  # noqa: F401 — backward-compat names for tests
     build_vendor_gateway_url,
+    peek_nous_access_token as _peek_nous_access_token,
     read_nous_access_token as _read_nous_access_token,
     resolve_managed_tool_gateway,
 )
diff --git a/tui_gateway/entry.py b/tui_gateway/entry.py
index 0400a3fcbff..7069ec97605 100644
--- a/tui_gateway/entry.py
+++ b/tui_gateway/entry.py
@@ -12,6 +12,7 @@ if _src_root and _src_root not in sys.path:
 sys.path = [p for p in sys.path if p not in {"", "."}]
 
 import json
+import logging
 import signal
 import time
 import traceback
@@ -20,6 +21,13 @@ from tui_gateway import server
 from tui_gateway.server import _CRASH_LOG, dispatch, resolve_skin, write_json
 from tui_gateway.transport import TeeTransport
 
+logger = logging.getLogger(__name__)
+
+# Handle for the background MCP tool-discovery thread (see main()).  The first
+# agent build briefly joins this so already-spawning fast servers land before
+# the agent snapshots its tool list (see wait_for_mcp_discovery).
+_mcp_discovery_thread = None
+
 
 def _install_sidecar_publisher() -> None:
     """Mirror every dispatcher emit to the dashboard sidebar via WS.
@@ -184,37 +192,76 @@ def _log_exit(reason: str) -> None:
     print(f"[gateway-exit] {reason}", file=sys.stderr, flush=True)
 
 
+def wait_for_mcp_discovery(timeout: float = 0.75) -> None:
+    """Briefly block until background MCP discovery finishes, up to ``timeout``.
+
+    MCP discovery runs in a daemon thread spawned at startup (see main()) so a
+    slow/dead server can't freeze ``gateway.ready``.  But the agent snapshots
+    its tool list ONCE at build time and never re-reads it, so a reachable-but-
+    slow server that finishes connecting *after* the first prompt would be
+    invisible for the whole session.  Joining with a short bounded timeout
+    before the first agent build lets already-spawning fast servers land
+    without re-introducing the startup hang: a dead server simply isn't waited
+    on beyond ``timeout``.  No-op when no discovery thread was started.
+    """
+    thread = _mcp_discovery_thread
+    if thread is None or not thread.is_alive():
+        return
+    thread.join(timeout=timeout)
+
+
 def main():
     _install_sidecar_publisher()
 
-    # MCP tool discovery — inline is safe here: TUI entry is a plain
-    # sync loop with no asyncio event loop to block.  Previously ran as
-    # a model_tools.py module-level side effect; moved to explicit
-    # startup calls to avoid freezing the gateway's loop on lazy import
-    # (#16856).
+    # MCP tool discovery — runs in a background daemon thread so a slow or
+    # unreachable MCP server can't freeze TUI startup.  Previously this ran
+    # inline before ``gateway.ready``, which meant any configured-but-down
+    # server stalled the whole shell on "summoning hermes…" for the full
+    # connect-retry backoff (e.g. a dead stdio/http server burns 1+2+4s of
+    # retries → ~7s of dead air before the composer appears).  Discovery is
+    # idempotent and registers tools into the shared registry as servers
+    # connect.  The agent isn't built until the first prompt, at which point
+    # ``_make_agent`` briefly joins this thread (``wait_for_mcp_discovery``,
+    # bounded) so already-spawning fast servers land in the tool snapshot —
+    # a dead server is simply not waited on past the bound.  ``/reload-mcp``
+    # rebuilds the snapshot for servers that connect later in the session.
     #
     # Cold-start guard: importing ``tools.mcp_tool`` transitively pulls the
     # full MCP SDK (mcp, pydantic, httpx, jsonschema, starlette parsers —
-    # ~200ms on macOS), which runs on the TUI's critical path before
-    # ``gateway.ready`` can be emitted.  The overwhelming majority of users
-    # have no ``mcp_servers`` configured, in which case every byte of that
-    # import is wasted.  Check the config first (cheap — it's already been
-    # loaded once by ``_config_mtime`` elsewhere) and only pay the import
-    # cost when there's actually MCP work to do.
+    # ~200ms on macOS).  The overwhelming majority of users have no
+    # ``mcp_servers`` configured, in which case every byte of that import is
+    # wasted.  Check the config first (cheap) and only spawn the discovery
+    # thread when there's actually MCP work to do, so the import cost stays
+    # off the path entirely for the common case.
     try:
         from hermes_cli.config import read_raw_config
         _mcp_servers = (read_raw_config() or {}).get("mcp_servers")
         _has_mcp_servers = isinstance(_mcp_servers, dict) and len(_mcp_servers) > 0
     except Exception:
-        # Be conservative: if we can't decide, fall back to the old
-        # behaviour and let the discovery path handle its own errors.
+        # Be conservative: if we can't decide, fall back to attempting
+        # discovery (still backgrounded, so it can't block startup).
         _has_mcp_servers = True
     if _has_mcp_servers:
-        try:
-            from tools.mcp_tool import discover_mcp_tools
-            discover_mcp_tools()
-        except Exception:
-            pass
+        def _discover_mcp_background() -> None:
+            try:
+                from tools.mcp_tool import discover_mcp_tools
+                discover_mcp_tools()
+            except Exception:
+                logger.warning(
+                    "Background MCP tool discovery failed", exc_info=True
+                )
+
+        import threading as _mcp_threading
+        _mcp_thread = _mcp_threading.Thread(
+            target=_discover_mcp_background,
+            name="tui-mcp-discovery",
+            daemon=True,
+        )
+        _mcp_thread.start()
+        # Publish the handle so the first agent build can briefly wait for
+        # already-spawning fast servers to land (see wait_for_mcp_discovery).
+        global _mcp_discovery_thread
+        _mcp_discovery_thread = _mcp_thread
 
     if not write_json({
         "jsonrpc": "2.0",
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 43b50986596..45ffd407a4e 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -2243,6 +2243,19 @@ def _make_agent(sid: str, key: str, session_id: str | None = None):
     from run_agent import AIAgent
     from hermes_cli.runtime_provider import resolve_runtime_provider
 
+    # MCP tool discovery runs in a background daemon thread at startup so a
+    # dead server can't freeze the shell (see tui_gateway/entry.py).  The agent
+    # snapshots its tool list once here and never re-reads it, so briefly wait
+    # for in-flight discovery to land before building — bounded, so a slow/dead
+    # server still can't block.  No-op once discovery has finished (every build
+    # after the first during a slow startup).
+    try:
+        from tui_gateway.entry import wait_for_mcp_discovery
+
+        wait_for_mcp_discovery()
+    except Exception:
+        pass
+
     cfg = _load_cfg()
     agent_cfg = cfg.get("agent") or {}
     system_prompt = _prompt_text(agent_cfg.get("system_prompt", ""))
@@ -5379,8 +5392,28 @@ def _(rid, params: dict) -> dict:
         discover_mcp_tools()
         if session:
             agent = session["agent"]
-            if hasattr(agent, "refresh_tools"):
-                agent.refresh_tools()
+            # Rebuild the cached agent's tool snapshot so the current session
+            # picks up added/removed MCP tools without `/new` (which discards
+            # history).  The agent snapshots tools once at build and never
+            # re-reads the registry, so an explicit rebuild is required here.
+            # The user already consented to the prompt-cache invalidation via
+            # the confirm gate above.  Mirrors gateway/run.py::_execute_mcp_reload.
+            try:
+                from model_tools import get_tool_definitions
+
+                new_defs = get_tool_definitions(
+                    enabled_toolsets=_load_enabled_toolsets(),
+                    quiet_mode=True,
+                )
+                agent.tools = new_defs
+                agent.valid_tool_names = (
+                    {t["function"]["name"] for t in new_defs} if new_defs else set()
+                )
+            except Exception as _exc:
+                logger.warning(
+                    "Failed to refresh cached agent tools after /reload-mcp: %s",
+                    _exc,
+                )
             _emit(
                 "session.info",
                 params.get("session_id", ""),
diff --git a/ui-tui/src/__tests__/clipboard.test.ts b/ui-tui/src/__tests__/clipboard.test.ts
index b0646ee488e..93feb009d87 100644
--- a/ui-tui/src/__tests__/clipboard.test.ts
+++ b/ui-tui/src/__tests__/clipboard.test.ts
@@ -269,7 +269,14 @@ describe('writeClipboardText', () => {
       expect.arrayContaining(['-NoProfile', '-NonInteractive']),
       expect.anything()
     )
-    expect(stdin.end).toHaveBeenCalledWith('wsl text')
+    // PowerShell uses base64-encoded UTF-8 via command argument, not stdin
+    expect(stdin.end).not.toHaveBeenCalled()
+    const calledArgs = start.mock.calls[0][1] as string[]
+    const commandIdx = calledArgs.indexOf('-Command')
+    expect(commandIdx).toBeGreaterThan(-1)
+    const script = calledArgs[commandIdx + 1]
+    expect(script).toContain('FromBase64String')
+    expect(script).toContain(Buffer.from('wsl text', 'utf8').toString('base64'))
   })
 
   it('prefers the Windows clipboard path over wl-copy inside WSLg', async () => {
@@ -300,7 +307,13 @@ describe('writeClipboardText', () => {
       expect.arrayContaining(['-NoProfile', '-NonInteractive']),
       expect.anything()
     )
-    expect(stdin.end).toHaveBeenCalledWith('wslg text')
+    // PowerShell uses base64-encoded UTF-8 via command argument, not stdin
+    expect(stdin.end).not.toHaveBeenCalled()
+    const calledArgs = start.mock.calls[0][1] as string[]
+    const commandIdx = calledArgs.indexOf('-Command')
+    const script = calledArgs[commandIdx + 1]
+    expect(script).toContain('FromBase64String')
+    expect(script).toContain(Buffer.from('wslg text', 'utf8').toString('base64'))
   })
 
   it('uses PowerShell on Windows', async () => {
@@ -325,5 +338,32 @@ describe('writeClipboardText', () => {
       expect.arrayContaining(['-NoProfile', '-NonInteractive']),
       expect.anything()
     )
+    // PowerShell uses base64-encoded UTF-8 via command argument, not stdin
+    expect(stdin.end).not.toHaveBeenCalled()
+  })
+
+  it('preserves CJK text via base64 encoding in PowerShell on WSL', async () => {
+    const stdin = { end: vi.fn() }
+
+    const child = {
+      once: vi.fn((event: string, cb: (code?: number) => void) => {
+        if (event === 'close') {
+          cb(0)
+        }
+
+        return child
+      }),
+      stdin
+    }
+
+    const start = vi.fn().mockReturnValue(child)
+    const cjkText = '你好世界，测试中文 🎉'
+
+    await expect(writeClipboardText(cjkText, 'linux', start as any, { WSL_INTEROP: '/tmp/socket' })).resolves.toBe(true)
+    const calledArgs = start.mock.calls[0][1] as string[]
+    const commandIdx = calledArgs.indexOf('-Command')
+    const script = calledArgs[commandIdx + 1]
+    expect(script).toContain(Buffer.from(cjkText, 'utf8').toString('base64'))
+    expect(script).toContain('UTF8.GetString')
   })
 })
diff --git a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts
index 0a3e4227396..897875b2c03 100644
--- a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts
+++ b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts
@@ -1,7 +1,7 @@
 import { beforeEach, describe, expect, it, vi } from 'vitest'
 
 import { createGatewayEventHandler } from '../app/createGatewayEventHandler.js'
-import { getOverlayState, resetOverlayState } from '../app/overlayStore.js'
+import { getOverlayState, patchOverlayState, resetOverlayState } from '../app/overlayStore.js'
 import { turnController } from '../app/turnController.js'
 import { getTurnState, resetTurnState } from '../app/turnStore.js'
 import { getUiState, patchUiState, resetUiState } from '../app/uiStore.js'
@@ -897,6 +897,117 @@ describe('createGatewayEventHandler', () => {
     expect(getTurnState().subagents.find(s => s.id === 'sa-weird')?.status).toBe('completed')
   })
 
+  it('nudges toward /agents on the first spawn_requested of a turn', () => {
+    const appended: Msg[] = []
+    const onEvent = createGatewayEventHandler(buildCtx(appended))
+
+    onEvent({
+      payload: { goal: 'child a', subagent_id: 'sa-a', task_index: 0 },
+      type: 'subagent.spawn_requested'
+    } as any)
+
+    const hints = getTurnState().activity.filter(a => a.text.includes('/agents'))
+    expect(hints).toHaveLength(1)
+    expect(hints[0]).toMatchObject({ tone: 'info' })
+  })
+
+  it('nudges toward /agents on subagent.start (spawn_requested dropped in CLI path)', () => {
+    const appended: Msg[] = []
+    const onEvent = createGatewayEventHandler(buildCtx(appended))
+
+    // In the real CLI→gateway path the delegate callback drops
+    // spawn_requested, so `start` is the first event the TUI sees.
+    onEvent({
+      payload: { goal: 'child a', subagent_id: 'sa-a', task_index: 0 },
+      type: 'subagent.start'
+    } as any)
+
+    expect(getTurnState().activity.filter(a => a.text.includes('/agents'))).toHaveLength(1)
+  })
+
+  it('nudges at most once per turn and resets on the next message.start', () => {
+    const appended: Msg[] = []
+    const onEvent = createGatewayEventHandler(buildCtx(appended))
+
+    // Multiple spawns in one turn → a single hint.
+    onEvent({
+      payload: { goal: 'child a', subagent_id: 'sa-a', task_index: 0 },
+      type: 'subagent.start'
+    } as any)
+    onEvent({
+      payload: { goal: 'child b', subagent_id: 'sa-b', task_index: 1 },
+      type: 'subagent.start'
+    } as any)
+    expect(getTurnState().activity.filter(a => a.text.includes('/agents'))).toHaveLength(1)
+
+    // New turn clears activity AND the once-per-turn guard → nudges again.
+    onEvent({ payload: {}, type: 'message.start' } as any)
+    onEvent({
+      payload: { goal: 'child c', subagent_id: 'sa-c', task_index: 0 },
+      type: 'subagent.start'
+    } as any)
+    expect(getTurnState().activity.filter(a => a.text.includes('/agents'))).toHaveLength(1)
+  })
+
+  it('does not nudge when the /agents overlay is already open', () => {
+    const appended: Msg[] = []
+    const onEvent = createGatewayEventHandler(buildCtx(appended))
+
+    // User already has the dashboard open → nothing to advertise.
+    patchOverlayState({ agents: true })
+
+    onEvent({
+      payload: { goal: 'child a', subagent_id: 'sa-a', task_index: 0 },
+      type: 'subagent.start'
+    } as any)
+
+    expect(getTurnState().activity.filter(a => a.text.includes('/agents'))).toHaveLength(0)
+  })
+
+  it('nudges if the /agents overlay is closed mid-turn while delegation continues', () => {
+    const appended: Msg[] = []
+    const onEvent = createGatewayEventHandler(buildCtx(appended))
+
+    // Overlay open on the first delegation event → suppressed, but the
+    // turn's nudge credit must NOT be burned (the user is watching).
+    patchOverlayState({ agents: true })
+    onEvent({
+      payload: { goal: 'child a', subagent_id: 'sa-a', task_index: 0 },
+      type: 'subagent.start'
+    } as any)
+    expect(getTurnState().activity.filter(a => a.text.includes('/agents'))).toHaveLength(0)
+
+    // User closes the dashboard mid-turn → the next delegation event nudges.
+    patchOverlayState({ agents: false })
+    onEvent({
+      payload: { goal: 'child b', subagent_id: 'sa-b', task_index: 1 },
+      type: 'subagent.start'
+    } as any)
+    expect(getTurnState().activity.filter(a => a.text.includes('/agents'))).toHaveLength(1)
+  })
+
+  it('does not nudge when display.tui_agents_nudge is false', async () => {
+    const appended: Msg[] = []
+    const ctx = buildCtx(appended)
+    // config.get → full returns the disable flag.
+    ctx.gateway.rpc = vi.fn(async (method: string) =>
+      method === 'config.get' ? { config: { display: { tui_agents_nudge: false } } } : null
+    )
+    const onEvent = createGatewayEventHandler(ctx)
+
+    // Eager config fetch fires at creation; let it resolve before any spawn
+    // (mirrors real usage — config lands well before the first delegation).
+    await Promise.resolve()
+    await Promise.resolve()
+
+    onEvent({
+      payload: { goal: 'child a', subagent_id: 'sa-a', task_index: 0 },
+      type: 'subagent.start'
+    } as any)
+
+    expect(getTurnState().activity.filter(a => a.text.includes('/agents'))).toHaveLength(0)
+  })
+
   it('drops stale reasoning/tool/todos events after ctrl-c until the next message starts', () => {
     // Repro for the discord report: ctrl-c interrupts, but late reasoning/tool
     // events from the still-winding-down agent loop kept populating the UI for
diff --git a/ui-tui/src/app/createGatewayEventHandler.ts b/ui-tui/src/app/createGatewayEventHandler.ts
index 26d6cfacd0c..70264b0c7f9 100644
--- a/ui-tui/src/app/createGatewayEventHandler.ts
+++ b/ui-tui/src/app/createGatewayEventHandler.ts
@@ -17,7 +17,7 @@ import type { Msg, SubagentProgress, SubagentStatus } from '../types.js'
 
 import { applyDelegationStatus, getDelegationState } from './delegationStore.js'
 import type { GatewayEventHandlerContext } from './interfaces.js'
-import { patchOverlayState } from './overlayStore.js'
+import { getOverlayState, patchOverlayState } from './overlayStore.js'
 import { turnController } from './turnController.js'
 import { getUiState, patchUiState } from './uiStore.js'
 
@@ -123,6 +123,78 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
   // render a /warning close to the configured cap without spamming the RPC.
   let lastDelegationFetchAt = 0
 
+  // ── Shared full-config read ──────────────────────────────────────────
+  //
+  // Several concerns need `display.*` flags at startup (the /agents nudge
+  // gate below, the auto-resume check in the `gateway.ready` handler).
+  // Memoize the `config.get full` RPC so we make exactly one round-trip
+  // instead of one per concern.  Resolves to null on RPC failure; callers
+  // treat null as "use defaults".
+  let fullConfigPromise: null | Promise<ConfigFullResponse | null> = null
+
+  const getFullConfigOnce = (): Promise<ConfigFullResponse | null> => {
+    fullConfigPromise ??= rpc<ConfigFullResponse>('config.get', { key: 'full' }).catch(() => null)
+
+    return fullConfigPromise
+  }
+
+  // ── Nudge toward /agents on delegation ───────────────────────────────
+  //
+  // When `display.tui_agents_nudge` is enabled (default true), the first
+  // time a turn starts delegating we drop a single transient activity hint
+  // ("subagents working · /agents to watch live") so the user discovers the
+  // spawn-tree dashboard instead of staring at a quiet transcript — without
+  // hijacking the screen by force-opening an overlay.  Guards:
+  //   • fires at most once per turn (`agentsNudgedThisTurn`)
+  //   • silent if the overlay is already open (nothing to advertise)
+  // Reset on `message.start`.  The config flag is fetched once, lazily;
+  // until it resolves we assume the default (on).
+  let agentsNudgeEnabled = true
+  let agentsNudgeConfigFetched = false
+  let agentsNudgedThisTurn = false
+
+  const ensureAgentsNudgeConfig = () => {
+    if (agentsNudgeConfigFetched) {
+      return
+    }
+
+    agentsNudgeConfigFetched = true
+    getFullConfigOnce().then(cfg => {
+      // Only an explicit `false` disables it; absent/unknown keeps default on.
+      if (cfg?.config?.display?.tui_agents_nudge === false) {
+        agentsNudgeEnabled = false
+      }
+    })
+  }
+
+  const maybeNudgeAgents = () => {
+    ensureAgentsNudgeConfig()
+
+    if (!agentsNudgeEnabled || agentsNudgedThisTurn) {
+      return
+    }
+
+    // Already watching → no point advertising the dashboard.  Don't burn the
+    // turn's nudge credit here: if the user closes the overlay later in the
+    // same turn while delegation is still ongoing, a subsequent event should
+    // still be allowed to nudge.  The flag is only set once we actually push.
+    if (getOverlayState().agents) {
+      return
+    }
+
+    agentsNudgedThisTurn = true
+    turnController.pushActivity('subagents working · /agents to watch live', 'info')
+  }
+
+  const resetAgentsNudgeTurnState = () => {
+    agentsNudgedThisTurn = false
+  }
+
+  // Kick off the config fetch eagerly at handler creation so the flag is
+  // resolved well before the first delegation of any real session (which
+  // only happens after gateway.ready + a user turn).
+  ensureAgentsNudgeConfig()
+
   const refreshDelegationStatus = (force = false) => {
     const now = Date.now()
 
@@ -244,8 +316,8 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
     // forging a brand-new one.  Mirrors classic CLI's `hermes -c` /
     // `hermes --tui` muscle memory and addresses the audit's "session
     // unrecoverable after disconnection" gap.  Default off so existing
-    // users aren't surprised.
-    rpc<ConfigFullResponse>('config.get', { key: 'full' })
+    // users aren't surprised.  (Shares the memoized full-config read.)
+    getFullConfigOnce()
       .then(cfg => {
         if (!cfg?.config?.display?.tui_auto_resume_recent) {
           patchUiState({ status: 'forging session…' })
@@ -332,6 +404,7 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
       }
 
       case 'message.start':
+        resetAgentsNudgeTurnState()
         turnController.startMessage()
 
         return
@@ -618,6 +691,9 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
         // Preserve completed state if a later event races in before this one.
         turnController.upsertSubagent(ev.payload, c => (isTerminalStatus(c.status) ? {} : { status: 'queued' }))
 
+        // First sign of delegation this turn → nudge toward /agents.
+        maybeNudgeAgents()
+
         // Prime the status-bar HUD: fetch caps (once every 5s) so we can
         // warn as depth/concurrency approaches the configured ceiling.
         if (getDelegationState().maxSpawnDepth === null) {
@@ -631,6 +707,12 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
       case 'subagent.start':
         turnController.upsertSubagent(ev.payload, c => (isTerminalStatus(c.status) ? {} : { status: 'running' }))
 
+        // `subagent.start` is the first delegation event the TUI reliably
+        // receives (the delegate callback drops `spawn_requested` in the
+        // CLI→gateway path), so nudge here too.  Once-per-turn guarded, so
+        // hooking both events is safe.
+        maybeNudgeAgents()
+
         return
       case 'subagent.thinking': {
         const text = String(ev.payload.text ?? '').trim()
diff --git a/ui-tui/src/components/thinking.tsx b/ui-tui/src/components/thinking.tsx
index 0d9ecee87c9..ce90cca2138 100644
--- a/ui-tui/src/components/thinking.tsx
+++ b/ui-tui/src/components/thinking.tsx
@@ -1073,6 +1073,10 @@ export const ToolTrail = memo(function ToolTrail({
             const branch: TreeBranch = index === groups.length - 1 ? 'last' : 'mid'
             const childRails = nextTreeRails(rails, branch)
             const hasInlineSubagents = inlineDelegateKey === group.key
+            // Surface the /agents hint the moment a delegate group appears —
+            // while it's still in-flight and before any subagent has
+            // registered — so users can open the live monitor immediately.
+            const isDelegateGroup = group.label.startsWith('Delegate Task')
 
             return (
               <Box flexDirection="column" key={group.key}>
@@ -1083,6 +1087,11 @@ export const ToolTrail = memo(function ToolTrail({
                     <>
                       <Text color={t.color.accent}>● </Text>
                       {toolLabel(group)}
+                      {isDelegateGroup ? (
+                        <Text color={t.color.statusFg} dim>
+                          {'  (/agents to monitor)'}
+                        </Text>
+                      ) : null}
                     </>
                   }
                   rails={rails}
diff --git a/ui-tui/src/gatewayTypes.ts b/ui-tui/src/gatewayTypes.ts
index ae1f38e9b38..447dec3ea49 100644
--- a/ui-tui/src/gatewayTypes.ts
+++ b/ui-tui/src/gatewayTypes.ts
@@ -62,6 +62,12 @@ export interface ConfigDisplayConfig {
   show_reasoning?: boolean
   streaming?: boolean
   thinking_mode?: string
+  /**
+   * Nudge the user toward the /agents spawn-tree dashboard the first time a
+   * turn starts delegating, via a one-time transient activity hint.  Opens
+   * nothing — just advertises the command.  Default true.
+   */
+  tui_agents_nudge?: boolean
   tui_auto_resume_recent?: boolean
   tui_compact?: boolean
   /** Legacy alias for display.mouse_tracking. */
diff --git a/ui-tui/src/lib/clipboard.ts b/ui-tui/src/lib/clipboard.ts
index 587e8986c3e..4a5387ae2d2 100644
--- a/ui-tui/src/lib/clipboard.ts
+++ b/ui-tui/src/lib/clipboard.ts
@@ -91,33 +91,42 @@ export async function readClipboardText(
   return null
 }
 
+// PowerShell on Windows/WSL decodes piped stdin with the system ANSI code
+// page (e.g. CP936), not UTF-8, so $input-based writes mangle CJK/emoji. We
+// instead base64-encode the UTF-8 bytes and pass them as a -Command argument,
+// decoding with UTF8.GetString — this removes the stdin-encoding variable
+// entirely (also immune to BOM injection on redirect). PowerShell entries set
+// stdin=false; every other backend reads UTF-8 stdin natively.
+type WriteCmd = { args: readonly string[]; cmd: string; stdin: boolean }
+
+function _powershellWriteScript(b64: string): string {
+  return `Set-Clipboard -Value ([System.Text.Encoding]::UTF8.GetString([System.Convert]::FromBase64String('${b64}')))`
+}
+
 function writeClipboardCommands(
   platform: NodeJS.Platform,
   env: NodeJS.ProcessEnv
-): Array<{ args: readonly string[]; cmd: string }> {
+): WriteCmd[] {
   if (platform === 'darwin') {
-    return [{ cmd: 'pbcopy', args: [] }]
+    return [{ cmd: 'pbcopy', args: [], stdin: true }]
   }
 
   if (platform === 'win32') {
-    return [{ cmd: 'powershell', args: ['-NoProfile', '-NonInteractive', '-Command', 'Set-Clipboard -Value $input'] }]
+    return [{ cmd: 'powershell', args: ['-NoProfile', '-NonInteractive'], stdin: false }]
   }
 
-  const attempts: Array<{ args: readonly string[]; cmd: string }> = []
+  const attempts: WriteCmd[] = []
 
   if (env.WSL_INTEROP || env.WSL_DISTRO_NAME) {
-    attempts.push({
-      cmd: 'powershell.exe',
-      args: ['-NoProfile', '-NonInteractive', '-Command', 'Set-Clipboard -Value $input']
-    })
+    attempts.push({ cmd: 'powershell.exe', args: ['-NoProfile', '-NonInteractive'], stdin: false })
   }
 
   if (env.WAYLAND_DISPLAY) {
-    attempts.push({ cmd: 'wl-copy', args: ['--type', 'text/plain'] })
+    attempts.push({ cmd: 'wl-copy', args: ['--type', 'text/plain'], stdin: true })
   }
 
-  attempts.push({ cmd: 'xclip', args: ['-selection', 'clipboard', '-in'] })
-  attempts.push({ cmd: 'xsel', args: ['--clipboard', '--input'] })
+  attempts.push({ cmd: 'xclip', args: ['-selection', 'clipboard', '-in'], stdin: true })
+  attempts.push({ cmd: 'xsel', args: ['--clipboard', '--input'], stdin: true })
 
   return attempts
 }
@@ -144,14 +153,21 @@ export async function writeClipboardText(
 ): Promise<boolean> {
   const candidates = writeClipboardCommands(platform, env)
 
-  for (const { cmd, args } of candidates) {
+  for (const cmdEntry of candidates) {
     try {
       const ok = await new Promise<boolean>(resolve => {
-        const child = start(cmd, [...args], { stdio: ['pipe', 'ignore', 'ignore'], windowsHide: true })
-
-        child.once('error', () => resolve(false))
-        child.once('close', code => resolve(code === 0))
-        child.stdin?.end(text)
+        if (cmdEntry.stdin) {
+          const child = start(cmdEntry.cmd, [...cmdEntry.args], { stdio: ['pipe', 'ignore', 'ignore'], windowsHide: true })
+          child.once('error', () => resolve(false))
+          child.once('close', (code: number | null) => resolve(code === 0))
+          child.stdin?.end(text)
+        } else {
+          const b64 = Buffer.from(text, 'utf8').toString('base64')
+          const script = _powershellWriteScript(b64)
+          const child = start(cmdEntry.cmd, [...cmdEntry.args, '-Command', script], { stdio: ['ignore', 'ignore', 'ignore'], windowsHide: true })
+          child.once('error', () => resolve(false))
+          child.once('close', (code: number | null) => resolve(code === 0))
+        }
       })
 
       if (ok) {
diff --git a/utils.py b/utils.py
index 156fd38bdc3..cb08ba12869 100644
--- a/utils.py
+++ b/utils.py
@@ -87,6 +87,7 @@ def atomic_json_write(
     data: Any,
     *,
     indent: int = 2,
+    mode: int | None = None,
     **dump_kwargs: Any,
 ) -> None:
     """Write JSON data to a file atomically.
@@ -99,13 +100,16 @@ def atomic_json_write(
         path: Target file path (will be created or overwritten).
         data: JSON-serializable data to write.
         indent: JSON indentation (default 2).
+        mode: Optional final permission mode. When set, the temp file is
+            created and replaced with this mode, avoiding chmod-after-write
+            TOCTOU exposure for secret-bearing files.
         **dump_kwargs: Additional keyword args forwarded to json.dump(), such
             as default=str for non-native types.
     """
     path = Path(path)
     path.parent.mkdir(parents=True, exist_ok=True)
 
-    original_mode = _preserve_file_mode(path)
+    original_mode = None if mode is not None else _preserve_file_mode(path)
 
     fd, tmp_path = tempfile.mkstemp(
         dir=str(path.parent),
@@ -113,6 +117,8 @@ def atomic_json_write(
         suffix=".tmp",
     )
     try:
+        if mode is not None:
+            os.fchmod(fd, mode)
         with os.fdopen(fd, "w", encoding="utf-8") as f:
             json.dump(
                 data,
@@ -125,7 +131,13 @@ def atomic_json_write(
             os.fsync(f.fileno())
         # Preserve symlinks — swap in-place on the real file (GitHub #16743).
         real_path = atomic_replace(tmp_path, path)
-        _restore_file_mode(real_path, original_mode)
+        if mode is not None:
+            try:
+                os.chmod(real_path, mode)
+            except OSError:
+                pass
+        else:
+            _restore_file_mode(Path(real_path), original_mode)
     except BaseException:
         # Intentionally catch BaseException so temp-file cleanup still runs for
         # KeyboardInterrupt/SystemExit before re-raising the original signal.
diff --git a/uv.lock b/uv.lock
index 24205de8627..299c659fd2f 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1640,6 +1640,7 @@ all = [
     { name = "ruff" },
     { name = "setuptools" },
     { name = "simple-term-menu" },
+    { name = "starlette" },
     { name = "ty" },
     { name = "uvicorn", extra = ["standard"] },
     { name = "youtube-transcript-api" },
@@ -1658,6 +1659,7 @@ cli = [
 ]
 computer-use = [
     { name = "mcp" },
+    { name = "starlette" },
 ]
 daytona = [
     { name = "daytona" },
@@ -1670,6 +1672,7 @@ dev = [
     { name = "pytest-timeout" },
     { name = "ruff" },
     { name = "setuptools" },
+    { name = "starlette" },
     { name = "ty" },
 ]
 dingtalk = [
@@ -1716,6 +1719,7 @@ matrix = [
 ]
 mcp = [
     { name = "mcp" },
+    { name = "starlette" },
 ]
 messaging = [
     { name = "aiohttp" },
@@ -1755,6 +1759,7 @@ termux = [
     { name = "python-telegram-bot", extra = ["webhooks"] },
     { name = "pywinpty", marker = "sys_platform == 'win32'" },
     { name = "simple-term-menu" },
+    { name = "starlette" },
 ]
 termux-all = [
     { name = "agent-client-protocol" },
@@ -1769,6 +1774,7 @@ termux-all = [
     { name = "python-telegram-bot", extra = ["webhooks"] },
     { name = "pywinpty", marker = "sys_platform == 'win32'" },
     { name = "simple-term-menu" },
+    { name = "starlette" },
     { name = "uvicorn", extra = ["standard"] },
 ]
 tts-premium = [
@@ -1781,6 +1787,7 @@ voice = [
 ]
 web = [
     { name = "fastapi" },
+    { name = "starlette" },
     { name = "uvicorn", extra = ["standard"] },
 ]
 wecom = [
@@ -1886,6 +1893,10 @@ requires-dist = [
     { name = "slack-sdk", marker = "extra == 'messaging'", specifier = "==3.40.1" },
     { name = "slack-sdk", marker = "extra == 'slack'", specifier = "==3.40.1" },
     { name = "sounddevice", marker = "extra == 'voice'", specifier = "==0.5.5" },
+    { name = "starlette", marker = "extra == 'computer-use'", specifier = "==1.0.1" },
+    { name = "starlette", marker = "extra == 'dev'", specifier = "==1.0.1" },
+    { name = "starlette", marker = "extra == 'mcp'", specifier = "==1.0.1" },
+    { name = "starlette", marker = "extra == 'web'", specifier = "==1.0.1" },
     { name = "tenacity", specifier = "==9.1.4" },
     { name = "ty", marker = "extra == 'dev'", specifier = "==0.0.21" },
     { name = "tzdata", marker = "sys_platform == 'win32'", specifier = "==2025.3" },
@@ -4084,15 +4095,15 @@ wheels = [
 
 [[package]]
 name = "starlette"
-version = "0.52.1"
+version = "1.0.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
     { name = "typing-extensions", marker = "python_full_version < '3.13'" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/c4/68/79977123bb7be889ad680d79a40f339082c1978b5cfcf62c2d8d196873ac/starlette-0.52.1.tar.gz", hash = "sha256:834edd1b0a23167694292e94f597773bc3f89f362be6effee198165a35d62933", size = 2653702, upload-time = "2026-01-18T13:34:11.062Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/08/a3/84e821cc54b4ab50ae6dbc6ac3800a651b65ec35f045cc73785380654057/starlette-1.0.1.tar.gz", hash = "sha256:512399c5f1de7fac99c88572212ded9ddeddef2fb32afa82d724000e88b38f4f", size = 2659596, upload-time = "2026-05-21T21:58:58.433Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl", hash = "sha256:0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74", size = 74272, upload-time = "2026-01-18T13:34:09.188Z" },
+    { url = "https://files.pythonhosted.org/packages/ec/e1/b2df4bc09a1e51ff664c1e17018a4274b42e5e9352e4a478ea540512dc88/starlette-1.0.1-py3-none-any.whl", hash = "sha256:7c0e69b2ee1c848bd54669d908500117a3ee13de603a21427e5c6fc1adf98dcd", size = 72802, upload-time = "2026-05-21T21:58:56.551Z" },
 ]
 
 [[package]]
diff --git a/website/docs/developer-guide/context-compression-and-caching.md b/website/docs/developer-guide/context-compression-and-caching.md
index 4b511756181..55641b16f27 100644
--- a/website/docs/developer-guide/context-compression-and-caching.md
+++ b/website/docs/developer-guide/context-compression-and-caching.md
@@ -111,6 +111,17 @@ tail_token_budget    = 100,000 × 0.20 = 20,000
 max_summary_tokens   = min(200,000 × 0.05, 12,000) = 10,000
 ```
 
+:::note Threshold is derived from the MAIN model's context window
+`threshold_tokens` is always `threshold × context_length`, where `context_length`
+is the **main agent model's** context window — never the auxiliary/summary
+model's. On a 262,144-token model at the default `0.50`, the threshold is
+`262,144 × 0.50 = 131,072`. That number being close to a common "128K context"
+is a coincidence of the percentage, not a sign that the auxiliary model's window
+is the trigger. The auxiliary model's context window is a separate concern — see
+the "Summary model context length" warning below for how it affects whether a
+summary can be produced, not when compression fires.
+:::
+
 
 ## Compression Algorithm
 
diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md
index 5882d4aaac3..b8b41a621e0 100644
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@@ -58,6 +58,7 @@ hermes [global-options] <command> [subcommand/options]
 | `hermes doctor` | Diagnose config and dependency issues. |
 | `hermes security audit` | On-demand supply-chain audit (OSV.dev) for the venv, plugin requirements, and pinned MCP servers. |
 | `hermes dump` | Copy-pasteable setup summary for support/debugging. |
+| `hermes prompt-size` | Show a byte breakdown of the system prompt + tool schemas (skills index, memory, profile). Runs offline. |
 | `hermes debug` | Debug tools — upload logs and system info for support. |
 | `hermes backup` | Back up Hermes home directory to a zip file. |
 | `hermes checkpoints` | Inspect / prune / clear `~/.hermes/checkpoints/` (the shadow store used by `/rollback`). Run with no args for a status overview. |
@@ -886,6 +887,50 @@ Lines without a parseable timestamp are included when `--since` is active (they
 
 Hermes uses Python's `RotatingFileHandler`. Old logs are rotated automatically — look for `agent.log.1`, `agent.log.2`, etc. The `hermes logs list` subcommand shows all log files including rotated ones.
 
+
+## `hermes prompt-size`
+
+```bash
+hermes prompt-size [--platform <name>] [--json]
+```
+
+Reports the fixed prompt budget for a fresh session — what gets sent on every
+API call *before* any conversation content. Useful when a downstream adapter or
+proxy has a tighter prompt budget than the model's context window, or when you
+want to see which block (skills index, memory, profile) dominates.
+
+It builds the same system prompt the agent would, then breaks it down:
+
+- **System prompt total** — full assembled prompt (identity, guidance, skills
+  index, context files, memory, profile, timestamp).
+- **Skills index** — the `<available_skills>` block. This is often the largest
+  single block when many skills are installed.
+- **Memory** and **user profile** — your `MEMORY.md` / `USER.md` snapshots.
+- **Prompt tiers** — stable / context / volatile, matching how Hermes layers
+  the prompt for cache-friendliness.
+- **Tool schemas** — the JSON for all enabled tools (the other half of the
+  fixed per-call payload).
+
+Runs entirely offline — no API call, works with no credentials configured.
+
+```bash
+# Human-readable breakdown for the CLI platform (default)
+hermes prompt-size
+
+# Simulate a messaging platform's prompt (different platform hint)
+hermes prompt-size --platform telegram
+
+# Machine-readable output for scripts
+hermes prompt-size --json
+```
+
+:::tip
+The skills index and tool schemas scale with how many skills and tools you have
+enabled. To shrink the prompt, disable unused toolsets (`hermes tools`) or
+uninstall skills you don't need (`hermes skills`). Context files (AGENTS.md,
+.cursorrules) in your current directory also count toward the total.
+:::
+
 ## `hermes config`
 
 ```bash
diff --git a/website/docs/user-guide/features/honcho.md b/website/docs/user-guide/features/honcho.md
index 61dd73e8f2e..b971bea272d 100644
--- a/website/docs/user-guide/features/honcho.md
+++ b/website/docs/user-guide/features/honcho.md
@@ -106,6 +106,10 @@ The auto-injected dialectic scales `dialecticReasoningLevel` by query length: +1
 
 Honcho is configured in `~/.honcho/config.json` (global) or `$HERMES_HOME/honcho.json` (profile-local). The setup wizard handles this for you.
 
+### Self-Hosted Honcho with Authentication
+
+When pointing Hermes at a self-hosted Honcho server, `hermes honcho setup` (and `hermes memory setup`) ask for a **local JWT / bearer token** after the base URL. Paste a JWT signed with the server's `AUTH_JWT_SECRET` (the Honcho compose env var) to enable authenticated access; leave it blank for servers running with `AUTH_USE_AUTH=false`. The local token is stored under the host block (`hosts.<host>.apiKey` in `honcho.json`), separate from any cloud `apiKey`, so you can flip the `Cloud or local?` prompt back to `cloud` later without losing either credential.
+
 ### Full Config Reference
 
 | Key | Default | Description |
@@ -199,11 +203,12 @@ When Honcho is active as the memory provider, five tools become available:
 
 ## CLI Commands
 
-The `hermes honcho` subcommand is **only registered when Honcho is the active memory provider** (`memory.provider: honcho` in `config.yaml`). Run `hermes memory setup` and pick Honcho first; the subcommand appears on the next invocation.
+The `hermes honcho` subcommand is **only registered when Honcho is the active memory provider** (`memory.provider: honcho` in `config.yaml`). On a fresh install, configure Honcho directly with `hermes memory setup honcho` (or run `hermes memory setup` and pick it from the list); the `hermes honcho` subcommand then appears on the next invocation.
 
 ```bash
+hermes memory setup honcho    # Configure Honcho directly (works before activation)
 hermes honcho status          # Connection status, config, and key settings
-hermes honcho setup           # Redirects to `hermes memory setup`
+hermes honcho setup           # Redirects to `hermes memory setup` (post-activation alias)
 hermes honcho strategy        # Show or set session strategy (per-session/per-directory/per-repo/global)
 hermes honcho peer            # Show or update peer names + dialectic reasoning level
 hermes honcho mode            # Show or set recall mode (hybrid/context/tools)
diff --git a/website/docs/user-guide/features/kanban.md b/website/docs/user-guide/features/kanban.md
index ede083b0590..0192f9c6461 100644
--- a/website/docs/user-guide/features/kanban.md
+++ b/website/docs/user-guide/features/kanban.md
@@ -155,6 +155,36 @@ events WebSocket is pinned to a board at connection time; switching in
 the UI opens a fresh WS against the new board.
 
 
+## File attachments
+
+Tasks can carry file attachments — PDFs, images, source documents — so a
+worker has the source material it needs without you pasting paths into the
+body and hoping it finds them.
+
+- **Upload** — open a task in the dashboard drawer and use the
+  **Attachments** section's *Upload file* button (multiple files at once
+  are fine). Each upload is capped at 25 MB.
+- **Storage** — files land under
+  `<hermes-home>/kanban/attachments/<task_id>/` for the default board, or
+  `<hermes-home>/kanban/boards/<slug>/attachments/<task_id>/` for a named
+  board. Set `HERMES_KANBAN_ATTACHMENTS_ROOT` to pin a custom location.
+- **What the worker sees** — when the dispatcher hands a task to a worker,
+  the worker's context includes an **Attachments** section listing each
+  file's name and its **absolute path**. The worker has full file/terminal
+  tool access, so it reads attachments directly (`read_file`, or shell
+  tools like `pdftotext`).
+- **Download / remove** — the drawer lists each attachment with a download
+  link and a remove (×) control. Removing an attachment deletes both the
+  metadata row and the on-disk file.
+
+:::note Remote terminal backends
+Attachment paths resolve directly on the **local** terminal backend, which
+is the default for Kanban workers. If you run workers on a remote backend
+(Docker, Modal), mount the board's `attachments/` directory into the
+sandbox so the absolute paths in the worker context are reachable.
+:::
+
+
 ## Quick start
 
 The commands below are **you** (the human) setting up the board and creating tasks. Once a task is assigned, the dispatcher spawns the assigned profile as a worker, and from there **the model drives the task through `kanban_*` tool calls, not CLI commands** — see [How workers interact with the board](#how-workers-interact-with-the-board).
diff --git a/website/docs/user-guide/features/memory-providers.md b/website/docs/user-guide/features/memory-providers.md
index f584c7288a8..00f2555d620 100644
--- a/website/docs/user-guide/features/memory-providers.md
+++ b/website/docs/user-guide/features/memory-providers.md
@@ -66,7 +66,7 @@ AI-native cross-session user modeling with dialectic reasoning, session-scoped c
 hermes memory setup        # select "honcho" — runs the Honcho-specific post-setup
 ```
 
-The legacy `hermes honcho setup` command still works (it now redirects to `hermes memory setup`), but is only registered after Honcho is selected as the active memory provider.
+On a fresh install, configure Honcho directly with `hermes memory setup honcho`. The legacy `hermes honcho setup` command still works (it now redirects to `hermes memory setup`), but is only registered after Honcho is selected as the active memory provider.
 
 **Config:** `$HERMES_HOME/honcho.json` (profile-local) or `~/.honcho/config.json` (global). Resolution order: `$HERMES_HOME/honcho.json` > `~/.hermes/honcho.json` > `~/.honcho/config.json`. See the [config reference](https://github.com/NousResearch/hermes-agent/blob/main/plugins/memory/honcho/README.md) and the [Honcho integration guide](https://docs.honcho.dev/v3/guides/integrations/hermes).
 
diff --git a/website/docs/user-guide/messaging/weixin.md b/website/docs/user-guide/messaging/weixin.md
index a0d25ee8cb9..30d75dd5bcd 100644
--- a/website/docs/user-guide/messaging/weixin.md
+++ b/website/docs/user-guide/messaging/weixin.md
@@ -123,6 +123,8 @@ Set these in `config.yaml` under `platforms.weixin.extra`:
 | `allow_from` | `[]` | User IDs allowed for DMs (when dm_policy=allowlist) |
 | `group_allow_from` | `[]` | Group IDs allowed (when group_policy=allowlist) |
 | `split_multiline_messages` | `false` | When `true`, split multi-line replies into multiple chat messages (legacy behavior). When `false`, keep multi-line replies as one message unless they exceed the length limit. |
+| `text_batch_delay_seconds` | `3.0` | Quiet period (seconds) before a buffered burst of rapid text messages is flushed as one combined request. iLink delivers messages individually, so this debounce avoids one agent invocation per fragment. Set `0` to dispatch each message immediately. |
+| `text_batch_split_delay_seconds` | `5.0` | Extended flush delay used when the latest fragment is near the split threshold (long messages iLink may have chunked). |
 
 ## Access Policies
 
diff --git a/website/docs/user-guide/messaging/whatsapp.md b/website/docs/user-guide/messaging/whatsapp.md
index d2bd52a56b3..5fb5eb2aecf 100644
--- a/website/docs/user-guide/messaging/whatsapp.md
+++ b/website/docs/user-guide/messaging/whatsapp.md
@@ -201,6 +201,22 @@ Code blocks and inline code are preserved as-is since WhatsApp supports triple-b
 
 When the agent calls tools (web search, file operations, etc.), WhatsApp displays real-time progress indicators showing which tool is running. This is enabled by default — no configuration needed.
 
+### Message Batching (Debounce)
+
+WhatsApp delivers each message individually, so a rapid burst (forwarded batches, paste-splits, multi-line text) would otherwise trigger a separate agent invocation per fragment — wasting tokens and producing several disjointed replies. The adapter buffers successive text messages from the same chat and dispatches them as one combined request after a short quiet period (default **5s**, extended to **10s** for very long fragments). Tune via `config.yaml`:
+
+```yaml
+# ~/.hermes/config.yaml
+gateway:
+  platforms:
+    whatsapp:
+      extra:
+        text_batch_delay_seconds: 5.0         # quiet period before flushing a batch
+        text_batch_split_delay_seconds: 10.0  # extended delay near the split threshold
+```
+
+Set `text_batch_delay_seconds: 0` to dispatch each message immediately (disables batching).
+
 ---
 
 ## Troubleshooting