Merge branch 'main' of github.com:NousResearch/hermes-agent into bb/gui

# Conflicts: # tui_gateway/server.py
2026-07-27 17:58:07 +00:00 · 2026-05-30 13:19:27 -05:00 · 2026-05-30 13:19:27 -05:00 · c83cd38391
commit c83cd38391
parent 2e157a2154 5921d66785
157 changed files with 10059 additions and 831 deletions
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@ -1680,26 +1680,48 @@ def _read_main_provider() -> str:
 # per turn — no lock needed. Cleared by ``clear_runtime_main()``.
 _RUNTIME_MAIN_PROVIDER: str = ""
 _RUNTIME_MAIN_MODEL: str = ""
+_RUNTIME_MAIN_BASE_URL: str = ""
+_RUNTIME_MAIN_API_KEY: str = ""
+_RUNTIME_MAIN_API_MODE: str = ""


-def set_runtime_main(provider: str, model: str) -> None:
-    """Record the live runtime provider/model for the current AIAgent.
+def set_runtime_main(
+    provider: str,
+    model: str,
+    *,
+    base_url: str = "",
+    api_key: str = "",
+    api_mode: str = "",
+) -> None:
+    """Record the live runtime provider/model/credentials for the current AIAgent.

    Called by ``run_agent.AIAgent._sync_runtime_main_for_aux_routing`` (or
    equivalent setter) at the top of each turn so that
    ``_read_main_provider`` / ``_read_main_model`` reflect CLI/gateway
    overrides instead of the stale config.yaml default.
+
+    For ``custom:`` providers, ``base_url`` and ``api_key`` must also be
+    recorded so that ``_resolve_auto`` can construct a valid client in
+    Step 1 instead of falling through to the aggregator chain.
    """
    global _RUNTIME_MAIN_PROVIDER, _RUNTIME_MAIN_MODEL
+    global _RUNTIME_MAIN_BASE_URL, _RUNTIME_MAIN_API_KEY, _RUNTIME_MAIN_API_MODE
    _RUNTIME_MAIN_PROVIDER = (provider or "").strip().lower()
    _RUNTIME_MAIN_MODEL = (model or "").strip()
+    _RUNTIME_MAIN_BASE_URL = (base_url or "").strip()
+    _RUNTIME_MAIN_API_KEY = api_key.strip() if isinstance(api_key, str) else ""
+    _RUNTIME_MAIN_API_MODE = (api_mode or "").strip()


 def clear_runtime_main() -> None:
    """Clear the runtime override (e.g. on session end)."""
    global _RUNTIME_MAIN_PROVIDER, _RUNTIME_MAIN_MODEL
+    global _RUNTIME_MAIN_BASE_URL, _RUNTIME_MAIN_API_KEY, _RUNTIME_MAIN_API_MODE
    _RUNTIME_MAIN_PROVIDER = ""
    _RUNTIME_MAIN_MODEL = ""
+    _RUNTIME_MAIN_BASE_URL = ""
+    _RUNTIME_MAIN_API_KEY = ""
+    _RUNTIME_MAIN_API_MODE = ""


 def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str], Optional[str]]:
@ -2980,6 +3002,18 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
    runtime_api_key = runtime.get("api_key", "")
    runtime_api_mode = str(runtime.get("api_mode") or "")

+    # Fall back to process-local globals when main_runtime dict was not
+    # provided or was incomplete.  ``set_runtime_main()`` now records
+    # base_url/api_key/api_mode alongside provider/model, so custom:
+    # providers get the full credential surface in Step 1 of the
+    # auto-detect chain.
+    if not runtime_base_url and _RUNTIME_MAIN_BASE_URL:
+        runtime_base_url = _RUNTIME_MAIN_BASE_URL
+    if not runtime_api_key and _RUNTIME_MAIN_API_KEY:
+        runtime_api_key = _RUNTIME_MAIN_API_KEY
+    if not runtime_api_mode and _RUNTIME_MAIN_API_MODE:
+        runtime_api_mode = _RUNTIME_MAIN_API_MODE
+
    # ── Warn once if OPENAI_BASE_URL is set but config.yaml uses a named
    #    provider (not 'custom').  This catches the common "env poisoning"
    #    scenario where a user switches providers via `hermes model` but the
--- a/agent/chat_completion_helpers.py
+++ b/agent/chat_completion_helpers.py
@ -1283,6 +1283,18 @@ def handle_max_iterations(agent, messages: list, api_call_count: int) -> str:
            agent._copy_reasoning_content_for_api(msg, api_msg)
            for internal_field in ("reasoning", "finish_reason", "_thinking_prefill"):
                api_msg.pop(internal_field, None)
+            # Strict OpenAI-compatible gateways (Fireworks-backed OpenCode Go,
+            # Mistral, Moonshot/Kimi) reject any message key outside the Chat
+            # Completions schema. The main loop drops these via
+            # ChatCompletionsTransport.convert_messages(), but the summary path
+            # hand-builds messages and calls chat.completions.create() directly,
+            # bypassing the transport — so mirror that sanitization here:
+            # tool_name (SQLite FTS bookkeeping), the codex_* reasoning carriers,
+            # and every Hermes-internal underscore-prefixed scaffolding key.
+            for schema_foreign in ("tool_name", "codex_reasoning_items", "codex_message_items"):
+                api_msg.pop(schema_foreign, None)
+            for internal_key in [k for k in api_msg if isinstance(k, str) and k.startswith("_")]:
+                api_msg.pop(internal_key, None)
            if _needs_sanitize:
                agent._sanitize_tool_calls_for_strict_api(api_msg)
            api_messages.append(api_msg)
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@ -40,17 +40,47 @@ SUMMARY_PREFIX = (
    "window — treat it as background reference, NOT as active instructions. "
    "Do NOT answer questions or fulfill requests mentioned in this summary; "
    "they were already addressed. "
-    "Your current task is identified in the '## Active Task' section of the "
-    "summary — resume exactly from there. "
+    "Respond ONLY to the latest user message that appears AFTER this "
+    "summary — that message is the single source of truth for what to do "
+    "right now. "
+    "If the latest user message is consistent with the '## Active Task' "
+    "section, you may use the summary as background. If the latest user "
+    "message contradicts, supersedes, changes topic from, or in any way "
+    "diverges from '## Active Task' / '## In Progress' / '## Pending User "
+    "Asks' / '## Remaining Work', the latest message WINS — discard those "
+    "stale items entirely and do not 'wrap up the old task first'. "
+    "Reverse signals in the latest message (e.g. 'stop', 'undo', 'roll "
+    "back', 'just verify', 'don't do that anymore', 'never mind', a new "
+    "topic) must immediately end any in-flight work described in the "
+    "summary; do not re-surface it in later turns. "
    "IMPORTANT: Your persistent memory (MEMORY.md, USER.md) in the system "
    "prompt is ALWAYS authoritative and active — never ignore or deprioritize "
    "memory content due to this compaction note. "
-    "Respond ONLY to the latest user message "
-    "that appears AFTER this summary. The current session state (files, "
-    "config, etc.) may reflect work described here — avoid repeating it:"
+    "The current session state (files, config, etc.) may reflect work "
+    "described here — avoid repeating it:"
 )
 LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:"

+# Handoff prefixes that shipped in earlier releases. A summary persisted under
+# one of these can be inherited into a resumed lineage (#35344); when it is
+# re-normalized on re-compaction we must strip the OLD prefix too, otherwise the
+# stale directive it carried (e.g. "resume exactly from Active Task") survives
+# embedded in the body and keeps hijacking replies. Keep newest-first; entries
+# are matched literally. Add a frozen copy here whenever SUMMARY_PREFIX changes.
+_HISTORICAL_SUMMARY_PREFIXES = (
+    # Pre-#35344: contained the self-contradicting "resume exactly" directive.
+    "[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted "
+    "into the summary below. This is a handoff from a previous context "
+    "window — treat it as background reference, NOT as active instructions. "
+    "Do NOT answer questions or fulfill requests mentioned in this summary; "
+    "they were already addressed. "
+    "Your current task is identified in the '## Active Task' section of the "
+    "summary — resume exactly from there. "
+    "Respond ONLY to the latest user message "
+    "that appears AFTER this summary. The current session state (files, "
+    "config, etc.) may reflect work described here — avoid repeating it:",
+)
+
 # Minimum tokens for the summary output
 _MIN_SUMMARY_TOKENS = 2000
 # Proportion of compressed content to allocate for summary
@ -1236,11 +1266,27 @@ Summary generation was unavailable, so this is a best-effort deterministic fallb

        # Shared structured template (used by both paths).
        _template_sections = f"""## Active Task
-[THE SINGLE MOST IMPORTANT FIELD. Copy the user's most recent request or
-task assignment verbatim — the exact words they used. If multiple tasks
-were requested and only some are done, list only the ones NOT yet completed.
-Continuation should pick up exactly here. Example:
+[THE SINGLE MOST IMPORTANT FIELD. Capture the user's most recent unfulfilled
+input verbatim — the exact words they used. This includes:
+- Explicit task assignments ("refactor the auth module")
+- Questions awaiting an answer ("waarom staat X op Y?", "wat zijn de volgende stappen?")
+- Decisions awaiting input ("optie A of B?")
+- Ongoing discussions where the assistant owes the next substantive reply
+A conversation where the user just asked a question IS an active task — the
+task is "answer that question with full context". Do NOT write "None" merely
+because the user did not issue an imperative command; reserve "None" for the
+rare case where the last exchange was fully resolved and the user said
+something like "thanks, that's all".
+If multiple items are outstanding, list only the ones NOT yet completed.
+Continuation should pick up exactly here. Examples:
 "User asked: 'Now refactor the auth module to use JWT instead of sessions'"
+"User asked: 'Waarom stond provider ineens op openrouter?' — needs investigation + answer"
+"User chose option A; awaiting implementation of step 2"
+If the user's most recent message was a reverse signal (stop, undo, roll
+back, never mind, just verify, change of topic) that supersedes earlier
+work, write the reverse signal verbatim and DO NOT carry forward the
+cancelled task. Example: "User asked: 'Stop the i18n refactor and just
+verify the current diff' — earlier i18n in-flight work is cancelled."
 If no outstanding task exists, write "None."]

 ## Goal
@ -1306,7 +1352,7 @@ PREVIOUS SUMMARY:
 NEW TURNS TO INCORPORATE:
 {content_to_summarize}

-Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new completed actions to the numbered list (continue numbering). Move items from "In Progress" to "Completed Actions" when done. Move answered questions to "Resolved Questions". Update "Active State" to reflect current state. Remove information only if it is clearly obsolete. CRITICAL: Update "## Active Task" to reflect the user's most recent unfulfilled request — this is the most important field for task continuity.
+Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new completed actions to the numbered list (continue numbering). Move items from "In Progress" to "Completed Actions" when done. Move answered questions to "Resolved Questions". Update "Active State" to reflect current state. Remove information only if it is clearly obsolete. CRITICAL: Update "## Active Task" to reflect the user's most recent unfulfilled input — this includes any question, decision request, or discussion turn that the assistant has not yet answered. Only write "None" if the last exchange was fully resolved.

 {_template_sections}"""
        else:
@ -1470,9 +1516,16 @@ The user has requested that this compaction PRIORITISE preserving all informatio

    @staticmethod
    def _strip_summary_prefix(summary: str) -> str:
-        """Return summary body without the current or legacy handoff prefix."""
+        """Return summary body without the current, legacy, or any historical
+        handoff prefix.
+
+        Historical prefixes must be stripped too: a handoff persisted under an
+        older prefix can be inherited into a resumed lineage (#35344), and if we
+        only re-prepend the current prefix without removing the old one, the
+        stale directive it carried stays embedded in the body.
+        """
        text = (summary or "").strip()
-        for prefix in (SUMMARY_PREFIX, LEGACY_SUMMARY_PREFIX):
+        for prefix in (SUMMARY_PREFIX, LEGACY_SUMMARY_PREFIX, *_HISTORICAL_SUMMARY_PREFIXES):
            if text.startswith(prefix):
                return text[len(prefix):].lstrip()
        return text
@ -1486,7 +1539,9 @@ The user has requested that this compaction PRIORITISE preserving all informatio
    @staticmethod
    def _is_context_summary_content(content: Any) -> bool:
        text = _content_text_for_contains(content).lstrip()
-        return text.startswith(SUMMARY_PREFIX) or text.startswith(LEGACY_SUMMARY_PREFIX)
+        if text.startswith(SUMMARY_PREFIX) or text.startswith(LEGACY_SUMMARY_PREFIX):
+            return True
+        return any(text.startswith(p) for p in _HISTORICAL_SUMMARY_PREFIXES)

    @classmethod
    def _find_latest_context_summary(
--- a/agent/context_engine.py
+++ b/agent/context_engine.py
@ -115,6 +115,15 @@ class ContextEngine(ABC):
        """
        return False

+    def should_defer_preflight_to_real_usage(self, rough_tokens: int) -> bool:
+        """Return True when preflight should trust recent real usage instead.
+
+        Built-in compression uses this to avoid re-compacting from known-noisy
+        rough estimates after a compressed request has already fit. Third-party
+        engines can ignore it safely.
+        """
+        return False
+
    # -- Optional: manual /compress preflight ------------------------------

    def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
--- a/agent/conversation_compression.py
+++ b/agent/conversation_compression.py
@ -575,19 +575,18 @@ def compress_context(
            force=True,
        )

-    # Update token estimate after compaction so pressure calculations
-    # use the post-compression count, not the stale pre-compression one.
-    # Use estimate_request_tokens_rough() so tool schemas are included —
-    # with 50+ tools enabled, schemas alone can add 20-30K tokens, and
-    # omitting them delays the next compression cycle far past the
-    # configured threshold (issue #14695).
+    # Keep the post-compression rough estimate for diagnostics, but do not
+    # treat it as provider-reported prompt usage. Schema-heavy rough estimates
+    # can remain above threshold even after the next real API request fits.
    _compressed_est = estimate_request_tokens_rough(
        compressed,
        system_prompt=new_system_prompt or "",
        tools=agent.tools or None,
    )
-    agent.context_compressor.last_prompt_tokens = _compressed_est
+    agent.context_compressor.last_compression_rough_tokens = _compressed_est
+    agent.context_compressor.last_prompt_tokens = -1
    agent.context_compressor.last_completion_tokens = 0
+    agent.context_compressor.awaiting_real_usage_after_compression = True

    # Clear the file-read dedup cache.  After compression the original
    # read content is summarised away — if the model re-reads the same
@ -599,7 +598,7 @@ def compress_context(
        pass

    logger.info(
-        "context compression done: session=%s messages=%d->%d tokens=~%s",
+        "context compression done: session=%s messages=%d->%d rough_tokens=~%s awaiting_real_usage=true",
        agent.session_id or "none", _pre_msg_count, len(compressed),
        f"{_compressed_est:,}",
    )
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@ -392,6 +392,9 @@ def run_conversation(
        set_runtime_main(
            getattr(agent, "provider", "") or "",
            getattr(agent, "model", "") or "",
+            base_url=getattr(agent, "base_url", "") or "",
+            api_key=getattr(agent, "api_key", "") or "",
+            api_mode=getattr(agent, "api_mode", "") or "",
        )
    except Exception:
        pass
@ -600,18 +603,50 @@ def run_conversation(
            system_prompt=active_system_prompt or "",
            tools=agent.tools or None,
        )
+        _compressor = agent.context_compressor
+        _defer_preflight = getattr(
+            _compressor,
+            "should_defer_preflight_to_real_usage",
+            lambda _tokens: False,
+        )
+        _preflight_deferred = _defer_preflight(_preflight_tokens)

-        if agent.context_compressor.should_compress(_preflight_tokens):
+        if not _preflight_deferred:
+            # Keep the CLI/ACP context display in sync with what preflight
+            # actually measured.  The status bar reads
+            # ``compressor.last_prompt_tokens``, which otherwise only updates
+            # from a *successful* API response.  When the conversation has grown
+            # since the last successful call — or when compression then fails
+            # (e.g. the auxiliary summary model times out) and no fresh usage
+            # arrives — the bar stays stuck at the old, smaller value while
+            # preflight reports a much larger number, looking out of sync.
+            # Seed it with the fresh estimate (only ever revising upward; a real
+            # ``update_from_response`` will correct it after the next API call).
+            # Skipped when deferring — a deferred estimate is known to over-count
+            # vs the last real provider prompt, so trusting it for the display
+            # would re-introduce the very desync we're avoiding.
+            if _preflight_tokens > (_compressor.last_prompt_tokens or 0):
+                _compressor.last_prompt_tokens = _preflight_tokens
+
+        if _preflight_deferred:
+            logger.info(
+                "Skipping preflight compression: rough estimate ~%s >= %s, "
+                "but last real provider prompt was %s after compression",
+                f"{_preflight_tokens:,}",
+                f"{_compressor.threshold_tokens:,}",
+                f"{_compressor.last_real_prompt_tokens:,}",
+            )
+        elif _compressor.should_compress(_preflight_tokens):
            logger.info(
                "Preflight compression: ~%s tokens >= %s threshold (model %s, ctx %s)",
                f"{_preflight_tokens:,}",
-                f"{agent.context_compressor.threshold_tokens:,}",
+                f"{_compressor.threshold_tokens:,}",
                agent.model,
-                f"{agent.context_compressor.context_length:,}",
+                f"{_compressor.context_length:,}",
            )
            agent._emit_status(
                f"📦 Preflight compression: ~{_preflight_tokens:,} tokens "
-                f">= {agent.context_compressor.threshold_tokens:,} threshold. "
+                f">= {_compressor.threshold_tokens:,} threshold. "
                "This may take a moment."
            )
            # May need multiple passes for very large sessions with small
@ -646,8 +681,8 @@ def run_conversation(
                    system_prompt=active_system_prompt or "",
                    tools=agent.tools or None,
                )
-                if _preflight_tokens < agent.context_compressor.threshold_tokens:
-                    break  # Under threshold
+                if not _compressor.should_compress(_preflight_tokens):
+                    break  # Under threshold or anti-thrash guard stopped it

    # Plugin hook: pre_llm_call
    # Fired once per turn before the tool-calling loop.  Plugins can
@ -1457,7 +1492,8 @@ def run_conversation(
                    
                    if retry_count >= max_retries:
                        # Try fallback before giving up
-                        agent._buffer_status(f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback...")
+                        if agent._has_pending_fallback():
+                            agent._buffer_status(f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback...")
                        if agent._try_activate_fallback():
                            retry_count = 0
                            compression_attempts = 0
@ -3059,12 +3095,17 @@ def run_conversation(
                ) and not is_context_length_error

                if is_client_error:
-                    # Try fallback before aborting — a different provider
-                    # may not have the same issue (rate limit, auth, etc.)
-                    if classified.reason == FailoverReason.content_policy_blocked:
-                        agent._buffer_status("⚠️ Provider safety filter blocked this request — trying fallback...")
-                    else:
-                        agent._buffer_status(f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback...")
+                    # Try fallback before aborting — a different provider may
+                    # not have the same issue (rate limit, auth, etc.). Only
+                    # announce the attempt when a fallback chain actually
+                    # exists; otherwise "trying fallback..." is a lie and the
+                    # session looks like it's recovering when it's about to
+                    # abort silently (#35314, #17446).
+                    if agent._has_pending_fallback():
+                        if classified.reason == FailoverReason.content_policy_blocked:
+                            agent._buffer_status("⚠️ Provider safety filter blocked this request — trying fallback...")
+                        else:
+                            agent._buffer_status(f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback...")
                    if agent._try_activate_fallback():
                        retry_count = 0
                        compression_attempts = 0
@ -3207,7 +3248,8 @@ def run_conversation(
                        retry_count = 0
                        continue
                    # Try fallback before giving up entirely
-                    agent._buffer_status(f"⚠️ Max retries ({max_retries}) exhausted — trying fallback...")
+                    if agent._has_pending_fallback():
+                        agent._buffer_status(f"⚠️ Max retries ({max_retries}) exhausted — trying fallback...")
                    if agent._try_activate_fallback():
                        retry_count = 0
                        compression_attempts = 0
@ -3862,6 +3904,11 @@ def run_conversation(
                    # inflate completion_tokens with reasoning,
                    # causing premature compression.  (#12026)
                    _real_tokens = _compressor.last_prompt_tokens
+                elif _compressor.last_prompt_tokens == -1:
+                    # Compression just ran and no API-reported prompt count
+                    # has arrived yet. Avoid treating a schema-heavy rough
+                    # post-compression estimate as real context pressure.
+                    _real_tokens = 0
                else:
                    # Include tool schemas — with 50+ tools enabled
                    # these add 20-30K tokens the messages-only
@ -4443,6 +4490,55 @@ def run_conversation(
        except Exception as _ver_err:
            logger.debug("file-mutation verifier footer failed: %s", _ver_err)

+    # Turn-completion explainer.
+    # When a turn ends abnormally after substantive work — empty content
+    # after retries, a partial/truncated stream, a still-pending tool
+    # result, or an iteration/budget limit — the user otherwise gets a
+    # blank or fragmentary response box with no consolidated reason why
+    # the agent stopped (#34452).  Surface a single user-visible
+    # explanation derived from ``_turn_exit_reason``, mirroring the
+    # file-mutation verifier footer pattern above.
+    #
+    # Gate carefully so healthy turns stay quiet:
+    #   - ``text_response(...)`` exits never produce an explanation
+    #     (handled inside the formatter), so a terse ``Done.`` is silent.
+    #   - We only ACT when there is no genuinely usable reply this turn:
+    #     an empty response, the "(empty)" terminal sentinel, or a
+    #     suspiciously short partial fragment with no terminating
+    #     punctuation (e.g. "The").  A real short answer keeps its text.
+    if not interrupted:
+        try:
+            if agent._turn_completion_explainer_enabled():
+                _stripped = (final_response or "").strip()
+                _is_empty_terminal = _stripped == "" or _stripped == "(empty)"
+                # A short fragment that is not a normal text_response exit
+                # and lacks sentence-ending punctuation is treated as a
+                # truncated partial (the "The" case from #34452).
+                _is_partial_fragment = (
+                    not _is_empty_terminal
+                    and not str(_turn_exit_reason).startswith("text_response")
+                    and len(_stripped) <= 24
+                    and _stripped[-1:] not in {".", "!", "?", "。", "！", "？", "`", ")"}
+                )
+                if _is_empty_terminal or _is_partial_fragment:
+                    _explanation = agent._format_turn_completion_explanation(
+                        _turn_exit_reason
+                    )
+                    if _explanation:
+                        if _is_empty_terminal:
+                            # Replace the bare "(empty)"/blank sentinel with
+                            # the actionable explanation.
+                            final_response = _explanation
+                        else:
+                            # Keep the partial fragment, append the reason so
+                            # the user sees both what arrived and why it
+                            # stopped.
+                            final_response = (
+                                _stripped + "\n\n" + _explanation
+                            )
+        except Exception as _exp_err:
+            logger.debug("turn-completion explainer failed: %s", _exp_err)
+
    _response_transformed = False

    # Plugin hook: transform_llm_output
--- a/agent/lsp/cli.py
+++ b/agent/lsp/cli.py
@ -247,18 +247,13 @@ def _cmd_restart() -> int:


 def _cmd_which(server_id: str) -> int:
-    from agent.lsp.install import INSTALL_RECIPES, hermes_lsp_bin_dir
-    import shutil as _shutil
+    from agent.lsp.install import INSTALL_RECIPES, _existing_binary

    recipe = INSTALL_RECIPES.get(server_id)
    bin_name = (recipe or {}).get("bin", server_id)
-    staged = hermes_lsp_bin_dir() / bin_name
-    if staged.exists():
-        sys.stdout.write(str(staged) + "\n")
-        return 0
-    on_path = _shutil.which(bin_name)
-    if on_path:
-        sys.stdout.write(on_path + "\n")
+    resolved = _existing_binary(bin_name)
+    if resolved:
+        sys.stdout.write(resolved + "\n")
        return 0
    sys.stderr.write(f"{server_id}: not installed\n")
    return 1
@ -292,11 +287,9 @@ def _backend_warnings() -> list:
    suggestion across common platforms.
    """
    import shutil as _shutil
-    from agent.lsp.install import hermes_lsp_bin_dir
+    from agent.lsp.install import _existing_binary
    notes: list = []
-    bash_installed = _shutil.which("bash-language-server") is not None or (
-        (hermes_lsp_bin_dir() / "bash-language-server").exists()
-    )
+    bash_installed = _existing_binary("bash-language-server") is not None
    if bash_installed and _shutil.which("shellcheck") is None:
        notes.append(
            "bash-language-server is installed but shellcheck is missing — "
--- a/agent/lsp/client.py
+++ b/agent/lsp/client.py
@ -44,6 +44,7 @@ from __future__ import annotations
 import asyncio
 import logging
 import os
+import sys
 from pathlib import Path
 from typing import Any, Awaitable, Callable, Dict, List, Optional, Set
 from urllib.parse import quote, unquote
@ -244,15 +245,27 @@ class LSPClient:
            await self._cleanup_process()
            raise

+    @staticmethod
+    def _win_wrap_cmd(cmd: List[str]) -> List[str]:
+        """On Windows, wrap .cmd/.bat shims so CreateProcess can run them."""
+        exe = cmd[0]
+        if exe.lower().endswith((".cmd", ".bat")):
+            return ["cmd.exe", "/c", *cmd]
+        return cmd
+
    async def _spawn(self) -> None:
        env = dict(os.environ)
        if self._env:
            env.update(self._env)

+        cmd = self._command
+        if sys.platform == "win32":
+            cmd = self._win_wrap_cmd(cmd)
+
        try:
            self._proc = await asyncio.create_subprocess_exec(
-                self._command[0],
-                *self._command[1:],
+                cmd[0],
+                *cmd[1:],
                stdin=asyncio.subprocess.PIPE,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
@ -261,7 +274,7 @@ class LSPClient:
            )
        except FileNotFoundError as e:
            raise LSPProtocolError(
-                f"LSP server binary not found: {self._command[0]} ({e})"
+                f"LSP server binary not found: {cmd[0]} ({e})"
            ) from e

        # Drain stderr at debug level — if we don't, the pipe buffer
--- a/agent/lsp/install.py
+++ b/agent/lsp/install.py
@ -108,6 +108,11 @@ INSTALL_RECIPES: Dict[str, Dict[str, Any]] = {
 _install_locks: Dict[str, threading.Lock] = {}
 _install_results: Dict[str, Optional[str]] = {}
 _install_lock_meta = threading.Lock()
+_WINDOWS_WRAPPER_SUFFIXES = (".cmd", ".exe", ".bat")
+
+
+def _is_windows() -> bool:
+    return os.name == "nt"


 def hermes_lsp_bin_dir() -> Path:
@ -120,14 +125,33 @@ def hermes_lsp_bin_dir() -> Path:
    return p


+def _native_binary_candidates(base: Path) -> list[Path]:
+    """Return platform-native executable candidates for a staged binary."""
+    candidates = [base]
+    if _is_windows():
+        existing = {str(base).lower()}
+        for suffix in _WINDOWS_WRAPPER_SUFFIXES:
+            candidate = Path(str(base) + suffix)
+            key = str(candidate).lower()
+            if key not in existing:
+                candidates.append(candidate)
+                existing.add(key)
+    return candidates
+
+
 def _existing_binary(name: str) -> Optional[str]:
    """Probe the staging dir + PATH for a binary named ``name``."""
-    staged = hermes_lsp_bin_dir() / name
-    if staged.exists() and os.access(staged, os.X_OK):
-        return str(staged)
+    for staged in _native_binary_candidates(hermes_lsp_bin_dir() / name):
+        if staged.exists() and os.access(staged, os.X_OK):
+            return str(staged)
    on_path = shutil.which(name)
    if on_path:
        return on_path
+    if _is_windows():
+        for suffix in _WINDOWS_WRAPPER_SUFFIXES:
+            on_path = shutil.which(f"{name}{suffix}")
+            if on_path:
+                return on_path
    return None


@ -250,12 +274,7 @@ def _install_npm(

    # Find the bin
    nm_bin = staging / "node_modules" / ".bin" / bin_name
-    if os.name == "nt":
-        # On Windows npm sometimes drops `.cmd` shims
-        candidates = [nm_bin, nm_bin.with_suffix(".cmd")]
-    else:
-        candidates = [nm_bin]
-    for c in candidates:
+    for c in _native_binary_candidates(nm_bin):
        if c.exists():
            # Symlink into our `lsp/bin/` for stable PATH access.
            link = hermes_lsp_bin_dir() / c.name
@ -301,7 +320,7 @@ def _install_go(pkg: str, bin_name: str) -> Optional[str]:
        logger.warning("[install] go install errored for %s: %s", pkg, e)
        return None
    bin_path = staging / bin_name
-    if os.name == "nt":
+    if _is_windows():
        bin_path = bin_path.with_suffix(".exe")
    if bin_path.exists():
        return str(bin_path)
@ -337,19 +356,24 @@ def _install_pip(pkg: str, bin_name: str) -> Optional[str]:
    except (subprocess.TimeoutExpired, OSError) as e:
        logger.warning("[install] pip install errored for %s: %s", pkg, e)
        return None
-    # Look for the script
-    bin_path = pip_target / "bin" / bin_name
-    if bin_path.exists():
-        link = hermes_lsp_bin_dir() / bin_name
-        if not link.exists():
-            try:
-                link.symlink_to(bin_path)
-            except (OSError, NotImplementedError):
-                try:
-                    shutil.copy2(bin_path, link)
-                except OSError:
-                    return str(bin_path)
-        return str(link if link.exists() else bin_path)
+    # Look for the console script.  POSIX wheels generally write to bin/,
+    # while native Windows installs use Scripts/.
+    script_dirs = [pip_target / "bin"]
+    if _is_windows():
+        script_dirs.append(pip_target / "Scripts")
+    for script_dir in script_dirs:
+        for bin_path in _native_binary_candidates(script_dir / bin_name):
+            if bin_path.exists():
+                link = hermes_lsp_bin_dir() / bin_path.name
+                if not link.exists():
+                    try:
+                        link.symlink_to(bin_path)
+                    except (OSError, NotImplementedError):
+                        try:
+                            shutil.copy2(bin_path, link)
+                        except OSError:
+                            return str(bin_path)
+                return str(link if link.exists() else bin_path)
    return None


--- a/agent/tool_executor.py
+++ b/agent/tool_executor.py
@ -180,28 +180,9 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
        except Exception:
            pass

-        # Checkpoint for file-mutating tools
-        if function_name in {"write_file", "patch"} and agent._checkpoint_mgr.enabled:
-            try:
-                file_path = function_args.get("path", "")
-                if file_path:
-                    work_dir = agent._checkpoint_mgr.get_working_dir_for_path(file_path)
-                    agent._checkpoint_mgr.ensure_checkpoint(work_dir, f"before {function_name}")
-            except Exception:
-                pass
-
-        # Checkpoint before destructive terminal commands
-        if function_name == "terminal" and agent._checkpoint_mgr.enabled:
-            try:
-                cmd = function_args.get("command", "")
-                if _is_destructive_command(cmd):
-                    cwd = function_args.get("workdir") or os.getenv("TERMINAL_CWD", os.getcwd())
-                    agent._checkpoint_mgr.ensure_checkpoint(
-                        cwd, f"before terminal: {cmd[:60]}"
-                    )
-            except Exception:
-                pass
-
+        # ── Block evaluation (BEFORE checkpoint preflight) ───────────
+        # We must know whether the tool will execute before touching
+        # checkpoint state (dedup slot, real snapshots).
        block_result = None
        blocked_by_guardrail = False
        if _ts_scope_block is not None:
@ -224,6 +205,30 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
                    block_result = agent._guardrail_block_result(guardrail_decision)
                    blocked_by_guardrail = True

+        # ── Checkpoint preflight (only for tools that will execute) ──
+        if block_result is None:
+            # Checkpoint for file-mutating tools
+            if function_name in {"write_file", "patch"} and agent._checkpoint_mgr.enabled:
+                try:
+                    file_path = function_args.get("path", "")
+                    if file_path:
+                        work_dir = agent._checkpoint_mgr.get_working_dir_for_path(file_path)
+                        agent._checkpoint_mgr.ensure_checkpoint(work_dir, f"before {function_name}")
+                except Exception:
+                    pass
+
+            # Checkpoint before destructive terminal commands
+            if function_name == "terminal" and agent._checkpoint_mgr.enabled:
+                try:
+                    cmd = function_args.get("command", "")
+                    if _is_destructive_command(cmd):
+                        cwd = function_args.get("workdir") or os.getenv("TERMINAL_CWD", os.getcwd())
+                        agent._checkpoint_mgr.ensure_checkpoint(
+                            cwd, f"before terminal: {cmd[:60]}"
+                        )
+                except Exception:
+                    pass
+
        parsed_calls.append((tool_call, function_name, function_args, block_result, blocked_by_guardrail))

    # ── Logging / callbacks ──────────────────────────────────────────
@ -301,33 +306,38 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
        # submit site below (GHSA-qg5c-hvr5-hjgr, #13617).
        start = time.time()
        try:
-            result = agent._invoke_tool(
-                function_name,
-                function_args,
-                effective_task_id,
-                tool_call.id,
-                messages=messages,
-                pre_tool_block_checked=True,
-            )
-        except Exception as tool_error:
-            result = f"Error executing tool '{function_name}': {tool_error}"
-            logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True)
-        duration = time.time() - start
-        is_error, _ = _detect_tool_failure(function_name, result)
-        if is_error:
-            logger.info("tool %s failed (%.2fs): %s", function_name, duration, result[:200])
-        else:
-            logger.info("tool %s completed (%.2fs, %d chars)", function_name, duration, len(result))
-        results[index] = (function_name, function_args, result, duration, is_error, False)
-        # Tear down worker-tid tracking.  Clear any interrupt bit we may
-        # have set so the next task scheduled onto this recycled tid
-        # starts with a clean slate.
-        with agent._tool_worker_threads_lock:
-            agent._tool_worker_threads.discard(_worker_tid)
-        try:
-            _ra()._set_interrupt(False, _worker_tid)
-        except Exception:
-            pass
+            try:
+                result = agent._invoke_tool(
+                    function_name,
+                    function_args,
+                    effective_task_id,
+                    tool_call.id,
+                    messages=messages,
+                    pre_tool_block_checked=True,
+                )
+            except Exception as tool_error:
+                result = f"Error executing tool '{function_name}': {tool_error}"
+                logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True)
+            duration = time.time() - start
+            is_error, _ = _detect_tool_failure(function_name, result)
+            if is_error:
+                logger.info("tool %s failed (%.2fs): %s", function_name, duration, result[:200])
+            else:
+                logger.info("tool %s completed (%.2fs, %d chars)", function_name, duration, len(result))
+            results[index] = (function_name, function_args, result, duration, is_error, False)
+        finally:
+            # Tear down worker-tid tracking.  Clear any interrupt bit we may
+            # have set so the next task scheduled onto this recycled tid
+            # starts with a clean slate.  This MUST be in a finally block
+            # because BaseException subclasses (CancelledError, KeyboardInterrupt)
+            # bypass ``except Exception`` and would otherwise leak the tid
+            # into _interrupted_threads, poisoning the recycled thread.
+            with agent._tool_worker_threads_lock:
+                agent._tool_worker_threads.discard(_worker_tid)
+            try:
+                _ra()._set_interrupt(False, _worker_tid)
+            except Exception:
+                pass

    # Start spinner for CLI mode (skip when TUI handles tool progress)
    spinner = None
@ -753,10 +763,14 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
        elif function_name == "delegate_task":
            tasks_arg = function_args.get("tasks")
            if tasks_arg and isinstance(tasks_arg, list):
-                spinner_label = f"🔀 delegating {len(tasks_arg)} tasks"
+                spinner_label = f"🔀 delegating {len(tasks_arg)} tasks · (/agents to monitor)"
            else:
                goal_preview = (function_args.get("goal") or "")[:30]
-                spinner_label = f"🔀 {goal_preview}" if goal_preview else "🔀 delegating"
+                spinner_label = (
+                    f"🔀 {goal_preview} · (/agents to monitor)"
+                    if goal_preview
+                    else "🔀 delegating · (/agents to monitor)"
+                )
            spinner = None
            if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
                face = random.choice(KawaiiSpinner.get_waiting_faces())
--- a/cli.py
+++ b/cli.py
@ -787,8 +787,10 @@ def AIAgent(*args, **kwargs):


 def get_tool_definitions(*args, **kwargs):
+    from hermes_cli.mcp_startup import wait_for_mcp_discovery
    from model_tools import get_tool_definitions as _get_tool_definitions

+    wait_for_mcp_discovery()
    return _get_tool_definitions(*args, **kwargs)


@ -896,9 +898,12 @@ def _prepare_deferred_agent_startup() -> None:
            exc_info=True,
        )
    try:
-        from tools.mcp_tool import discover_mcp_tools
+        from hermes_cli.mcp_startup import start_background_mcp_discovery

-        discover_mcp_tools()
+        start_background_mcp_discovery(
+            logger=logger,
+            thread_name="termux-cli-mcp-discovery",
+        )
    except Exception:
        logger.debug(
            "MCP tool discovery failed at deferred CLI startup",
@ -1537,9 +1542,17 @@ def _query_osc11_background() -> str | None:
    Most modern terminals reply with \x1b]11;rgb:RRRR/GGGG/BBBB\x1b\\
    within a few ms.  We wait up to 100ms total before giving up.
    Returns "#RRGGBB" or None on timeout / non-tty.
+
+    Skipped over SSH: the round-trip routinely exceeds our 100ms budget, so a
+    late reply lands after prompt_toolkit has grabbed the tty — its payload
+    leaks in as typed text and the BEL terminator reads as Ctrl+G (open
+    editor), trapping the user in a stray editor. Remote sessions fall back to
+    COLORFGBG / env hints / the dark default instead.
    """
    if not sys.stdin.isatty() or not sys.stdout.isatty():
        return None
+    if any(os.environ.get(v) for v in ("SSH_CONNECTION", "SSH_CLIENT", "SSH_TTY")):
+        return None
    try:
        import termios
        import tty
@ -1587,8 +1600,11 @@ def _query_osc11_background() -> str | None:
        r, g, b = norm(m.group(1)), norm(m.group(2)), norm(m.group(3))
        return f"#{r:02X}{g:02X}{b:02X}"
    finally:
+        # TCSAFLUSH discards any unread input as it restores the original
+        # attributes — scrubs a slow/partial OSC 11 reply out of the tty
+        # buffer before prompt_toolkit can read it as keystrokes.
        try:
-            termios.tcsetattr(fd, termios.TCSANOW, old)
+            termios.tcsetattr(fd, termios.TCSAFLUSH, old)
        except Exception:
            pass

@ -4872,6 +4888,10 @@ class HermesCLI:
        if not self._ensure_runtime_credentials():
            return False

+        from hermes_cli.mcp_startup import wait_for_mcp_discovery
+
+        wait_for_mcp_discovery()
+
        # Initialize SQLite session store for CLI sessions (if not already done in __init__)
        if self._session_db is None:
            try:
@ -12928,6 +12948,13 @@ class HermesCLI:
                        if event.app.is_running:
                            event.app.exit()
                    event.app.current_buffer.reset(append_to_history=True)
+                    # Force a repaint: process_command() prints through
+                    # patch_stdout (scrolls output above the prompt) and never
+                    # invalidates the app, so the just-cleared input area can
+                    # keep showing the submitted text until some unrelated
+                    # redraw fires. Every other early-return branch in this
+                    # handler invalidates after reset — match them.
+                    event.app.invalidate()
                    return

                # Handle /steer while the agent is running immediately on the
@ -12939,6 +12966,13 @@ class HermesCLI:
                if self._should_handle_steer_command_inline(text, has_images=has_images):
                    self.process_command(text)
                    event.app.current_buffer.reset(append_to_history=True)
+                    # Force a repaint after clearing the buffer.  /steer is
+                    # dispatched mid-run while the agent streams output through
+                    # patch_stdout; process_command() never invalidates the
+                    # app, so without this the submitted "/steer <text>" can
+                    # linger in the input area (looking unsent) and invite an
+                    # accidental re-submit. See issue #34569.
+                    event.app.invalidate()
                    return

                # Snapshot and clear attached images
--- a/gateway/config.py
+++ b/gateway/config.py
@ -474,6 +474,13 @@ class GatewayConfig:
    
    # Delivery settings
    always_log_local: bool = True  # Always save cron outputs to local files
+    # Drop outbound "silence narration" messages (e.g. *(silent)*, 🔇, a bare
+    # ".") pre-send. These are model hallucinations emitted when a persona has
+    # nothing actionable to say; in bot-to-bot channels they mirror back and
+    # forth, burning tokens and crashing models. Substrate-level guard that
+    # survives SOUL.md/prompt drift across providers. Opt out with False for
+    # raw passthrough.
+    filter_silence_narration: bool = True

    # STT settings
    stt_enabled: bool = True  # Whether to auto-transcribe inbound voice messages
@ -582,6 +589,7 @@ class GatewayConfig:
            "quick_commands": self.quick_commands,
            "sessions_dir": str(self.sessions_dir),
            "always_log_local": self.always_log_local,
+            "filter_silence_narration": self.filter_silence_narration,
            "stt_enabled": self.stt_enabled,
            "group_sessions_per_user": self.group_sessions_per_user,
            "thread_sessions_per_user": self.thread_sessions_per_user,
@ -650,6 +658,9 @@ class GatewayConfig:
            quick_commands=quick_commands,
            sessions_dir=sessions_dir,
            always_log_local=_coerce_bool(data.get("always_log_local"), True),
+            filter_silence_narration=_coerce_bool(
+                data.get("filter_silence_narration"), True
+            ),
            stt_enabled=_coerce_bool(stt_enabled, True),
            group_sessions_per_user=_coerce_bool(group_sessions_per_user, True),
            thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False),
@ -757,21 +768,32 @@ def load_gateway_config() -> GatewayConfig:
            if "always_log_local" in yaml_cfg:
                gw_data["always_log_local"] = yaml_cfg["always_log_local"]

+            if "filter_silence_narration" in yaml_cfg:
+                gw_data["filter_silence_narration"] = yaml_cfg[
+                    "filter_silence_narration"
+                ]
+
            if "unauthorized_dm_behavior" in yaml_cfg:
                gw_data["unauthorized_dm_behavior"] = _normalize_unauthorized_dm_behavior(
                    yaml_cfg.get("unauthorized_dm_behavior"),
                    "pair",
                )

-            # Merge platforms section from config.yaml into gw_data so that
-            # nested keys like platforms.webhook.extra.routes are loaded.
-            yaml_platforms = yaml_cfg.get("platforms")
+            # Merge platform config into gw_data so runtime-only settings under
+            # ``gateway.platforms`` are loaded the same way as top-level
+            # ``platforms``. Merge nested first so top-level config keeps
+            # precedence, matching the existing gateway.streaming fallback.
+            gateway_cfg = yaml_cfg.get("gateway")
+            gateway_platforms = gateway_cfg.get("platforms") if isinstance(gateway_cfg, dict) else None
            platforms_data = gw_data.setdefault("platforms", {})
            if not isinstance(platforms_data, dict):
                platforms_data = {}
                gw_data["platforms"] = platforms_data
-            if isinstance(yaml_platforms, dict):
-                for plat_name, plat_block in yaml_platforms.items():
+
+            def _merge_platform_map(source_platforms: Any) -> None:
+                if not isinstance(source_platforms, dict):
+                    return
+                for plat_name, plat_block in source_platforms.items():
                    if not isinstance(plat_block, dict):
                        continue
                    existing = platforms_data.get(plat_name, {})
@ -785,6 +807,10 @@ def load_gateway_config() -> GatewayConfig:
                    if merged_extra:
                        merged["extra"] = merged_extra
                    platforms_data[plat_name] = merged
+
+            _merge_platform_map(gateway_platforms)
+            _merge_platform_map(yaml_cfg.get("platforms"))
+            if platforms_data:
                gw_data["platforms"] = platforms_data
            # Iterate built-in platforms plus any registered plugin platforms
            # so plugin authors get the same shared-key bridging (#24836).
@ -890,6 +916,18 @@ def load_gateway_config() -> GatewayConfig:
                    if entry.apply_yaml_config_fn is None:
                        continue
                    platform_cfg = yaml_cfg.get(entry.name)
+                    # Fall back to the platform's block under ``platforms`` /
+                    # ``gateway.platforms`` so adapter hooks still run when the
+                    # user configured the platform only under those nested paths
+                    # (e.g. ``platforms.discord.extra.allow_from``) and not via a
+                    # top-level ``discord:`` block.
+                    if not isinstance(platform_cfg, dict):
+                        for _src in (gateway_platforms, yaml_cfg.get("platforms")):
+                            if isinstance(_src, dict):
+                                _candidate = _src.get(entry.name)
+                                if isinstance(_candidate, dict):
+                                    platform_cfg = _candidate
+                                    break
                    if not isinstance(platform_cfg, dict):
                        continue
                    try:
--- a/gateway/delivery.py
+++ b/gateway/delivery.py
@ -9,6 +9,8 @@ Routes messages to the appropriate destination based on:
 """

 import logging
+import os
+import re
 from pathlib import Path
 from datetime import datetime
 from dataclasses import dataclass
@ -21,6 +23,32 @@ logger = logging.getLogger(__name__)
 MAX_PLATFORM_OUTPUT = 4000
 TRUNCATED_VISIBLE = 3800

+# Matches strings that are *only* a "silence" narration with optional markdown
+# wrappers. Covers: *(silent)*, _silent_, `silent`, ~silent~, (silent), silent,
+# 🔇, a bare ".", "…", and the whitespace/marker-padded variants seen in the
+# wild. Anchored to start/end so substantive messages that merely *contain* the
+# word "silent" are never matched.
+_SILENCE_NARRATION = re.compile(
+    r'^[\s*_~`]*\(?\s*(silent|silence|no\s+response|no\s+reply)\s*\.?\)?[\s*_~`]*$'
+    r'|^[\s*_~`]*[\U0001F507\.\u2026]+[\s*_~`]*$',
+    re.IGNORECASE,
+)
+
+
+def _is_silence_narration(content: Optional[str]) -> bool:
+    """Return True when ``content`` is *only* a silence-narration token.
+
+    Length-guarded (real messages are longer) and anchored to the whole string
+    so legitimate prose like "The deployment ran silently" or "Silence is
+    golden — here is the plan..." is never flagged.
+    """
+    if not content:
+        return False
+    stripped = content.strip()
+    if not stripped or len(stripped) > 64:  # length guard
+        return False
+    return bool(_SILENCE_NARRATION.match(stripped))
+
 from .config import Platform, GatewayConfig
 from .session import SessionSource

@ -261,6 +289,18 @@ class DeliveryRouter:
        path.write_text(content)
        return path

+    def _filter_silence_narration_enabled(self) -> bool:
+        """Whether the outbound silence-narration filter is active.
+
+        ``HERMES_FILTER_SILENCE_NARRATION`` env var overrides config when set;
+        otherwise the ``gateway.filter_silence_narration`` config flag wins
+        (default True).
+        """
+        env = os.getenv("HERMES_FILTER_SILENCE_NARRATION")
+        if env is not None:
+            return env.strip().lower() in ("1", "true", "yes", "on")
+        return bool(getattr(self.config, "filter_silence_narration", True))
+
    async def _deliver_to_platform(
        self,
        target: DeliveryTarget,
@ -286,6 +326,27 @@ class DeliveryRouter:
                + f"\n\n... [truncated, full output saved to {saved_path}]"
            )
        
+        # Substrate-level anti-loop guard: drop hallucinated "silence narration"
+        # (*(silent)*, 🔇, a bare ".", etc.) before it ever reaches the adapter.
+        # In bot-to-bot channels these tokens mirror back and forth until a
+        # model crashes with "no content after all retries". Behavioral prompt
+        # rules drift across providers; this single chokepoint covers every
+        # platform adapter regardless of which persona's prompt failed.
+        # Local/file delivery (_deliver_local) is a separate path and is never
+        # filtered — saved silence has no loop risk.
+        if self._filter_silence_narration_enabled() and _is_silence_narration(content):
+            logger.warning(
+                "Dropped silence-narration outbound to %s (chat=%s): %r",
+                target.platform.value,
+                target.chat_id,
+                content[:40],
+            )
+            return {
+                "success": True,
+                "filtered": "silence_narration",
+                "delivered": False,
+            }
+
        send_metadata = dict(metadata or {})
        is_named_telegram_private_topic = False
        named_telegram_private_topic_name: Optional[str] = None
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@ -1191,10 +1191,12 @@ _MEDIA_EXT_ALTERNATION = "|".join(
 # bare-path detector (extract_local_files) downstream rather than silently
 # deleted. Shared by the non-streaming dispatch path and the streaming
 # consumer so both behave identically.
+# Path anchors: ``~/`` (Unix home-relative), ``/`` (Unix absolute),
+# ``X:\\`` or ``X:/`` (Windows drive-letter absolute — #34632).
 MEDIA_TAG_CLEANUP_RE = re.compile(
    r'''[`"']?MEDIA:\s*'''
    r'''(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|'''
-    r'''(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:''' + _MEDIA_EXT_ALTERNATION + r'''))'''
+    r'''(?:~/|/|[A-Za-z]:[/\\])\S+(?:[^\S\n]+\S+)*?\.(?:''' + _MEDIA_EXT_ALTERNATION + r'''))'''
    r'''(?=[\s`"',;:)\]}]|$)[`"']?''',
    re.IGNORECASE,
 )
@ -2665,9 +2667,10 @@ class BasePlatformAdapter(ABC):

        # (?<![/:\w.]) prevents matching inside URLs (e.g. https://…/img.png)
        #             and relative paths (./foo.png)
-        # (?:~/|/)    anchors to absolute or home-relative paths
+        # (?:~/|/)    anchors to absolute or home-relative Unix paths
+        # (?:[A-Za-z]:[/\\]) anchors to Windows drive-letter paths (#34632)
        path_re = re.compile(
-            r'(?<![/:\w.])(?:~/|/)(?:[\w.\-]+/)*[\w.\-]+\.(?:' + ext_part + r')\b',
+            r'(?<![/:\w.])(?:~/|/|[A-Za-z]:[/\\])(?:[\w.\-]+[/\\])*[\w.\-]+\.(?:' + ext_part + r')\b',
            re.IGNORECASE,
        )

--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@ -2804,21 +2804,8 @@ class TelegramAdapter(BasePlatformAdapter):
                return slug

        try:
-            # Build provider buttons — 2 per row
-            buttons: list = []
-            for p in providers:
-                count = p.get("total_models", len(p.get("models", [])))
-                label = f"{p['name']} ({count})"
-                if p.get("is_current"):
-                    label = f"✓ {label}"
-                # Compact callback data: mp:<slug>  (max 64 bytes)
-                buttons.append(
-                    InlineKeyboardButton(label, callback_data=f"mp:{p['slug']}")
-                )
-
-            rows = [buttons[i : i + 2] for i in range(0, len(buttons), 2)]
-            rows.append([InlineKeyboardButton("✗ Cancel", callback_data="mx")])
-            keyboard = InlineKeyboardMarkup(rows)
+            # Build provider buttons — folds provider groups (display only).
+            keyboard = self._build_provider_keyboard(providers)

            provider_label = get_label(current_provider)
            text = self.format_message(
@ -2865,6 +2852,56 @@ class TelegramAdapter(BasePlatformAdapter):

    _MODEL_PAGE_SIZE = 8

+    def _build_provider_keyboard(self, providers: list):
+        """Build the top-level provider keyboard, folding provider groups.
+
+        Provider families (Kimi/Moonshot, MiniMax, xAI Grok, ...) collapse to
+        a single ``mpg:<gid>`` button; tapping it drills into a member
+        sub-keyboard. Single providers (and groups with only one authenticated
+        member) render as direct ``mp:<slug>`` buttons. Grouping mirrors the
+        CLI ``hermes model`` picker via the shared ``group_providers`` fold,
+        so all surfaces stay consistent.
+        """
+        try:
+            from hermes_cli.models import group_providers
+        except Exception:
+            group_providers = None
+
+        by_slug = {p.get("slug"): p for p in providers}
+
+        def _provider_button(p):
+            count = p.get("total_models", len(p.get("models", [])))
+            label = f"{p['name']} ({count})"
+            if p.get("is_current"):
+                label = f"✓ {label}"
+            return InlineKeyboardButton(label, callback_data=f"mp:{p['slug']}")
+
+        buttons: list = []
+        if group_providers is not None:
+            for row in group_providers([p.get("slug") for p in providers]):
+                if row["kind"] == "group":
+                    members = [by_slug[m] for m in row["members"] if m in by_slug]
+                    count = sum(
+                        m.get("total_models", len(m.get("models", []))) for m in members
+                    )
+                    label = f"{row['label']} ▸ ({count})"
+                    if any(m.get("is_current") for m in members):
+                        label = f"✓ {label}"
+                    buttons.append(
+                        InlineKeyboardButton(label, callback_data=f"mpg:{row['group_id']}")
+                    )
+                else:
+                    p = by_slug.get(row["slug"])
+                    if p is not None:
+                        buttons.append(_provider_button(p))
+        else:
+            for p in providers:
+                buttons.append(_provider_button(p))
+
+        rows = [buttons[i : i + 2] for i in range(0, len(buttons), 2)]
+        rows.append([InlineKeyboardButton("✗ Cancel", callback_data="mx")])
+        return InlineKeyboardMarkup(rows)
+
    def _build_model_keyboard(self, models: list, page: int) -> tuple:
        """Build paginated model buttons. Returns (keyboard, page_info_text)."""
        page_size = self._MODEL_PAGE_SIZE
@ -3043,10 +3080,23 @@ class TelegramAdapter(BasePlatformAdapter):
            # Clean up state
            self._model_picker_state.pop(chat_id, None)

-        elif data == "mb":
-            # --- Back to provider list ---
+        elif data.startswith("mpg:"):
+            # --- Provider group selected: show member providers ---
+            group_id = data[4:]
+            try:
+                from hermes_cli.models import PROVIDER_GROUPS
+                _label, member_slugs = PROVIDER_GROUPS.get(group_id, ("", []))
+            except Exception:
+                _label, member_slugs = "", []
+
+            by_slug = {p["slug"]: p for p in state["providers"]}
+            members = [by_slug[m] for m in member_slugs if m in by_slug]
+            if not members:
+                await query.answer(text="Group not found.")
+                return
+
            buttons = []
-            for p in state["providers"]:
+            for p in members:
                count = p.get("total_models", len(p.get("models", [])))
                label = f"{p['name']} ({count})"
                if p.get("is_current"):
@ -3054,11 +3104,30 @@ class TelegramAdapter(BasePlatformAdapter):
                buttons.append(
                    InlineKeyboardButton(label, callback_data=f"mp:{p['slug']}")
                )
-
            rows = [buttons[i : i + 2] for i in range(0, len(buttons), 2)]
-            rows.append([InlineKeyboardButton("✗ Cancel", callback_data="mx")])
+            rows.append([
+                InlineKeyboardButton("◀ Back", callback_data="mb"),
+                InlineKeyboardButton("✗ Cancel", callback_data="mx"),
+            ])
            keyboard = InlineKeyboardMarkup(rows)

+            await query.edit_message_text(
+                text=self.format_message(
+                    (
+                        f"⚙ *Model Configuration*\n\n"
+                        f"Provider family: *{_label or group_id}*\n\n"
+                        f"Select a provider:"
+                    )
+                ),
+                parse_mode=ParseMode.MARKDOWN_V2,
+                reply_markup=keyboard,
+            )
+            await query.answer()
+
+        elif data == "mb":
+            # --- Back to provider list (folds groups) ---
+            keyboard = self._build_provider_keyboard(state["providers"])
+
            try:
                provider_label = get_label(state["current_provider"])
            except Exception:
@ -3107,7 +3176,7 @@ class TelegramAdapter(BasePlatformAdapter):
        query_user_name = getattr(query.from_user, "first_name", None)

        # --- Model picker callbacks ---
-        if data.startswith(("mp:", "mm:", "mb", "mx", "mg:")):
+        if data.startswith(("mp:", "mpg:", "mm:", "mb", "mx", "mg:")):
            chat_id = str(query.message.chat_id) if query.message else None
            if chat_id:
                await self._handle_model_picker_callback(query, data, chat_id)
--- a/gateway/platforms/weixin.py
+++ b/gateway/platforms/weixin.py
@ -1180,12 +1180,48 @@ class WeixinAdapter(BasePlatformAdapter):
            default=False,
        )

+        # Text debounce batching (mirrors Telegram adapter pattern).
+        # iLink delivers messages individually, so rapid multi-message
+        # bursts (forwarded batches, paste-splits) each trigger a
+        # separate agent invocation.  Default 3s delay / 5s split delay
+        # are tuned for iLink's typical delivery cadence.  Tunable via
+        # config.yaml under
+        # ``gateway.platforms.weixin.extra.text_batch_delay_seconds`` /
+        # ``text_batch_split_delay_seconds``.
+        self._text_batch_delay_seconds = self._coerce_float_extra(
+            "text_batch_delay_seconds", 3.0
+        )
+        self._text_batch_split_delay_seconds = self._coerce_float_extra(
+            "text_batch_split_delay_seconds", 5.0
+        )
+        self._pending_text_batches: Dict[str, MessageEvent] = {}
+        self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}
+
        if self._account_id and not self._token:
            persisted = load_weixin_account(hermes_home, self._account_id)
            if persisted:
                self._token = str(persisted.get("token") or "").strip()
                self._base_url = str(persisted.get("base_url") or self._base_url).strip().rstrip("/")

+    def _coerce_float_extra(self, key: str, default: float) -> float:
+        """Read a float from ``config.extra``, guarding against bad/non-finite values.
+
+        The result is fed directly to ``asyncio.sleep()``, so NaN/Inf and
+        unparseable values fall back to ``default``.
+        """
+        import math
+
+        value = self.config.extra.get(key) if getattr(self.config, "extra", None) else None
+        if value is None:
+            return float(default)
+        try:
+            parsed = float(value)
+        except (TypeError, ValueError):
+            return float(default)
+        if not math.isfinite(parsed) or parsed < 0:
+            return float(default)
+        return parsed
+
    @staticmethod
    def _coerce_list(value: Any) -> List[str]:
        if value is None:
@ -1247,6 +1283,11 @@ class WeixinAdapter(BasePlatformAdapter):
    async def disconnect(self) -> None:
        _LIVE_ADAPTERS.pop(self._token, None)
        self._running = False
+        for task in self._pending_text_batch_tasks.values():
+            if not task.done():
+                task.cancel()
+        self._pending_text_batches.clear()
+        self._pending_text_batch_tasks.clear()
        if self._poll_task and not self._poll_task.done():
            self._poll_task.cancel()
            try:
@ -1395,12 +1436,10 @@ class WeixinAdapter(BasePlatformAdapter):
            timestamp=datetime.now(),
        )
        logger.info("[%s] inbound from=%s type=%s media=%d", self.name, _safe_id(sender_id), source.chat_type, len(media_paths))
-        await self.handle_message(event)
-
-    @property
-    def enforces_own_access_policy(self) -> bool:
-        """Weixin gates DM/group access at intake via dm_policy/group_policy."""
-        return True
+        if event.message_type == MessageType.TEXT:
+            self._enqueue_text_event(event)
+        else:
+            await self.handle_message(event)

    def _is_dm_allowed(self, sender_id: str) -> bool:
        if self._dm_policy == "disabled":
@ -1409,6 +1448,76 @@ class WeixinAdapter(BasePlatformAdapter):
            return sender_id in self._allow_from
        return True

+    @property
+    def enforces_own_access_policy(self) -> bool:
+        """Weixin gates DM/group access at intake via dm_policy/group_policy."""
+        return True
+
+    # ------------------------------------------------------------------
+    # Text debounce batching
+    # ------------------------------------------------------------------
+
+    _SPLIT_THRESHOLD = 1800  # iLink chunks at ~2048 chars
+
+    def _text_batch_key(self, event: MessageEvent) -> str:
+        """Session-scoped key for text message batching."""
+        from gateway.session import build_session_key
+        return build_session_key(
+            event.source,
+            group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
+            thread_sessions_per_user=self.config.extra.get("thread_sessions_per_user", False),
+        )
+
+    def _enqueue_text_event(self, event: MessageEvent) -> None:
+        """Buffer a text event and reset the flush timer.
+
+        When users forward multiple messages or send rapid-fire texts
+        via WeChat, each arrives as a separate iLink message. This
+        concatenates them and waits for a short quiet period before
+        dispatching the combined message.
+        """
+        key = self._text_batch_key(event)
+        existing = self._pending_text_batches.get(key)
+        chunk_len = len(event.text or "")
+        if existing is None:
+            event._last_chunk_len = chunk_len  # type: ignore[attr-defined]
+            self._pending_text_batches[key] = event
+        else:
+            if event.text:
+                existing.text = f"{existing.text}\n{event.text}" if existing.text else event.text
+            existing._last_chunk_len = chunk_len  # type: ignore[attr-defined]
+            if event.media_urls:
+                existing.media_urls.extend(event.media_urls)
+                existing.media_types.extend(event.media_types)
+
+        prior_task = self._pending_text_batch_tasks.get(key)
+        if prior_task and not prior_task.done():
+            prior_task.cancel()
+        self._pending_text_batch_tasks[key] = asyncio.create_task(
+            self._flush_text_batch(key)
+        )
+
+    async def _flush_text_batch(self, key: str) -> None:
+        """Wait for quiet period then dispatch aggregated text."""
+        current_task = asyncio.current_task()
+        try:
+            pending = self._pending_text_batches.get(key)
+            last_len = getattr(pending, "_last_chunk_len", 0) if pending else 0
+            if last_len >= self._SPLIT_THRESHOLD:
+                delay = self._text_batch_split_delay_seconds
+            else:
+                delay = self._text_batch_delay_seconds
+            await asyncio.sleep(delay)
+            if self._pending_text_batch_tasks.get(key) is not current_task:
+                return
+            event = self._pending_text_batches.pop(key, None)
+            if not event:
+                return
+            await self.handle_message(event)
+        finally:
+            if self._pending_text_batch_tasks.get(key) is current_task:
+                self._pending_text_batch_tasks.pop(key, None)
+
    async def _collect_media(self, item: Dict[str, Any], media_paths: List[str], media_types: List[str]) -> None:
        item_type = item.get("type")
        if item_type == ITEM_IMAGE:
--- a/gateway/platforms/whatsapp.py
+++ b/gateway/platforms/whatsapp.py
@ -278,6 +278,43 @@ class WhatsAppAdapter(BasePlatformAdapter):
        # notification before the normal "✓ whatsapp disconnected" fires.
        self._shutting_down: bool = False

+        # Text debounce batching (mirrors Telegram adapter pattern).
+        # WhatsApp often delivers multiple messages in rapid succession
+        # (e.g. forwarded batches, paste-splits) — without debounce each
+        # message triggers a separate agent invocation, wasting tokens and
+        # flooding the user with reply fragments.  Default 5s delay /
+        # 10s split delay are conservative for WhatsApp's delivery cadence.
+        # Tunable via config.yaml under
+        # ``gateway.platforms.whatsapp.extra.text_batch_delay_seconds`` /
+        # ``text_batch_split_delay_seconds``.
+        self._text_batch_delay_seconds = self._coerce_float_extra(
+            "text_batch_delay_seconds", 5.0
+        )
+        self._text_batch_split_delay_seconds = self._coerce_float_extra(
+            "text_batch_split_delay_seconds", 10.0
+        )
+        self._pending_text_batches: Dict[str, MessageEvent] = {}
+        self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}
+
+    def _coerce_float_extra(self, key: str, default: float) -> float:
+        """Read a float from ``config.extra``, guarding against bad/non-finite values.
+
+        The result is fed directly to ``asyncio.sleep()``, so NaN/Inf and
+        unparseable values fall back to ``default``.
+        """
+        import math
+
+        value = self.config.extra.get(key) if getattr(self.config, "extra", None) else None
+        if value is None:
+            return float(default)
+        try:
+            parsed = float(value)
+        except (TypeError, ValueError):
+            return float(default)
+        if not math.isfinite(parsed) or parsed < 0:
+            return float(default)
+        return parsed
+
    def _effective_reply_prefix(self) -> str:
        """Return the prefix the Node bridge will add in self-chat mode."""
        whatsapp_mode = os.getenv("WHATSAPP_MODE", "self-chat")
@ -1139,7 +1176,10 @@ class WhatsAppAdapter(BasePlatformAdapter):
                        for msg_data in messages:
                            event = await self._build_message_event(msg_data)
                            if event:
-                                await self.handle_message(event)
+                                if event.message_type == MessageType.TEXT:
+                                    self._enqueue_text_event(event)
+                                else:
+                                    await self.handle_message(event)
            except asyncio.CancelledError:
                break
            except Exception as e:
@ -1151,7 +1191,67 @@ class WhatsAppAdapter(BasePlatformAdapter):
                await asyncio.sleep(5)
            
            await asyncio.sleep(1)  # Poll interval
-    
+
+    # ── Text debounce batching ──────────────────────────────────────
+
+    _SPLIT_THRESHOLD = 6000  # WhatsApp supports ~65K chars; generous threshold
+
+    def _text_batch_key(self, event: MessageEvent) -> str:
+        """Session-scoped key for text message batching."""
+        from gateway.session import build_session_key
+        return build_session_key(
+            event.source,
+            group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
+            thread_sessions_per_user=self.config.extra.get("thread_sessions_per_user", False),
+        )
+
+    def _enqueue_text_event(self, event: MessageEvent) -> None:
+        """Buffer a text event and reset the flush timer.
+
+        When WhatsApp delivers rapid-fire messages (e.g. forwarded
+        batches), this concatenates them and waits for a short quiet
+        period before dispatching the combined message.
+        """
+        key = self._text_batch_key(event)
+        existing = self._pending_text_batches.get(key)
+        chunk_len = len(event.text or "")
+        if existing is None:
+            event._last_chunk_len = chunk_len  # type: ignore[attr-defined]
+            self._pending_text_batches[key] = event
+        else:
+            if event.text:
+                existing.text = f"{existing.text}\n{event.text}" if existing.text else event.text
+            existing._last_chunk_len = chunk_len  # type: ignore[attr-defined]
+            if event.media_urls:
+                existing.media_urls.extend(event.media_urls)
+                existing.media_types.extend(event.media_types)
+
+        prior_task = self._pending_text_batch_tasks.get(key)
+        if prior_task and not prior_task.done():
+            prior_task.cancel()
+        self._pending_text_batch_tasks[key] = asyncio.create_task(
+            self._flush_text_batch(key)
+        )
+
+    async def _flush_text_batch(self, key: str) -> None:
+        """Wait for quiet period then dispatch aggregated text."""
+        current_task = asyncio.current_task()
+        try:
+            pending = self._pending_text_batches.get(key)
+            last_len = getattr(pending, "_last_chunk_len", 0) if pending else 0
+            if last_len >= self._SPLIT_THRESHOLD:
+                delay = self._text_batch_split_delay_seconds
+            else:
+                delay = self._text_batch_delay_seconds
+            await asyncio.sleep(delay)
+            event = self._pending_text_batches.pop(key, None)
+            if not event:
+                return
+            await self.handle_message(event)
+        finally:
+            if self._pending_text_batch_tasks.get(key) is current_task:
+                self._pending_text_batch_tasks.pop(key, None)
+
    async def _build_message_event(self, data: Dict[str, Any]) -> Optional[MessageEvent]:
        """Build a MessageEvent from bridge message data, downloading images to cache."""
        try:
--- a/gateway/run.py
+++ b/gateway/run.py
@ -1730,6 +1730,14 @@ class GatewayRunner:
        self._running_agents: Dict[str, Any] = {}
        self._running_agents_ts: Dict[str, float] = {}  # start timestamp per session
        self._pending_messages: Dict[str, str] = {}  # Queued messages during interrupt
+        # Last successfully-resolved (non-empty) model, keyed by session. Used
+        # as a fallback when a fresh config read transiently returns an empty
+        # model (e.g. an mtime-keyed config-cache miss during a post-interrupt
+        # recovery turn). Without this, the agent is built with model="" and
+        # every API call fails HTTP 400 "No models provided" — the session goes
+        # silent until the user manually re-sends. See #35314. ``"*"`` holds a
+        # process-wide last-known-good for sessions seen for the first time.
+        self._last_resolved_model: Dict[str, str] = {}
        # Overflow buffer for explicit /queue commands.  The adapter-level
        # _pending_messages dict is a single slot per session (designed for
        # "next-turn" follow-ups where repeated sends collapse into one
@ -2488,6 +2496,32 @@ class GatewayRunner:
            except Exception:
                pass

+        # Final safety net (#35314): if resolution still produced an empty
+        # model — e.g. a transient config-cache miss during a post-interrupt
+        # recovery turn returned an empty user_config — reuse the last model we
+        # successfully resolved for this session (or, failing that, the most
+        # recent one resolved process-wide). Building an agent with model=""
+        # makes every API call fail HTTP 400 "No models provided" and the
+        # session goes silent until the user manually re-sends. ``getattr``
+        # guards against bare test runners built via ``object.__new__``.
+        _last_good = getattr(self, "_last_resolved_model", None)
+        if _last_good is not None:
+            if not model:
+                _recovered = _last_good.get(resolved_session_key or "") or _last_good.get("*")
+                if _recovered:
+                    logger.warning(
+                        "Empty model resolved for session=%s — recovering "
+                        "last-known-good model %s (config read likely returned "
+                        "empty; see #35314)",
+                        resolved_session_key or "", _recovered,
+                    )
+                    model = _recovered
+            elif model:
+                # Cache the good resolution for future recovery turns.
+                if resolved_session_key:
+                    _last_good[resolved_session_key] = model
+                _last_good["*"] = model
+
        return model, runtime_kwargs

    def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict:
@ -2784,10 +2818,12 @@ class GatewayRunner:
        """Mark a queued platform as paused — keep it in ``_failed_platforms``
        but stop the reconnect watcher from hammering it.

-        Used by the circuit breaker after ``_PAUSE_AFTER_FAILURES`` consecutive
-        retryable failures, and by ``/platform pause <name>`` for manual
-        intervention.  Paused platforms are surfaced in ``/platform list``
-        and resumed with ``/platform resume <name>``.
+        Used by ``/platform pause <name>`` for manual operator intervention.
+        Paused platforms are surfaced in ``/platform list`` and resumed with
+        ``/platform resume <name>``.  Note: the reconnect watcher does NOT
+        auto-pause — retryable (network/DNS) failures keep retrying at the
+        backoff cap indefinitely so a transient outage self-heals without
+        manual intervention.
        """
        info = getattr(self, "_failed_platforms", {}).get(platform)
        if info is None:
@ -5865,15 +5901,17 @@ class GatewayRunner:
        """Background task that periodically retries connecting failed platforms.

        Uses exponential backoff: 30s → 60s → 120s → 240s → 300s (cap).
-        Retryable failures keep retrying at the backoff cap indefinitely
-        — but if a platform fails ``_PAUSE_AFTER_FAILURES`` times in a row
-        without ever succeeding, it is *paused*: kept in the retry queue
-        but no longer hammered.  The user surfaces it with ``/platform list``
-        and resumes it with ``/platform resume <name>``.  Non-retryable
-        failures (bad auth, etc.) still drop out of the queue immediately.
+        Retryable failures (network/DNS blips) keep retrying at the backoff
+        cap indefinitely — they self-heal once connectivity returns, so a
+        transient outage never requires manual intervention. Non-retryable
+        failures (bad auth, etc.) drop out of the queue immediately. The
+        circuit breaker (``_pause_failed_platform`` / ``/platform pause``)
+        remains available for manual operator control via ``/platform list``
+        and ``/platform resume <name>``, but is no longer triggered
+        automatically — auto-pausing a recovered platform was the cause of
+        bots silently staying dead after a transient DNS failure.
        """
        _BACKOFF_CAP = 300  # 5 minutes max between retries
-        _PAUSE_AFTER_FAILURES = 10  # circuit-breaker threshold

        await asyncio.sleep(10)  # initial delay — let startup finish
        while self._running:
@ -5968,14 +6006,14 @@ class GatewayRunner:
                            "Reconnect %s failed, next retry in %ds",
                            platform.value, backoff,
                        )
-                        if attempt >= _PAUSE_AFTER_FAILURES:
-                            self._pause_failed_platform(
-                                platform,
-                                reason=(
-                                    adapter.fatal_error_message
-                                    or "failed to reconnect"
-                                ),
-                            )
+                        # Retryable failures (network/DNS blips) keep retrying
+                        # at the backoff cap indefinitely — they self-heal once
+                        # connectivity returns. We do NOT auto-pause them: a
+                        # transient outage must never require manual `/platform
+                        # resume` to recover. Non-retryable failures (bad auth,
+                        # etc.) already drop out of the queue via the
+                        # `not fatal_error_retryable` branch above, so anything
+                        # reaching here is by definition retryable.
                except Exception as e:
                    self._update_platform_runtime_status(
                        platform.value,
@ -5990,8 +6028,9 @@ class GatewayRunner:
                        "Reconnect %s error: %s, next retry in %ds",
                        platform.value, e, backoff,
                    )
-                    if attempt >= _PAUSE_AFTER_FAILURES:
-                        self._pause_failed_platform(platform, reason=str(e))
+                    # A raised exception during reconnect (connect timeout, DNS
+                    # resolution failure, etc.) is inherently transient — keep
+                    # retrying at the backoff cap rather than auto-pausing.

            # Check every 10 seconds for platforms that need reconnection
            for _ in range(10):
@ -10531,6 +10570,22 @@ class GatewayRunner:
                            except Exception as exc:
                                logger.warning("Picker model switch failed for cached agent: %s", exc)

+                        # Persist the new model to the session DB so the
+                        # dashboard shows the updated model (#34850).
+                        _sess_db = getattr(_self, "_session_db", None)
+                        if _sess_db is not None:
+                            try:
+                                _sess_entry = _self.session_store.get_or_create_session(
+                                    event.source
+                                )
+                                _sess_db.update_session_model(
+                                    _sess_entry.session_id, result.new_model
+                                )
+                            except Exception as exc:
+                                logger.debug(
+                                    "Failed to persist model switch to DB: %s", exc
+                                )
+
                        # Store model note + session override
                        if not hasattr(_self, "_pending_model_notes"):
                            _self._pending_model_notes = {}
@ -10668,6 +10723,20 @@ class GatewayRunner:
            except Exception as exc:
                logger.warning("In-place model switch failed for cached agent: %s", exc)

+        # Persist the new model to the session DB so the dashboard
+        # shows the updated model (#34850).
+        _sess_db = getattr(self, "_session_db", None)
+        if _sess_db is not None:
+            try:
+                _sess_entry = self.session_store.get_or_create_session(source)
+                _sess_db.update_session_model(
+                    _sess_entry.session_id, result.new_model
+                )
+            except Exception as exc:
+                logger.debug(
+                    "Failed to persist model switch to DB: %s", exc
+                )
+
        # Store a note to prepend to the next user message so the model
        # knows about the switch (avoids system messages mid-history).
        if not hasattr(self, "_pending_model_notes"):
@ -15313,8 +15382,52 @@ class GatewayRunner:
        ("compression", "target_ratio"),
        ("compression", "protect_last_n"),
        ("agent", "disabled_toolsets"),
+        ("memory", "provider"),
    )

+    _HONCHO_CACHE_BUSTING_KEYS = (
+        "honcho.peer_name",
+        "honcho.ai_peer",
+        "honcho.pin_peer_name",
+        "honcho.runtime_peer_prefix",
+        "honcho.user_peer_aliases",
+    )
+    _HONCHO_CACHE_BUSTING_MEMO: dict[tuple[str, int | None], dict[str, Any]] = {}
+
+    @classmethod
+    def _empty_honcho_cache_busting_config(cls) -> dict[str, Any]:
+        return {key: None for key in cls._HONCHO_CACHE_BUSTING_KEYS}
+
+    @classmethod
+    def _extract_honcho_cache_busting_config(cls) -> dict[str, Any]:
+        """Extract Honcho identity keys, memoized by honcho.json mtime."""
+        try:
+            from plugins.memory.honcho.client import HonchoClientConfig, resolve_config_path
+
+            path = resolve_config_path()
+            try:
+                mtime_ns = path.stat().st_mtime_ns
+            except OSError:
+                mtime_ns = None
+            memo_key = (str(path), mtime_ns)
+            cached = cls._HONCHO_CACHE_BUSTING_MEMO.get(memo_key)
+            if cached is not None:
+                return dict(cached)
+
+            hcfg = HonchoClientConfig.from_global_config(config_path=path)
+            aliases = hcfg.user_peer_aliases or {}
+            values = {
+                "honcho.peer_name": hcfg.peer_name,
+                "honcho.ai_peer": hcfg.ai_peer,
+                "honcho.pin_peer_name": bool(hcfg.pin_peer_name),
+                "honcho.runtime_peer_prefix": hcfg.runtime_peer_prefix or "",
+                "honcho.user_peer_aliases": sorted(aliases.items()) if isinstance(aliases, dict) else [],
+            }
+            cls._HONCHO_CACHE_BUSTING_MEMO = {memo_key: values}
+            return dict(values)
+        except Exception:
+            return cls._empty_honcho_cache_busting_config()
+
    @classmethod
    def _extract_cache_busting_config(cls, user_config: dict | None) -> dict:
        """Pull values that must bust the cached agent.
@ -15345,26 +15458,12 @@ class GatewayRunner:
            out["tools.registry_generation"] = None

        # Honcho identity-mapping keys live in honcho.json, not user_config.
-        # HonchoSessionManager freezes the resolved peer_name / ai_peer /
-        # pin / aliases / prefix at construction; without busting here,
-        # mid-flight honcho.json edits go unread until the next unrelated
-        # cache eviction.
-        try:
-            from plugins.memory.honcho.client import HonchoClientConfig
-
-            hcfg = HonchoClientConfig.from_global_config()
-            out["honcho.peer_name"] = hcfg.peer_name
-            out["honcho.ai_peer"] = hcfg.ai_peer
-            out["honcho.pin_peer_name"] = bool(hcfg.pin_peer_name)
-            out["honcho.runtime_peer_prefix"] = hcfg.runtime_peer_prefix or ""
-            aliases = hcfg.user_peer_aliases or {}
-            out["honcho.user_peer_aliases"] = sorted(aliases.items()) if isinstance(aliases, dict) else []
-        except Exception:
-            out["honcho.peer_name"] = None
-            out["honcho.ai_peer"] = None
-            out["honcho.pin_peer_name"] = None
-            out["honcho.runtime_peer_prefix"] = None
-            out["honcho.user_peer_aliases"] = None
+        # Only read that file when Honcho is the active memory provider.
+        provider = cfg_get(cfg, "memory", "provider")
+        if isinstance(provider, str) and provider.lower() == "honcho":
+            out.update(cls._extract_honcho_cache_busting_config())
+        else:
+            out.update(cls._empty_honcho_cache_busting_config())

        return out

@ -17203,7 +17302,7 @@ class GatewayRunner:
                    _hc = _hm.get("content", "")
                    if "MEDIA:" in _hc:
                        _TOOL_MEDIA_RE = re.compile(
-                            r'MEDIA:((?:/|~\/)\S+\.(?:png|jpe?g|gif|webp|'
+                            r'MEDIA:((?:[A-Za-z]:[/\\]|/|~\/)\S+\.(?:png|jpe?g|gif|webp|'
                            r'mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|'
                            r'flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|'
                            r'txt|csv|apk|ipa))',
@ -17529,7 +17628,7 @@ class GatewayRunner:
                        content = msg.get("content", "")
                        if "MEDIA:" in content:
                            _TOOL_MEDIA_RE = re.compile(
-                                r'MEDIA:((?:/|~\/)\S+\.(?:png|jpe?g|gif|webp|'
+                                r'MEDIA:((?:[A-Za-z]:[/\\]|/|~\/)\S+\.(?:png|jpe?g|gif|webp|'
                                r'mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|'
                                r'flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|'
                                r'txt|csv|apk|ipa))',
--- a/hermes_cli/banner.py
+++ b/hermes_cli/banner.py
@ -12,14 +12,16 @@ import threading
 import time
 from pathlib import Path
 from hermes_constants import get_hermes_home
-from typing import Dict, List, Optional
+from typing import TYPE_CHECKING, Dict, List, Optional

-from rich.console import Console
-from rich.panel import Panel
-from rich.table import Table
-
-from prompt_toolkit import print_formatted_text as _pt_print
-from prompt_toolkit.formatted_text import ANSI as _PT_ANSI
+# rich and prompt_toolkit are imported lazily (inside the functions that use
+# them) rather than at module level.  Importing this module is on the TUI
+# gateway's critical startup path purely to reach the lightweight update-check
+# helpers (``prefetch_update_check``); pulling rich.console + prompt_toolkit
+# eagerly added ~50ms of wasted imports before ``gateway.ready`` could fire.
+# Keep the type-only reference available to checkers without the runtime cost.
+if TYPE_CHECKING:
+    from rich.console import Console

 logger = logging.getLogger(__name__)

@ -36,6 +38,8 @@ _RST = "\033[0m"

 def cprint(text: str):
    """Print ANSI-colored text through prompt_toolkit's renderer."""
+    from prompt_toolkit import print_formatted_text as _pt_print
+    from prompt_toolkit.formatted_text import ANSI as _PT_ANSI
    _pt_print(_PT_ANSI(text))


@ -471,7 +475,7 @@ def _display_toolset_name(toolset_name: str) -> str:
    )


-def build_welcome_banner(console: Console, model: str, cwd: str,
+def build_welcome_banner(console: "Console", model: str, cwd: str,
                         tools: List[dict] = None,
                         enabled_toolsets: List[str] = None,
                         session_id: str = None,
@ -490,6 +494,8 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
        context_length: Model's context window size in tokens.
    """
    from model_tools import check_tool_availability, TOOLSET_REQUIREMENTS
+    from rich.panel import Panel
+    from rich.table import Table
    if get_toolset_for_tool is None:
        from model_tools import get_toolset_for_tool

--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@ -286,9 +286,22 @@ def detect_install_method(project_root: Optional[Path] = None) -> str:
    Resolution order:
    1. Stamped ``~/.hermes/.install_method`` file (written by installers)
    2. HERMES_MANAGED env / .managed marker (NixOS, Homebrew)
-    3. Container detection (/.dockerenv, /run/.containerenv, cgroup)
-    4. .git directory presence -> 'git'
-    5. Fallback -> 'pip'
+    3. .git directory presence -> 'git'
+    4. Fallback -> 'pip'
+
+    Note: running inside a container is NOT treated as "docker" on its own.
+    The two supported install paths both self-identify via the
+    ``.install_method`` stamp (caught by step 1), so neither relies on
+    container detection here:
+      - the curl installer (scripts/install.sh, the README/website install
+        command) git-clones the repo and stamps ``git``;
+      - the published ``nousresearch/hermes-agent`` image stamps ``docker``
+        at boot via ``docker/stage2-hook.sh``.
+    An unsupported manual install dropped into a container (no stamp) was
+    wrongly classified as the published image by bare container detection,
+    so ``hermes update`` bailed with "doesn't apply inside the Docker
+    container". Without that fallback such installs fall through to the
+    ``.git``/pip checks and behave like any off-path install. See issue #34397.
    """
    stamp = get_hermes_home() / ".install_method"
    try:
@ -300,9 +313,6 @@ def detect_install_method(project_root: Optional[Path] = None) -> str:
    managed = get_managed_system()
    if managed:
        return managed.lower().replace(" ", "-")
-    from hermes_constants import is_container
-    if is_container():
-        return "docker"
    if project_root is None:
        project_root = Path(__file__).parent.parent.resolve()
    if (project_root / ".git").is_dir():
@ -320,6 +330,34 @@ def stamp_install_method(method: str) -> None:
        pass


+def is_uv_tool_install() -> bool:
+    """Return True when the *running* Hermes lives in a ``uv tool`` layout.
+
+    ``uv tool install hermes-agent`` places the install at
+    ``.../uv/tools/hermes-agent/...`` (default ``~/.local/share/uv/tools``,
+    or ``$UV_TOOL_DIR/...``). Such installs live outside any virtualenv, so
+    ``uv pip install`` fails with ``No virtual environment found`` and the
+    update path must use ``uv tool upgrade`` instead.
+
+    Detection is intentionally restricted to properties of the running
+    interpreter (``sys.prefix`` / ``sys.executable``). We deliberately do
+    NOT consult ``uv tool list``: it would also return True when
+    ``hermes-agent`` happens to be uv-tool-installed on the machine while
+    the *active* Hermes is a regular pip/venv install, causing
+    ``hermes update`` to upgrade the wrong copy. It would also block on a
+    subprocess call (~seconds) just to compute a recommendation string.
+    """
+    def _has_uv_tool_marker(path: str) -> bool:
+        norm = os.path.normpath(path).replace(os.sep, "/").lower()
+        return "/uv/tools/hermes-agent/" in norm + "/"
+
+    if _has_uv_tool_marker(sys.prefix):
+        return True
+    if _has_uv_tool_marker(sys.executable or ""):
+        return True
+    return False
+
+
 def recommended_update_command_for_method(method: str) -> str:
    """Return the update command or guidance for a given install method."""
    if method == "nixos":
@ -329,9 +367,10 @@ def recommended_update_command_for_method(method: str) -> str:
    if method == "docker":
        return "docker pull nousresearch/hermes-agent:latest"
    if method == "pip":
+        if is_uv_tool_install():
+            return "uv tool upgrade hermes-agent"
        import shutil
-        uv = shutil.which("uv")
-        if uv:
+        if shutil.which("uv"):
            return "uv pip install --upgrade hermes-agent"
        return "pip install --upgrade hermes-agent"
    return "hermes update"
@ -1184,6 +1223,11 @@ DEFAULT_CONFIG = {
        # Mirrors `hermes -c` muscle memory.  Default off so existing
        # users aren't surprised.  HERMES_TUI_RESUME=<id> always wins.
        "tui_auto_resume_recent": False,
+        # When true (default), `hermes --tui` drops a one-time hint
+        # ("subagents working · /agents to watch live") the first time a turn
+        # starts delegating, nudging the user toward the live spawn-tree
+        # dashboard. Set false to suppress the hint.
+        "tui_agents_nudge": True,
        "bell_on_complete": False,
        "show_reasoning": False,
        "streaming": False,
@ -1203,6 +1247,13 @@ DEFAULT_CONFIG = {
        # class of over-claim that otherwise forces users to run
        # `git status` to verify edits landed.  Set false to suppress.
        "file_mutation_verifier": True,
+        # Turn-completion explainer.  When true (default), the agent appends a
+        # one-line explanation to its final response whenever a turn ends
+        # abnormally with no usable reply — empty content after retries, a
+        # partial/truncated stream, a still-pending tool result, or an
+        # iteration/budget limit.  Replaces the bare "(empty)" sentinel so the
+        # failure isn't silent from the UI's perspective.  Set false to suppress.
+        "turn_completion_explainer": True,
        "show_cost": False,       # Show $ cost in the status bar (off by default)
        "skin": "default",
        # UI language for static user-facing messages (approval prompts, a
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@ -204,6 +204,60 @@ def _fail_and_issue(text: str, detail: str, fix: str, issues: list[str]) -> None
    issues.append(fix)


+def _read_pyproject_version() -> str | None:
+    """Read the ``version = "..."`` from ``pyproject.toml`` at the project root.
+
+    Returns None when running from an installed wheel (no pyproject.toml ships
+    with the package) or when the file can't be parsed. Reads only the
+    ``[project]`` version, ignoring any version strings that appear in other
+    tables.
+    """
+    pyproject = PROJECT_ROOT / "pyproject.toml"
+    try:
+        text = pyproject.read_text(encoding="utf-8")
+    except OSError:
+        return None
+    in_project = False
+    for raw in text.splitlines():
+        line = raw.strip()
+        if line.startswith("[") and line.endswith("]"):
+            in_project = line == "[project]"
+            continue
+        if in_project and line.startswith("version") and "=" in line:
+            value = line.split("=", 1)[1]
+            value = value.split("#", 1)[0].strip().strip("\"'")
+            return value or None
+    return None
+
+
+def _check_version_consistency(issues: list[str]) -> None:
+    """Verify pyproject.toml version matches hermes_cli.__version__.
+
+    A git conflict resolution (reset/merge) can revert one file without the
+    other, leaving ``hermes --version`` reporting a stale version while
+    ``pyproject.toml`` is current. Detect that drift so users can re-sync.
+    Silent no-op for installed wheels where pyproject.toml isn't present.
+    """
+    try:
+        from hermes_cli import __version__ as init_version
+    except Exception:
+        return
+    pyproject_version = _read_pyproject_version()
+    if pyproject_version is None:
+        # Installed wheel or unreadable pyproject — nothing to cross-check.
+        return
+    if pyproject_version == init_version:
+        check_ok("Version files consistent", f"({init_version})")
+    else:
+        _fail_and_issue(
+            "Version mismatch between source files",
+            f"(pyproject.toml {pyproject_version} != hermes_cli/__init__.py {init_version})",
+            "Re-sync version files (e.g. run 'hermes update', or set "
+            "hermes_cli/__init__.py __version__ to match pyproject.toml)",
+            issues,
+        )
+
+
 def _check_s6_supervision(issues: list[str]) -> None:
    """Inside a container under our s6 /init, surface what s6 sees.

@ -509,6 +563,10 @@ def run_doctor(args):
        check_ok("Virtual environment active")
    else:
        check_warn("Not in virtual environment", "(recommended)")
+
+    # Detect drift between pyproject.toml and hermes_cli/__init__.py versions
+    # (a git conflict resolution can silently revert one but not the other).
+    _check_version_consistency(issues)
    
    _section("Required Packages")
    required_packages = [
--- a/hermes_cli/kanban_db.py
+++ b/hermes_cli/kanban_db.py
@ -396,6 +396,41 @@ def workspaces_root(board: Optional[str] = None) -> Path:
    return board_dir(slug) / "workspaces"


+def attachments_root(board: Optional[str] = None) -> Path:
+    """Return the directory under which task file attachments are stored.
+
+    Mirrors :func:`worker_logs_dir` / :func:`workspaces_root`: anchored
+    per-board so attachments don't leak between projects. Each task gets
+    its own ``<root>/.../attachments/<task_id>/`` subdirectory.
+
+    ``HERMES_KANBAN_ATTACHMENTS_ROOT`` pins the path directly (highest
+    precedence) for tests and unusual deployments.
+
+    ``default`` uses ``<root>/kanban/attachments/``; other boards use
+    ``<root>/kanban/boards/<slug>/attachments/``.
+
+    Workers (which run with full file-tool access) read attached files
+    by the absolute path surfaced in :func:`build_worker_context`. On the
+    local terminal backend — the default for kanban — that path resolves
+    directly. Remote backends (Docker/Modal) need this directory mounted;
+    see the kanban docs.
+    """
+    override = os.environ.get("HERMES_KANBAN_ATTACHMENTS_ROOT", "").strip()
+    if override:
+        return Path(override).expanduser()
+    slug = _normalize_board_slug(board)
+    if slug is None:
+        slug = get_current_board()
+    if slug == DEFAULT_BOARD:
+        return kanban_home() / "kanban" / "attachments"
+    return board_dir(slug) / "attachments"
+
+
+def task_attachments_dir(task_id: str, board: Optional[str] = None) -> Path:
+    """Return the per-task attachment directory ``<root>/<task_id>/``."""
+    return attachments_root(board=board) / task_id
+
+
 def worker_logs_dir(board: Optional[str] = None) -> Path:
    """Return the directory under which per-task worker logs are written.

@ -831,6 +866,20 @@ class Comment:
    created_at: int


+@dataclass
+class Attachment:
+    """In-memory view of a row from the ``task_attachments`` table."""
+
+    id: int
+    task_id: str
+    filename: str
+    stored_path: str
+    content_type: Optional[str]
+    size: int
+    uploaded_by: Optional[str]
+    created_at: int
+
+
@dataclass
 class Event:
    id: int
@ -957,6 +1006,23 @@ CREATE TABLE IF NOT EXISTS task_runs (
    error               TEXT
 );

+-- Files attached to a task (PDFs, images, source documents). The blob
+-- lives on disk under ``attachments_root(board)/<task_id>/<stored_name>``;
+-- this row carries metadata + the absolute ``stored_path`` so the
+-- dashboard can list/download and ``build_worker_context`` can surface
+-- the absolute path to the worker (which has full file-tool access). See
+-- #35338.
+CREATE TABLE IF NOT EXISTS task_attachments (
+    id           INTEGER PRIMARY KEY AUTOINCREMENT,
+    task_id      TEXT NOT NULL,
+    filename     TEXT NOT NULL,
+    stored_path  TEXT NOT NULL,
+    content_type TEXT,
+    size         INTEGER NOT NULL DEFAULT 0,
+    uploaded_by  TEXT,
+    created_at   INTEGER NOT NULL
+);
+
 -- Subscription from a gateway source (platform + chat + thread) to a
 -- task. The gateway's kanban-notifier watcher tails task_events and
 -- pushes ``completed`` / ``blocked`` / ``spawn_auto_blocked`` events to
@ -981,6 +1047,7 @@ CREATE INDEX IF NOT EXISTS idx_comments_task         ON task_comments(task_id, c
 CREATE INDEX IF NOT EXISTS idx_events_task           ON task_events(task_id, created_at);
 CREATE INDEX IF NOT EXISTS idx_runs_task             ON task_runs(task_id, started_at);
 CREATE INDEX IF NOT EXISTS idx_runs_status           ON task_runs(status);
+CREATE INDEX IF NOT EXISTS idx_attachments_task      ON task_attachments(task_id, created_at);
 CREATE INDEX IF NOT EXISTS idx_notify_task           ON kanban_notify_subs(task_id);
 """

@ -1637,6 +1704,140 @@ def _migrate_add_optional_columns(conn: sqlite3.Connection) -> None:
            (new, old),
        )

+    _rebuild_drifted_tables(conn)
+
+
+# Legacy DBs defined these tables with a ``TEXT PRIMARY KEY`` id (or, for
+# ``kanban_notify_subs``, a nullable ``TEXT last_event_id``). The current
+# schema uses ``INTEGER PRIMARY KEY AUTOINCREMENT`` / ``INTEGER NOT NULL
+# DEFAULT 0``. ``CREATE TABLE IF NOT EXISTS`` skips existing tables
+# regardless of schema and ``_add_column_if_missing`` only adds columns, so
+# neither can fix a drifted column type — the table must be rebuilt. See
+# #35096.
+#
+# Each entry pairs the canonical CREATE TABLE with the CREATE INDEX
+# statements that DROP TABLE would otherwise take down with it (including
+# ``idx_events_run``, added by the additive pass above). To guard against
+# this list drifting from SCHEMA_SQL, ``test_rebuilt_schema_matches_fresh``
+# asserts a rebuilt legacy DB is byte-identical to a fresh one.
+_REBUILD_SPECS = {
+    "task_events": (
+        "CREATE TABLE task_events ("
+        " id INTEGER PRIMARY KEY AUTOINCREMENT,"
+        " task_id TEXT NOT NULL, run_id INTEGER, kind TEXT NOT NULL,"
+        " payload TEXT, created_at INTEGER NOT NULL)",
+        (
+            "CREATE INDEX idx_events_task ON task_events(task_id, created_at)",
+            "CREATE INDEX idx_events_run ON task_events(run_id, id)",
+        ),
+    ),
+    "task_comments": (
+        "CREATE TABLE task_comments ("
+        " id INTEGER PRIMARY KEY AUTOINCREMENT,"
+        " task_id TEXT NOT NULL, author TEXT NOT NULL, body TEXT NOT NULL,"
+        " created_at INTEGER NOT NULL)",
+        ("CREATE INDEX idx_comments_task ON task_comments(task_id, created_at)",),
+    ),
+    "task_runs": (
+        "CREATE TABLE task_runs ("
+        " id INTEGER PRIMARY KEY AUTOINCREMENT,"
+        " task_id TEXT NOT NULL, profile TEXT, step_key TEXT,"
+        " status TEXT NOT NULL, claim_lock TEXT, claim_expires INTEGER,"
+        " worker_pid INTEGER, max_runtime_seconds INTEGER,"
+        " last_heartbeat_at INTEGER, started_at INTEGER NOT NULL,"
+        " ended_at INTEGER, outcome TEXT, summary TEXT, metadata TEXT,"
+        " error TEXT)",
+        (
+            "CREATE INDEX idx_runs_task ON task_runs(task_id, started_at)",
+            "CREATE INDEX idx_runs_status ON task_runs(status)",
+        ),
+    ),
+    "kanban_notify_subs": (
+        "CREATE TABLE kanban_notify_subs ("
+        " task_id TEXT NOT NULL, platform TEXT NOT NULL, chat_id TEXT NOT NULL,"
+        " thread_id TEXT NOT NULL DEFAULT '', user_id TEXT,"
+        " notifier_profile TEXT, created_at INTEGER NOT NULL,"
+        " last_event_id INTEGER NOT NULL DEFAULT 0,"
+        " PRIMARY KEY (task_id, platform, chat_id, thread_id))",
+        ("CREATE INDEX idx_notify_task ON kanban_notify_subs(task_id)",),
+    ),
+}
+
+
+def _table_has_drifted(conn: sqlite3.Connection, table: str) -> bool:
+    """True when ``table`` still carries the legacy (pre-AUTOINCREMENT) shape."""
+    info = conn.execute(f"PRAGMA table_info({table})").fetchall()
+    if not info:
+        return False  # table absent — nothing to rebuild
+    if table == "kanban_notify_subs":
+        lei = next((c for c in info if c["name"] == "last_event_id"), None)
+        return lei is not None and (lei["type"] or "").upper() != "INTEGER"
+    # task_events / task_comments / task_runs: id must be INTEGER and a PK.
+    id_col = next((c for c in info if c["name"] == "id"), None)
+    if id_col is None:
+        return False
+    return not ((id_col["type"] or "").upper() == "INTEGER" and id_col["pk"])
+
+
+def _rebuild_drifted_tables(conn: sqlite3.Connection) -> None:
+    """Rebuild any kanban table whose column types drifted from SCHEMA_SQL.
+
+    Old boards crash the gateway notifier (``int(None)`` on a NULL id in
+    ``unseen_events_for_sub``) and never match the ``id > cursor`` filter, so
+    every kanban notification is silently lost (#35096). Each affected table is
+    rebuilt with the standard SQLite pattern — CREATE new → INSERT shared
+    columns → DROP old → RENAME — recreating its indexes too (DROP TABLE takes
+    them down). The legacy TEXT ids are dropped (they aren't valid integers);
+    AUTOINCREMENT assigns fresh ones and ``last_event_id`` cursors reset to 0,
+    so the first post-migration tick replays a task's event history once —
+    the safe failure mode for a feature that was already fully broken.
+
+    The whole pass runs in one transaction so an interruption can't leave a
+    table half-renamed, and under ``connect()``'s init locks so nothing races
+    it. Idempotent: a correctly-typed DB skips every table and returns without
+    opening a transaction.
+    """
+    drifted = [t for t in _REBUILD_SPECS if _table_has_drifted(conn, t)]
+    if not drifted:
+        return
+
+    conn.execute("BEGIN IMMEDIATE")
+    try:
+        for table in drifted:
+            create_sql, index_sqls = _REBUILD_SPECS[table]
+            old_cols = [c["name"] for c in conn.execute(f"PRAGMA table_info({table})")]
+            _log.info("kanban migration: rebuilding %s to match current schema", table)
+            conn.execute(f"ALTER TABLE {table} RENAME TO {table}_legacy")
+            conn.execute(create_sql)
+            new_cols = {c["name"] for c in conn.execute(f"PRAGMA table_info({table})")}
+            if table == "kanban_notify_subs":
+                # Cast the legacy TEXT cursor to INTEGER; NULL / non-numeric → 0.
+                shared = [c for c in old_cols if c in new_cols and c != "last_event_id"]
+                cols_csv = ", ".join(shared)
+                conn.execute(
+                    f"INSERT INTO {table} ({cols_csv}, last_event_id) "
+                    f"SELECT {cols_csv}, COALESCE(CAST(last_event_id AS INTEGER), 0) "
+                    f"FROM {table}_legacy"
+                )
+            else:
+                # Drop the legacy TEXT id; AUTOINCREMENT reassigns it.
+                shared = [c for c in old_cols if c in new_cols and c != "id"]
+                cols_csv = ", ".join(shared)
+                conn.execute(
+                    f"INSERT INTO {table} ({cols_csv}) "
+                    f"SELECT {cols_csv} FROM {table}_legacy"
+                )
+            conn.execute(f"DROP TABLE {table}_legacy")
+            for index_sql in index_sqls:
+                conn.execute(index_sql)
+        conn.execute("COMMIT")
+    except Exception:
+        try:
+            conn.execute("ROLLBACK")
+        except sqlite3.OperationalError:
+            pass
+        raise
+

 def _check_file_length_invariant(conn: sqlite3.Connection) -> None:
    """Read the SQLite header page_count and compare against actual file size.
@ -2252,6 +2453,121 @@ def list_comments(conn: sqlite3.Connection, task_id: str) -> list[Comment]:
    ]


+# ---------------------------------------------------------------------------
+# Attachments
+# ---------------------------------------------------------------------------
+
+def add_attachment(
+    conn: sqlite3.Connection,
+    task_id: str,
+    *,
+    filename: str,
+    stored_path: str,
+    content_type: Optional[str] = None,
+    size: int = 0,
+    uploaded_by: Optional[str] = None,
+) -> int:
+    """Record a file attachment for a task. Returns the new attachment id.
+
+    The caller is responsible for writing the blob to ``stored_path``
+    first (under :func:`task_attachments_dir`); this only persists the
+    metadata row and appends an ``attached`` event.
+    """
+    if not filename or not filename.strip():
+        raise ValueError("attachment filename is required")
+    if not stored_path or not stored_path.strip():
+        raise ValueError("attachment stored_path is required")
+    now = int(time.time())
+    with write_txn(conn):
+        if not conn.execute(
+            "SELECT 1 FROM tasks WHERE id = ?", (task_id,)
+        ).fetchone():
+            raise ValueError(f"unknown task {task_id}")
+        cur = conn.execute(
+            "INSERT INTO task_attachments "
+            "(task_id, filename, stored_path, content_type, size, uploaded_by, created_at) "
+            "VALUES (?, ?, ?, ?, ?, ?, ?)",
+            (
+                task_id,
+                filename.strip(),
+                stored_path,
+                content_type,
+                int(size),
+                uploaded_by,
+                now,
+            ),
+        )
+        _append_event(
+            conn,
+            task_id,
+            "attached",
+            {"filename": filename.strip(), "size": int(size), "by": uploaded_by},
+        )
+        return int(cur.lastrowid or 0)
+
+
+def list_attachments(conn: sqlite3.Connection, task_id: str) -> list[Attachment]:
+    rows = conn.execute(
+        "SELECT * FROM task_attachments WHERE task_id = ? ORDER BY created_at ASC, id ASC",
+        (task_id,),
+    ).fetchall()
+    return [
+        Attachment(
+            id=r["id"],
+            task_id=r["task_id"],
+            filename=r["filename"],
+            stored_path=r["stored_path"],
+            content_type=r["content_type"],
+            size=r["size"] or 0,
+            uploaded_by=r["uploaded_by"],
+            created_at=r["created_at"],
+        )
+        for r in rows
+    ]
+
+
+def get_attachment(conn: sqlite3.Connection, attachment_id: int) -> Optional[Attachment]:
+    r = conn.execute(
+        "SELECT * FROM task_attachments WHERE id = ?", (attachment_id,)
+    ).fetchone()
+    if r is None:
+        return None
+    return Attachment(
+        id=r["id"],
+        task_id=r["task_id"],
+        filename=r["filename"],
+        stored_path=r["stored_path"],
+        content_type=r["content_type"],
+        size=r["size"] or 0,
+        uploaded_by=r["uploaded_by"],
+        created_at=r["created_at"],
+    )
+
+
+def delete_attachment(conn: sqlite3.Connection, attachment_id: int) -> Optional[Attachment]:
+    """Delete an attachment row and its on-disk blob. Returns the removed row.
+
+    Returns ``None`` when no row matched. The blob is removed best-effort
+    (a missing file is not an error); the metadata row is the source of
+    truth for whether an attachment "exists".
+    """
+    with write_txn(conn):
+        att = get_attachment(conn, attachment_id)
+        if att is None:
+            return None
+        conn.execute("DELETE FROM task_attachments WHERE id = ?", (attachment_id,))
+        _append_event(
+            conn, att.task_id, "attachment_removed", {"filename": att.filename}
+        )
+    try:
+        p = Path(att.stored_path)
+        if p.is_file():
+            p.unlink()
+    except OSError:
+        pass
+    return att
+
+
 def list_events(conn: sqlite3.Connection, task_id: str) -> list[Event]:
    rows = conn.execute(
        "SELECT * FROM task_events WHERE task_id = ? ORDER BY created_at ASC, id ASC",
@ -2457,7 +2773,9 @@ def _has_sticky_block(conn: sqlite3.Connection, task_id: str) -> bool:
    return bool(row) and row["kind"] == "blocked"


-def recompute_ready(conn: sqlite3.Connection) -> int:
+def recompute_ready(
+    conn: sqlite3.Connection, failure_limit: int = None,
+) -> int:
    """Promote ``todo`` tasks to ``ready`` when all parents are ``done`` or ``archived``.

    Returns the number of tasks promoted.  Safe to call inside or outside
@ -2465,17 +2783,34 @@ def recompute_ready(conn: sqlite3.Connection) -> int:

    ``blocked`` tasks are also considered for promotion (so a task
    blocked purely by a parent dependency unblocks itself when the
-    parent completes), *except* when the most recent block event was a
-    worker-initiated ``kanban_block`` — those stay blocked until an
-    explicit ``kanban_unblock`` (#28712).  Without that guard, a
-    ``review-required`` handoff would auto-respawn, the fresh worker
-    would find nothing to do, exit cleanly, get recorded as a protocol
-    violation, and the cycle would repeat indefinitely.
+    parent completes), *except* in two cases:
+
+    1. The most recent block event was a worker-initiated
+       ``kanban_block`` — those stay blocked until an explicit
+       ``kanban_unblock`` (#28712).
+
+    2. The task's ``consecutive_failures`` has reached the effective
+       failure limit.  This prevents infinite retry loops when a task
+       repeatedly exhausts its iteration budget: without this guard the
+       counter would reset on every recovery cycle and the circuit
+       breaker could never trip (#35072).
+
+    The effective failure limit resolves in the same order as the
+    circuit breaker in ``_record_task_failure`` so the two never
+    disagree about when a task is permanently blocked:
+
+      1. per-task ``max_retries`` if set
+      2. caller-supplied ``failure_limit`` (the dispatcher passes the
+         ``kanban.failure_limit`` config value through ``dispatch_once``)
+      3. ``DEFAULT_FAILURE_LIMIT``
    """
+    if failure_limit is None:
+        failure_limit = DEFAULT_FAILURE_LIMIT
    promoted = 0
    with write_txn(conn):
        todo_rows = conn.execute(
-            "SELECT id, status FROM tasks WHERE status IN ('todo', 'blocked')"
+            "SELECT id, status, consecutive_failures, max_retries "
+            "FROM tasks WHERE status IN ('todo', 'blocked')"
        ).fetchall()
        for row in todo_rows:
            task_id = row["id"]
@ -2493,13 +2828,25 @@ def recompute_ready(conn: sqlite3.Connection) -> int:
                (task_id,),
            ).fetchall()
            if all(p["status"] in ("done", "archived") for p in parents):
-                # Blocked tasks also get their failure counters reset —
-                # this is effectively an auto-unblock (circuit-breaker
-                # recovery; worker-initiated blocks are skipped above).
                if cur_status == "blocked":
+                    # Don't auto-recover tasks that have hit the
+                    # circuit-breaker failure limit.  Without this
+                    # guard, a task that repeatedly exhausts its
+                    # iteration budget would cycle forever:
+                    # block → auto-recover → respawn → budget
+                    # exhausted → block → …  The counter must also
+                    # be preserved so the breaker can accumulate
+                    # across recovery cycles.
+                    failures = int(row["consecutive_failures"] or 0)
+                    task_limit = row["max_retries"]
+                    effective_limit = (
+                        int(task_limit) if task_limit is not None
+                        else int(failure_limit)
+                    )
+                    if failures >= effective_limit:
+                        continue
                    conn.execute(
-                        "UPDATE tasks SET status = 'ready', "
-                        "consecutive_failures = 0, last_failure_error = NULL "
+                        "UPDATE tasks SET status = 'ready' "
                        "WHERE id = ? AND status = 'blocked'",
                        (task_id,),
                    )
@ -5424,7 +5771,7 @@ def dispatch_once(
    if _crash_auto_blocked:
        result.auto_blocked.extend(_crash_auto_blocked)
    result.timed_out = enforce_max_runtime(conn)
-    result.promoted = recompute_ready(conn)
+    result.promoted = recompute_ready(conn, failure_limit=failure_limit)

    # Count tasks already running so max_spawn enforces concurrency rather
    # than a per-tick spawn budget. See the docstring above for the full
@ -6300,6 +6647,25 @@ def build_worker_context(conn: sqlite3.Connection, task_id: str) -> str:
        lines.append(_cap(task.body, _CTX_MAX_BODY_BYTES))
        lines.append("")

+    # Attachments — files uploaded to this task (PDFs, source docs,
+    # images). Surface the absolute on-disk path so the worker, which has
+    # full file-tool access, can read them directly (read_file, terminal
+    # `pdftotext`, etc.). On the local terminal backend the path resolves
+    # as-is; remote backends need the kanban attachments dir mounted.
+    attachments = list_attachments(conn, task_id)
+    if attachments:
+        lines.append("## Attachments")
+        lines.append(
+            "Files attached to this task. Read them with the file/terminal "
+            "tools at the absolute paths below:"
+        )
+        for att in attachments:
+            size_kb = max(1, (att.size + 1023) // 1024) if att.size else 0
+            size_str = f", {size_kb} KB" if size_kb else ""
+            ctype = f", {att.content_type}" if att.content_type else ""
+            lines.append(f"- `{att.filename}`{ctype}{size_str} → `{att.stored_path}`")
+        lines.append("")
+
    # Prior attempts — show closed runs so a retrying worker sees the
    # history. Skip the currently-active run (that's this worker).
    # Cap at _CTX_MAX_PRIOR_ATTEMPTS most-recent closed runs; older
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@ -65,6 +65,46 @@ import os
 import sys


+def _set_process_title() -> None:
+    """Set the process title to 'hermes' so tools like 'ps', 'top', and
+    'htop' show the app name instead of 'python3.xx'.
+
+    Purely cosmetic — non-fatal on any platform.
+
+    Strategy (try in order):
+      1. ``setproctitle`` (opt-in dep — installed via ``hermes tools`` or
+         ``pip install setproctitle``, or bundled in a future release).
+      2. ctypes ``prctl(PR_SET_NAME)`` (Linux only, 15-char limit).
+      3. ctypes ``pthread_setname_np`` (macOS only, kernel thread name —
+         changes lldb/top but not ``ps aux``).
+      4. No-op on Windows (the .exe name is already ``hermes.exe``).
+    """
+    # Strategy 1: setproctitle (best — works on macOS, Linux, BSD)
+    try:
+        import setproctitle  # type: ignore[import-untyped]
+
+        setproctitle.setproctitle("hermes")
+        return
+    except ImportError:
+        pass
+
+    # Strategy 2/3: platform-specific ctypes fallback
+    import ctypes
+    import platform
+
+    try:
+        system = platform.system()
+        if system == "Linux":
+            libc = ctypes.CDLL("libc.so.6", use_errno=True)
+            libc.prctl(15, b"hermes", 0, 0, 0)  # PR_SET_NAME = 15
+        elif system == "Darwin":
+            libc = ctypes.CDLL("libc.dylib", use_errno=True)
+            libc.pthread_setname_np(b"hermes")
+        # Windows: the .exe name is already ``hermes.exe`` — nothing to do.
+    except Exception:
+        pass
+
+
 # Mouse-tracking residue suppression — runs BEFORE every other import on the
 # TUI hot path so the terminal stops emitting SGR/X10 mouse reports while the
 # Python launcher is still doing imports (≈100–300ms in cooked + echo mode,
@ -2385,7 +2425,12 @@ def select_provider_and_model(args=None):
    if active == "openrouter" and get_env_value("OPENAI_BASE_URL"):
        active = "custom"

-    from hermes_cli.models import CANONICAL_PROVIDERS, _PROVIDER_LABELS
+    from hermes_cli.models import (
+        CANONICAL_PROVIDERS,
+        _PROVIDER_LABELS,
+        group_providers,
+        provider_group_for_slug,
+    )

    provider_labels = dict(_PROVIDER_LABELS)  # derive from canonical list
    if active and active in _custom_provider_map:
@ -2398,8 +2443,43 @@ def select_provider_and_model(args=None):
    print(f"  Active provider:  {active_label}")
    print()

-    # Step 1: Provider selection — flat list from CANONICAL_PROVIDERS
-    all_providers = [(p.slug, p.tui_desc) for p in CANONICAL_PROVIDERS]
+    # Step 1: Provider selection.
+    #
+    # Canonical providers are folded into top-level groups (display only — see
+    # PROVIDER_GROUPS in hermes_cli/models.py). A multi-member group shows one
+    # row ("Kimi / Moonshot ▸"); picking it opens a member sub-picker that
+    # resolves back to a concrete slug, so the dispatch chain below is
+    # unchanged. Custom providers and the trailing actions stay flat.
+    canonical_descs = {p.slug: p.tui_desc for p in CANONICAL_PROVIDERS}
+    grouped_rows = group_providers([p.slug for p in CANONICAL_PROVIDERS])
+
+    # The group/slug that should be pre-selected: the active provider's group
+    # if it's grouped, otherwise the active slug itself.
+    active_group = provider_group_for_slug(active) if active else ""
+
+    # ordered entries: (key, label, members)
+    #   members == [] → leaf row, key is a provider slug / action
+    #   members != [] → group row, key is "group:<gid>"
+    ordered: list[tuple[str, str, list[str]]] = []
+    default_idx = 0
+    for row in grouped_rows:
+        if row["kind"] == "group":
+            gid = row["group_id"]
+            label = f"{row['label']} ▸"
+            key = f"group:{gid}"
+            is_active = bool(active_group) and gid == active_group
+            members = row["members"]
+        else:
+            slug = row["slug"]
+            label = canonical_descs.get(slug, provider_labels.get(slug, slug))
+            key = slug
+            is_active = bool(active) and slug == active
+            members = []
+        if is_active:
+            ordered.append((key, f"{label}  ← currently active", members))
+            default_idx = len(ordered) - 1
+        else:
+            ordered.append((key, label, members))

    for key, provider_info in _custom_provider_map.items():
        name = provider_info["name"]
@ -2407,36 +2487,49 @@ def select_provider_and_model(args=None):
        short_url = base_url.replace("https://", "").replace("http://", "").rstrip("/")
        saved_model = provider_info.get("model", "")
        model_hint = f" — {saved_model}" if saved_model else ""
-        all_providers.append((key, f"{name} ({short_url}){model_hint}"))
-
-    # Build the menu
-    ordered = []
-    default_idx = 0
-    for key, label in all_providers:
+        label = f"{name} ({short_url}){model_hint}"
        if active and key == active:
-            ordered.append((key, f"{label}  ← currently active"))
+            ordered.append((key, f"{label}  ← currently active", []))
            default_idx = len(ordered) - 1
        else:
-            ordered.append((key, label))
+            ordered.append((key, label, []))

-    ordered.append(("custom", "Custom endpoint (enter URL manually)"))
+    ordered.append(("custom", "Custom endpoint (enter URL manually)", []))
    _has_saved_custom_list = isinstance(config.get("custom_providers"), list) and bool(
        config.get("custom_providers")
    )
    if _has_saved_custom_list:
-        ordered.append(("remove-custom", "Remove a saved custom provider"))
-    ordered.append(("aux-config", "Configure auxiliary models..."))
-    ordered.append(("cancel", "Leave unchanged"))
+        ordered.append(("remove-custom", "Remove a saved custom provider", []))
+    ordered.append(("aux-config", "Configure auxiliary models...", []))
+    ordered.append(("cancel", "Leave unchanged", []))

    provider_idx = _prompt_provider_choice(
-        [label for _, label in ordered],
+        [label for _, label, _ in ordered],
        default=default_idx,
    )
    if provider_idx is None or ordered[provider_idx][0] == "cancel":
        print("No change.")
        return

-    selected_provider = ordered[provider_idx][0]
+    selected_key = ordered[provider_idx][0]
+    selected_members = ordered[provider_idx][2]
+
+    # Group row → drill into a member sub-picker. Default to the active member
+    # if the active provider lives in this group.
+    if selected_members:
+        member_default = 0
+        if active in selected_members:
+            member_default = selected_members.index(active)
+        member_labels = [
+            canonical_descs.get(m, provider_labels.get(m, m)) for m in selected_members
+        ]
+        member_idx = _prompt_provider_choice(member_labels, default=member_default)
+        if member_idx is None:
+            print("No change.")
+            return
+        selected_provider = selected_members[member_idx]
+    else:
+        selected_provider = selected_key

    if selected_provider == "aux-config":
        _aux_config_menu()
@ -8008,39 +8101,6 @@ def _detect_concurrent_hermes_instances(
    except Exception:
        return []

-    # Build a set of PIDs to exclude: the Python process itself plus its
-    # entire parent chain. On Windows the setuptools-generated hermes.exe
-    # launcher is a separate native process that spawns python.exe (the
-    # interpreter that runs our code).  os.getpid() returns the Python PID,
-    # but the launcher (which holds the file lock) is the parent.  Without
-    # walking the parent chain, every ``hermes update`` reports its own
-    # launcher as a concurrent instance — a false positive.
-    if exclude_pid is not None:
-        exclude_pids: set[int] = {exclude_pid}
-    else:
-        exclude_pids = {os.getpid()}
-    # The parent-walk is best-effort: if psutil rejects a PID (NoSuchProcess /
-    # AccessDenied) we stop walking and use whatever we've collected so far.
-    # Broader Exception catch on the outer block guards against partially-
-    # stubbed psutil in unit tests (e.g. a SimpleNamespace lacking Process /
-    # NoSuchProcess) — the surrounding update flow documents this helper as
-    # "never raises".
-    try:
-        current = psutil.Process(next(iter(exclude_pids)))
-        while True:
-            try:
-                parent = current.parent()
-            except Exception:
-                break
-            if parent is None or parent.pid <= 0:
-                break
-            if parent.pid in exclude_pids:
-                break  # loop detected
-            exclude_pids.add(parent.pid)
-            current = parent
-    except Exception:
-        pass
-
    # Resolve every shim path to its canonical form once for cheap comparison.
    shim_paths: set[str] = set()
    for shim in _hermes_exe_shims(scripts_dir):
@ -8051,6 +8111,56 @@ def _detect_concurrent_hermes_instances(
    if not shim_paths:
        return []

+    # Build a set of PIDs to exclude: the Python process itself plus every
+    # ancestor whose executable is one of our shims. On Windows the
+    # setuptools-generated hermes.exe launcher is a separate native process
+    # that spawns python.exe (the interpreter that runs our code).
+    # os.getpid() returns the Python PID, but the launcher (which holds the
+    # file lock) is the parent. Without excluding it, every ``hermes update``
+    # reports its own launcher as a concurrent instance — a false positive
+    # (issues #29341, #34795).
+    #
+    # Two robustness points learned from the field:
+    #   1. Use ``proc.parents()`` — it returns the WHOLE ancestor list in one
+    #      call. The earlier per-hop ``current.parent()`` loop bailed on the
+    #      first psutil error (AccessDenied/NoSuchProcess is common on Windows
+    #      across session/elevation boundaries), leaving the launcher shim in
+    #      the candidate set and re-triggering the false positive.
+    #   2. Only exclude ancestors whose exe is itself a shim. A genuine second
+    #      hermes.exe sitting *under* a non-Hermes parent (e.g. a Hermes
+    #      Desktop backend child) must still be flagged, so we don't blanket-
+    #      exclude unrelated ancestors like the shell or terminal.
+    # Broad ``except Exception`` guards against partially-stubbed psutil in
+    # unit tests; this helper is documented as "never raises".
+    if exclude_pid is not None:
+        exclude_pids: set[int] = {int(exclude_pid)}
+    else:
+        exclude_pids = {os.getpid()}
+    try:
+        seed = next(iter(exclude_pids))
+        try:
+            ancestors = psutil.Process(seed).parents()
+        except Exception:
+            ancestors = []
+        for ancestor in ancestors:
+            try:
+                anc_exe = ancestor.exe()
+            except Exception:
+                continue
+            if not anc_exe:
+                continue
+            try:
+                anc_norm = str(Path(anc_exe).resolve()).lower()
+            except (OSError, ValueError):
+                anc_norm = str(anc_exe).lower()
+            if anc_norm in shim_paths:
+                try:
+                    exclude_pids.add(int(ancestor.pid))
+                except Exception:
+                    continue
+    except Exception:
+        pass
+
    matches: list[tuple[int, str]] = []
    try:
        proc_iter = psutil.process_iter(["pid", "exe", "name"])
@ -8091,6 +8201,13 @@ def _format_concurrent_instances_message(
    lines.append("")
    lines.append("  Close Hermes Desktop, exit any open `hermes` REPLs, and")
    lines.append("  stop the gateway (`hermes gateway stop`) before retrying.")
+    lines.append("")
+    if matches:
+        pid_args = " ".join(f"/PID {pid}" for pid, _ in matches)
+        lines.append("  If you've already closed everything and these PIDs are")
+        lines.append("  stale, terminate them directly, then retry the update:")
+        lines.append(f"      taskkill {pid_args} /F")
+        lines.append("")
    lines.append("  Override with `hermes update --force` if you've already")
    lines.append("  confirmed those processes will not write to the venv.")
    return "\n".join(lines)
@ -9055,18 +9172,51 @@ def cmd_update(args):
 def _cmd_update_pip(args):
    """Update Hermes via pip (for PyPI installs)."""
    from hermes_cli import __version__
+    from hermes_cli.config import is_uv_tool_install

    print(f"→ Current version: {__version__}")
    print("→ Checking PyPI for updates...")

    uv = shutil.which("uv")
-    if uv:
+    in_venv = sys.prefix != sys.base_prefix
+    # pipx-managed installs live under .../pipx/venvs/<name>/...
+    pipx_managed = "pipx" in sys.prefix.split(os.sep)
+    pipx = shutil.which("pipx") if pipx_managed else None
+
+    # Only the ``uv pip install`` path inside a venv needs VIRTUAL_ENV
+    # exported (uv refuses to install without it when the launcher shim
+    # didn't activate the venv). ``uv tool upgrade`` / ``pipx upgrade``
+    # operate on a named environment and ignore VIRTUAL_ENV, so we don't
+    # set it for them.
+    export_virtualenv = False
+
+    if is_uv_tool_install():
+        if not uv:
+            print("✗ Detected a uv-tool install but `uv` is not on PATH; install uv and retry.")
+            sys.exit(1)
+        cmd = [uv, "tool", "upgrade", "hermes-agent"]
+    elif pipx_managed and pipx:
+        # pipx owns its own venv; ``pipx upgrade`` is the only correct path.
+        # Matches scripts/auto-update.sh, which already uses pipx upgrade.
+        cmd = [pipx, "upgrade", "hermes-agent"]
+    elif uv:
        cmd = [uv, "pip", "install", "--upgrade", "hermes-agent"]
+        if in_venv:
+            # Launcher shim runs the venv interpreter but doesn't export
+            # VIRTUAL_ENV; without it uv errors "No virtual environment found".
+            export_virtualenv = True
+        else:
+            # Outside any venv, ``--system`` lets uv target the active
+            # interpreter, matching pip's default behaviour.
+            cmd.insert(3, "--system")
    else:
        cmd = [sys.executable, "-m", "pip", "install", "--upgrade", "hermes-agent"]

    print(f"→ Running: {' '.join(cmd)}")
-    result = subprocess.run(cmd)
+    run_kwargs = {}
+    if export_virtualenv:
+        run_kwargs["env"] = {**os.environ, "VIRTUAL_ENV": sys.prefix}
+    result = subprocess.run(cmd, **run_kwargs)
    if result.returncode != 0:
        print("✗ Update failed")
        sys.exit(1)
@ -11157,6 +11307,13 @@ def cmd_completion(args, parser=None):
        print(generate_bash(parser))


+def cmd_prompt_size(args):
+    """Show a byte/char breakdown of the system prompt + tool schemas."""
+    from hermes_cli.prompt_size import cmd_prompt_size as _impl
+
+    _impl(args)
+
+
 def cmd_logs(args):
    """View and filter Hermes log files."""
    from hermes_cli.logs import tail_log, list_logs
@ -11193,6 +11350,7 @@ _BUILTIN_SUBCOMMANDS = frozenset(
        "dump", "fallback", "gateway", "hooks", "import", "insights",
        "gui", "desktop", "kanban", "login", "logout", "logs", "lsp", "mcp", "memory", "migrate",
        "model", "pairing", "plugins", "portal", "postinstall", "profile", "proxy",
+        "prompt-size",
        "send", "sessions", "setup",
        "skills", "slack", "status", "tools", "uninstall", "update",
        "version", "webhook", "whatsapp", "chat", "secrets", "security",
@ -11293,6 +11451,26 @@ _AGENT_SUBCOMMANDS = {
 }


+def _is_tui_chat_launch(args) -> bool:
+    return bool(getattr(args, "tui", False) or os.environ.get("HERMES_TUI") == "1")
+
+
+def _command_has_dedicated_mcp_startup(args) -> bool:
+    if args.command == "acp":
+        return True
+    if args.command == "gateway" and getattr(args, "gateway_command", None) == "run":
+        return True
+    if args.command == "cron" and getattr(args, "cron_command", None) in {"run", "tick"}:
+        return True
+    return False
+
+
+def _should_background_mcp_startup(args) -> bool:
+    if _is_tui_chat_launch(args):
+        return False
+    return args.command in {None, "chat", "rl"}
+
+
 def _prepare_agent_startup(args) -> None:
    """Discover plugins/MCP/hooks for commands that can run an agent turn."""
    _sub_attr, _sub_set = _AGENT_SUBCOMMANDS.get(args.command, (None, None))
@ -11312,19 +11490,42 @@ def _prepare_agent_startup(args) -> None:
            "plugin discovery failed at CLI startup",
            exc_info=True,
        )
-    try:
-        # MCP tool discovery — no event loop running in CLI/TUI startup,
-        # so inline is safe.  Moved here from model_tools.py module scope
-        # to avoid freezing the gateway's event loop on its first message
-        # via the same lazy import path (#16856).
-        from tools.mcp_tool import discover_mcp_tools
+    _run_inline_mcp_discovery = True
+    if _is_tui_chat_launch(args):
+        # The TUI launcher hands off to a dedicated startup path that already
+        # backgrounds MCP discovery with a bounded join before the first tool
+        # snapshot.
+        _run_inline_mcp_discovery = False
+    elif _command_has_dedicated_mcp_startup(args):
+        # These entrypoints already do their own MCP startup later on the real
+        # runtime path (gateway executor, ACP launcher, cron job runner).
+        _run_inline_mcp_discovery = False
+    elif _should_background_mcp_startup(args):
+        try:
+            from hermes_cli.mcp_startup import start_background_mcp_discovery

-        discover_mcp_tools()
-    except Exception:
-        logger.debug(
-            "MCP tool discovery failed at CLI startup",
-            exc_info=True,
-        )
+            start_background_mcp_discovery(
+                logger=logger,
+                thread_name="cli-mcp-discovery",
+            )
+        except Exception:
+            logger.debug(
+                "Background MCP tool discovery failed at CLI startup",
+                exc_info=True,
+            )
+        _run_inline_mcp_discovery = False
+    if _run_inline_mcp_discovery:
+        try:
+            # MCP tool discovery remains synchronous for entrypoints that do
+            # not own a later bounded/executor startup path.
+            from tools.mcp_tool import discover_mcp_tools
+
+            discover_mcp_tools()
+        except Exception:
+            logger.debug(
+                "MCP tool discovery failed at CLI startup",
+                exc_info=True,
+            )
    try:
        from hermes_cli.config import load_config
        from agent.shell_hooks import register_from_config
@ -11465,6 +11666,10 @@ def _try_termux_fast_tui_launch() -> bool:

 def main():
    """Main entry point for hermes CLI."""
+    # Cosmetic: make the process show up as 'hermes' instead of 'python3.11'
+    # in ps/top/htop.  Non-fatal — just a nicer UX.
+    _set_process_title()
+
    # Force UTF-8 stdio on Windows before anything prints.  No-op elsewhere.
    try:
        from hermes_cli.stdio import configure_windows_stdio
@ -13218,9 +13423,15 @@ Examples:
        ),
    )
    memory_sub = memory_parser.add_subparsers(dest="memory_command")
-    memory_sub.add_parser(
+    _setup_parser = memory_sub.add_parser(
        "setup", help="Interactive provider selection and configuration"
    )
+    _setup_parser.add_argument(
+        "provider",
+        nargs="?",
+        default=None,
+        help="Provider to configure directly (e.g. honcho), skipping the picker",
+    )
    memory_sub.add_parser("status", help="Show current memory provider config")
    memory_sub.add_parser("off", help="Disable external provider (built-in only)")
    _reset_parser = memory_sub.add_parser(
@ -14471,6 +14682,30 @@ Examples:
    )
    logs_parser.set_defaults(func=cmd_logs)

+    # =========================================================================
+    # prompt-size command
+    # =========================================================================
+    prompt_size_parser = subparsers.add_parser(
+        "prompt-size",
+        help="Show a byte breakdown of the system prompt + tool schemas",
+        description=(
+            "Report the fixed prompt budget for a fresh session: system "
+            "prompt total, skills index, memory, user profile, and tool-schema "
+            "JSON. Runs offline (no API call)."
+        ),
+    )
+    prompt_size_parser.add_argument(
+        "--platform",
+        default="cli",
+        help="Platform to simulate (cli, telegram, discord, ...). Default: cli",
+    )
+    prompt_size_parser.add_argument(
+        "--json",
+        action="store_true",
+        help="Emit the breakdown as JSON",
+    )
+    prompt_size_parser.set_defaults(func=cmd_prompt_size)
+
    # =========================================================================
    # Parse and execute
    # =========================================================================
--- a/hermes_cli/mcp_startup.py
+++ b/hermes_cli/mcp_startup.py
@ -0,0 +1,59 @@
+"""Shared CLI/TUI-safe helpers for background MCP discovery."""
+
+from __future__ import annotations
+
+import threading
+from typing import Optional
+
+_mcp_discovery_lock = threading.Lock()
+_mcp_discovery_started = False
+_mcp_discovery_thread: Optional[threading.Thread] = None
+
+
+def _has_configured_mcp_servers() -> bool:
+    """Cheap config probe so non-MCP users avoid importing the MCP stack."""
+    try:
+        from hermes_cli.config import read_raw_config
+
+        mcp_servers = (read_raw_config() or {}).get("mcp_servers")
+        return isinstance(mcp_servers, dict) and len(mcp_servers) > 0
+    except Exception:
+        # Be conservative: if config probing fails, try discovery in the
+        # background so startup still can't block.
+        return True
+
+
+def start_background_mcp_discovery(*, logger, thread_name: str) -> None:
+    """Spawn one shared background MCP discovery thread for this process."""
+    global _mcp_discovery_started, _mcp_discovery_thread
+
+    with _mcp_discovery_lock:
+        if _mcp_discovery_started:
+            return
+        _mcp_discovery_started = True
+        if not _has_configured_mcp_servers():
+            return
+
+        def _discover() -> None:
+            try:
+                from tools.mcp_tool import discover_mcp_tools
+
+                discover_mcp_tools()
+            except Exception:
+                logger.debug("Background MCP tool discovery failed", exc_info=True)
+
+        thread = threading.Thread(
+            target=_discover,
+            name=thread_name,
+            daemon=True,
+        )
+        _mcp_discovery_thread = thread
+        thread.start()
+
+
+def wait_for_mcp_discovery(timeout: float = 0.75) -> None:
+    """Briefly wait for background MCP discovery before the first tool snapshot."""
+    thread = _mcp_discovery_thread
+    if thread is None or not thread.is_alive():
+        return
+    thread.join(timeout=timeout)
--- a/hermes_cli/memory_setup.py
+++ b/hermes_cli/memory_setup.py
@ -452,7 +452,11 @@ def memory_command(args) -> None:
    """Route memory subcommands."""
    sub = getattr(args, "memory_command", None)
    if sub == "setup":
-        cmd_setup(args)
+        provider = getattr(args, "provider", None)
+        if provider:
+            cmd_setup_provider(provider)
+        else:
+            cmd_setup(args)
    elif sub == "status":
        cmd_status(args)
    else:
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@ -936,6 +936,105 @@ _PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
 _PROVIDER_LABELS["custom"] = "Custom endpoint"  # special case: not a named provider


+# ---------------------------------------------------------------------------
+# Provider groups — DISPLAY ONLY
+#
+# Some vendors expose several Hermes provider slugs (one per endpoint /
+# auth method: global API, China API, OAuth coding plan, ...). Listing every
+# slug as a top-level row in the interactive `hermes model` / setup wizard /
+# Telegram `/model` pickers makes that list long and noisy.
+#
+# These groups fold related slugs under one top-level row in INTERACTIVE
+# PICKERS only. They do NOT change ``CANONICAL_PROVIDERS``, slug identity,
+# the ``--provider`` flag, ``/model <provider:model>``, or any typed path —
+# every member slug remains individually addressable. Grouping is a pure
+# display affordance; ``group_providers()`` is the single fold used by all
+# three picker surfaces so they stay consistent.
+#
+#   group_id -> (display_label, [member_slug, ...])
+#
+# Member order is the order shown inside the group submenu.
+# ---------------------------------------------------------------------------
+PROVIDER_GROUPS: dict[str, tuple[str, list[str]]] = {
+    "kimi":     ("Kimi / Moonshot", ["kimi-coding", "kimi-coding-cn"]),
+    "minimax":  ("MiniMax",         ["minimax", "minimax-oauth", "minimax-cn"]),
+    "xai":      ("xAI Grok",        ["xai", "xai-oauth"]),
+    "google":   ("Google Gemini",   ["gemini", "google-gemini-cli"]),
+    "openai":   ("OpenAI",          ["openai-codex", "openai-api"]),
+    "opencode": ("OpenCode",        ["opencode-zen", "opencode-go"]),
+    "copilot":  ("GitHub Copilot",  ["copilot", "copilot-acp"]),
+}
+
+# Reverse index: member slug -> group_id. Built once at import.
+_SLUG_TO_GROUP: dict[str, str] = {
+    slug: gid for gid, (_label, members) in PROVIDER_GROUPS.items() for slug in members
+}
+
+
+def provider_group_for_slug(slug: str) -> str:
+    """Return the group_id a provider slug belongs to, or "" if ungrouped."""
+    return _SLUG_TO_GROUP.get(str(slug or "").strip().lower(), "")
+
+
+def group_providers(slugs):
+    """Fold a flat ordered slug iterable into picker rows by provider group.
+
+    DISPLAY ONLY. Used by every interactive picker (``hermes model``, the
+    setup wizard, the Telegram ``/model`` keyboard) so grouping is identical
+    across surfaces.
+
+    Each returned row is a dict::
+
+        {"kind": "single", "slug": <slug>}                       # ungrouped, or
+                                                                  # 1-member group
+        {"kind": "group", "group_id": <gid>, "label": <label>,
+         "members": [<slug>, ...]}                                # 2+ members
+
+    Rules:
+      * A group row appears at the position of its FIRST present member, in
+        the input order. Subsequent members fold into that row (and are not
+        emitted again).
+      * Member order inside a group follows ``PROVIDER_GROUPS`` declaration,
+        restricted to the members actually present in ``slugs``.
+      * A group reduced to a single present member degrades to a ``single``
+        row — no pointless one-item submenu.
+      * Slugs not in any group pass through as ``single`` rows, order
+        preserved.
+      * Duplicate slugs in the input are ignored after first sight.
+    """
+    seen: set[str] = set()
+    # Which present members each group has, in declaration order.
+    group_members: dict[str, list[str]] = {}
+    for gid, (_label, members) in PROVIDER_GROUPS.items():
+        present = [m for m in members if m in set(slugs)]
+        if present:
+            group_members[gid] = present
+
+    rows = []
+    emitted_groups: set[str] = set()
+    for slug in slugs:
+        s = str(slug or "").strip().lower()
+        if not s or s in seen:
+            continue
+        seen.add(s)
+        gid = _SLUG_TO_GROUP.get(s, "")
+        if not gid:
+            rows.append({"kind": "single", "slug": s})
+            continue
+        if gid in emitted_groups:
+            continue  # already folded at the first member's position
+        emitted_groups.add(gid)
+        members = group_members.get(gid, [s])
+        if len(members) <= 1:
+            rows.append({"kind": "single", "slug": members[0]})
+        else:
+            label, _ = PROVIDER_GROUPS[gid]
+            rows.append(
+                {"kind": "group", "group_id": gid, "label": label, "members": list(members)}
+            )
+    return rows
+
+
 _PROVIDER_ALIASES = {
    "glm": "zai",
    "z-ai": "zai",
--- a/hermes_cli/nous_account.py
+++ b/hermes_cli/nous_account.py
@ -4,6 +4,7 @@ from __future__ import annotations

 import hashlib
 import json
+import threading
 import time
 import urllib.request
 from dataclasses import dataclass
@ -15,6 +16,7 @@ NousAccountInfoSource = Literal["jwt", "account_api", "inference_key", "none", "

 _ACCOUNT_INFO_CACHE_TTL = 60
 _account_info_cache: tuple[str, float, "NousPortalAccountInfo"] | None = None
+_ACCOUNT_INFO_CACHE_LOCK = threading.Lock()


@dataclass(frozen=True)
@ -302,10 +304,11 @@ def _fresh_account_info(
        portal_base_url = _portal_base_url(refreshed_state) or portal_base_url
        cache_key = _cache_key(access_token, portal_base_url)

-        if not force_fresh and _account_info_cache is not None:
-            cached_key, cached_at, cached_info = _account_info_cache
-            if cached_key == cache_key and (time.monotonic() - cached_at) < _ACCOUNT_INFO_CACHE_TTL:
-                return cached_info
+        with _ACCOUNT_INFO_CACHE_LOCK:
+            if not force_fresh and _account_info_cache is not None:
+                cached_key, cached_at, cached_info = _account_info_cache
+                if cached_key == cache_key and (time.monotonic() - cached_at) < _ACCOUNT_INFO_CACHE_TTL:
+                    return cached_info

        payload = _fetch_nous_account_info(access_token, portal_base_url)
        if not payload:
@ -327,7 +330,8 @@ def _fresh_account_info(
            state=refreshed_state,
            portal_base_url=portal_base_url,
        )
-        _account_info_cache = (cache_key, time.monotonic(), info)
+        with _ACCOUNT_INFO_CACHE_LOCK:
+            _account_info_cache = (cache_key, time.monotonic(), info)
        return info
    except Exception as exc:
        return _error_info(
--- a/hermes_cli/nous_subscription.py
+++ b/hermes_cli/nous_subscription.py
@ -587,9 +587,20 @@ def apply_nous_managed_defaults(
        changed.add("browser")

    if "image_gen" in selected_toolsets and not fal_key_is_configured():
+        image_cfg = config.get("image_gen")
+        if not isinstance(image_cfg, dict):
+            image_cfg = {}
+            config["image_gen"] = image_cfg
+        image_cfg["use_gateway"] = True
        changed.add("image_gen")

    if "video_gen" in selected_toolsets and not fal_key_is_configured():
+        video_cfg = config.get("video_gen")
+        if not isinstance(video_cfg, dict):
+            video_cfg = {}
+            config["video_gen"] = video_cfg
+        video_cfg["provider"] = "fal"
+        video_cfg["use_gateway"] = True
        changed.add("video_gen")

    return changed
--- a/hermes_cli/oneshot.py
+++ b/hermes_cli/oneshot.py
@ -174,28 +174,55 @@ def run_oneshot(
    # Redirect stderr AND stdout to devnull for the entire call tree.
    # We'll print the final response to the real stdout at the end.
    real_stdout = sys.stdout
+    real_stderr = sys.stderr
    devnull = open(os.devnull, "w", encoding="utf-8")

+    response: Optional[str] = None
+    failure: BaseException | None = None
    try:
        with redirect_stdout(devnull), redirect_stderr(devnull):
-            response = _run_agent(
-                prompt,
-                model=model,
-                provider=provider,
-                toolsets=explicit_toolsets,
-                use_config_toolsets=use_config_toolsets,
-            )
+            try:
+                response = _run_agent(
+                    prompt,
+                    model=model,
+                    provider=provider,
+                    toolsets=explicit_toolsets,
+                    use_config_toolsets=use_config_toolsets,
+                )
+            except BaseException as exc:  # noqa: BLE001
+                # Capture anything that escapes the agent (including OSError
+                # from prompt_toolkit/Vt100 when stdout is a non-TTY pipe,
+                # KeyboardInterrupt, SystemExit, etc.) so we can surface it on
+                # the real stderr instead of crashing past the redirect with a
+                # traceback that the caller never sees. A silent exit in a
+                # cron / SSH / subprocess context is the worst failure mode.
+                # See #30623.
+                failure = exc
    finally:
        try:
            devnull.close()
        except Exception:
            pass

-    if response:
-        real_stdout.write(response)
-        if not response.endswith("\n"):
-            real_stdout.write("\n")
-        real_stdout.flush()
+    if failure is not None:
+        # Re-raise control-flow exceptions so the parent handles them as usual
+        # (Ctrl-C / explicit sys.exit() inside the agent).
+        if isinstance(failure, (KeyboardInterrupt, SystemExit)):
+            raise failure
+        real_stderr.write(f"hermes -z: agent failed: {failure}\n")
+        real_stderr.flush()
+        return 1
+
+    if not (response or "").strip():
+        real_stderr.write("hermes -z: no final response was produced; treating the run as failed.\n")
+        real_stderr.flush()
+        return 1
+
+    assert response is not None  # narrowed by the empty-response guard above
+    real_stdout.write(response)
+    if not response.endswith("\n"):
+        real_stdout.write("\n")
+    real_stdout.flush()
    return 0


--- a/hermes_cli/profiles.py
+++ b/hermes_cli/profiles.py
@ -1471,8 +1471,9 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:

 def _migrate_honcho_profile_host(old_name: str, new_name: str, new_dir: Path) -> None:
    """Rename Honcho host blocks for a renamed profile without changing peers."""
-    old_host = f"hermes.{old_name}"
-    new_host = f"hermes.{new_name}"
+    old_host = f"hermes_{old_name}"
+    legacy_old_host = f"hermes.{old_name}"
+    new_host = f"hermes_{new_name}"

    candidates = [
        new_dir / "honcho.json",
@ -1496,18 +1497,24 @@ def _migrate_honcho_profile_host(old_name: str, new_name: str, new_dir: Path) ->
            continue

        hosts = raw.get("hosts")
-        if not isinstance(hosts, dict) or old_host not in hosts:
+        if not isinstance(hosts, dict):
+            continue
+        source_host = old_host if old_host in hosts else legacy_old_host
+        if source_host not in hosts:
            continue

        if new_host in hosts:
            print(f"⚠ Honcho host block not migrated: {new_host} already exists in {path}")
            continue

-        block = hosts[old_host]
+        block = hosts[source_host]
        if isinstance(block, dict) and "aiPeer" not in block:
-            bare = old_host.split(".", 1)[1] if "." in old_host else old_host
+            if source_host.startswith("hermes_"):
+                bare = source_host.split("_", 1)[1]
+            else:
+                bare = source_host.split(".", 1)[1] if "." in source_host else source_host
            block["aiPeer"] = bare
-        hosts[new_host] = hosts.pop(old_host)
+        hosts[new_host] = hosts.pop(source_host)
        tmp = path.with_suffix(path.suffix + ".tmp")
        try:
            tmp.write_text(json.dumps(raw, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
@ -1519,7 +1526,7 @@ def _migrate_honcho_profile_host(old_name: str, new_name: str, new_dir: Path) ->
                pass
            continue

-        print(f"✓ Honcho host updated: {old_host} → {new_host}")
+        print(f"✓ Honcho host updated: {source_host} → {new_host}")


 def rename_profile(old_name: str, new_name: str) -> Path:
--- a/hermes_cli/prompt_size.py
+++ b/hermes_cli/prompt_size.py
@ -0,0 +1,153 @@
+"""Prompt-size diagnostic: ``hermes prompt-size``.
+
+Reports a byte/char breakdown of the system prompt the agent would build for
+a fresh session — system prompt total, the ``<available_skills>`` index,
+memory + user profile, and tool-schema JSON. Lets users see where their fixed
+prompt budget goes (issue #34667) without parsing a saved session JSON by hand.
+
+The diagnostic builds a real inspection agent (so the numbers match what
+actually ships on the wire) but never makes a network call: it passes dummy
+credentials so ``AIAgent.__init__`` takes the direct-construction path, then
+calls ``build_system_prompt_parts`` / inspects ``agent.tools`` offline.
+"""
+
+from __future__ import annotations
+
+import json
+import re
+from typing import Any, Dict, List, Tuple
+
+# The skills index is wrapped in this tag pair inside the stable tier.
+_SKILLS_BLOCK_RE = re.compile(r"<available_skills>.*?</available_skills>", re.DOTALL)
+
+
+def _bytes(s: str) -> int:
+    return len(s.encode("utf-8"))
+
+
+def _build_inspection_agent(platform: str) -> Any:
+    """Construct an offline AIAgent for prompt inspection.
+
+    Dummy ``api_key`` + ``base_url`` force the direct-construction path in
+    ``run_agent.py`` (no provider auto-detection, no network). Toolsets and
+    platform come from the caller so the breakdown matches a real session.
+    """
+    from run_agent import AIAgent
+    from hermes_cli.config import load_config
+
+    cfg = load_config()
+    model_cfg = cfg.get("model", {}) if isinstance(cfg.get("model"), dict) else {}
+    model = model_cfg.get("default") or model_cfg.get("model") or ""
+
+    return AIAgent(
+        model=model,
+        api_key="inspect-only",
+        base_url="https://openrouter.ai/api/v1",
+        quiet_mode=True,
+        save_trajectories=False,
+        platform=platform,
+    )
+
+
+def compute_prompt_breakdown(platform: str = "cli") -> Dict[str, Any]:
+    """Return a dict of prompt-size measurements for a fresh session.
+
+    Keys: ``system_prompt`` (chars/bytes), ``skills_index``, ``memory``,
+    ``user_profile``, ``tools`` (count + json bytes), and ``sections`` (a list
+    of (label, chars, bytes) for the three prompt tiers).
+    """
+    from agent.system_prompt import build_system_prompt, build_system_prompt_parts
+
+    agent = _build_inspection_agent(platform)
+
+    parts = build_system_prompt_parts(agent)
+    full = build_system_prompt(agent)
+
+    stable = parts.get("stable", "")
+    context = parts.get("context", "")
+    volatile = parts.get("volatile", "")
+
+    # Skills index — the <available_skills> block (the largest single block
+    # when many skills are installed). Measured inside the stable tier.
+    skills_match = _SKILLS_BLOCK_RE.search(stable)
+    skills_index = skills_match.group(0) if skills_match else ""
+
+    # Memory + user profile live in the volatile tier. We re-derive their
+    # blocks directly from the memory store so the numbers are attributable
+    # even though they're joined into ``volatile``.
+    memory_block = ""
+    user_block = ""
+    store = getattr(agent, "_memory_store", None)
+    if store is not None:
+        try:
+            if getattr(agent, "_memory_enabled", True):
+                memory_block = store.format_for_system_prompt("memory") or ""
+            if getattr(agent, "_user_profile_enabled", True):
+                user_block = store.format_for_system_prompt("user") or ""
+        except Exception:
+            pass
+
+    # Tool-schema JSON — the other half of the fixed per-call payload.
+    tools = getattr(agent, "tools", None) or []
+    tools_json = json.dumps(tools, ensure_ascii=False)
+
+    sections: List[Tuple[str, int, int]] = [
+        ("stable (identity/guidance/skills)", len(stable), _bytes(stable)),
+        ("context (AGENTS.md/cwd files)", len(context), _bytes(context)),
+        ("volatile (memory/profile/timestamp)", len(volatile), _bytes(volatile)),
+    ]
+
+    return {
+        "platform": platform,
+        "model": getattr(agent, "model", "") or "",
+        "system_prompt": {"chars": len(full), "bytes": _bytes(full)},
+        "skills_index": {"chars": len(skills_index), "bytes": _bytes(skills_index)},
+        "memory": {"chars": len(memory_block), "bytes": _bytes(memory_block)},
+        "user_profile": {"chars": len(user_block), "bytes": _bytes(user_block)},
+        "tools": {"count": len(tools), "json_bytes": _bytes(tools_json)},
+        "sections": sections,
+    }
+
+
+def _fmt_kb(n: int) -> str:
+    return f"{n / 1024:.1f} KB"
+
+
+def render_breakdown(data: Dict[str, Any]) -> str:
+    """Render the breakdown as plain text suitable for a terminal."""
+    lines: List[str] = []
+    sp = data["system_prompt"]
+    lines.append(f"Prompt-size breakdown (platform={data['platform']}, model={data['model'] or 'unset'})")
+    lines.append("")
+    lines.append(f"  System prompt total : {sp['bytes']:>8,} B  ({_fmt_kb(sp['bytes'])}, {sp['chars']:,} chars)")
+    lines.append("")
+    lines.append("  Major blocks:")
+    si = data["skills_index"]
+    mem = data["memory"]
+    up = data["user_profile"]
+    lines.append(f"    skills index       : {si['bytes']:>8,} B  ({_fmt_kb(si['bytes'])})")
+    lines.append(f"    memory             : {mem['bytes']:>8,} B  ({_fmt_kb(mem['bytes'])})")
+    lines.append(f"    user profile       : {up['bytes']:>8,} B  ({_fmt_kb(up['bytes'])})")
+    lines.append("")
+    lines.append("  Prompt tiers:")
+    for label, chars, byts in data["sections"]:
+        lines.append(f"    {label:<36}: {byts:>8,} B  ({_fmt_kb(byts)})")
+    lines.append("")
+    tools = data["tools"]
+    lines.append(f"  Tool schemas         : {tools['json_bytes']:>8,} B  ({_fmt_kb(tools['json_bytes'])}, {tools['count']} tools)")
+    return "\n".join(lines)
+
+
+def cmd_prompt_size(args: Any) -> None:
+    """Entry point for ``hermes prompt-size``."""
+    platform = getattr(args, "platform", "cli") or "cli"
+    as_json = getattr(args, "json", False)
+    try:
+        data = compute_prompt_breakdown(platform)
+    except Exception as e:
+        print(f"Could not compute prompt-size breakdown: {e}")
+        return
+    if as_json:
+        print(json.dumps(data, ensure_ascii=False, indent=2))
+    else:
+        print(render_breakdown(data))
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@ -4168,10 +4168,19 @@ _LOOPBACK_HOSTS = frozenset({"127.0.0.1", "::1", "localhost", "testclient"})
 def _ws_client_is_allowed(ws: "WebSocket") -> bool:
    """Check if the WebSocket client IP is acceptable.

-    Loopback mode: only loopback clients allowed — the legacy
+    Loopback bind: only loopback clients allowed — the legacy
    ``?token=<_SESSION_TOKEN>`` path is the only auth we have, so we
    don't want LAN hosts guessing tokens.

+    Explicit non-loopback bind (``--host 0.0.0.0``, ``--host ::``, or a
+    specific address such as a Tailscale/LAN IP, always with
+    ``--insecure``): allow any peer. The operator explicitly opted into
+    non-loopback exposure, so the loopback-only peer restriction does not
+    apply. DNS-rebinding is still blocked by the Host/Origin guard in
+    :func:`_ws_host_origin_is_allowed`, which mirrors the HTTP layer and
+    requires the Host header to match the bound interface — the same
+    defence ``_is_accepted_host`` applies to non-loopback HTTP requests.
+
    Gated mode: any peer is allowed — uvicorn's ``proxy_headers=True``
    (enabled when the OAuth gate is active so cookies can pick up
    ``X-Forwarded-Proto``) rewrites ``ws.client.host`` to the
@ -4182,6 +4191,14 @@ def _ws_client_is_allowed(ws: "WebSocket") -> bool:
    """
    if getattr(app.state, "auth_required", False):
        return True
+    # Any explicit non-loopback bind (0.0.0.0, ::, or a specific LAN /
+    # Tailscale address) means the operator opted into non-loopback
+    # access via --insecure.  The loopback-only peer gate only applies to
+    # an actual loopback bind; otherwise the WS handshake is rejected even
+    # though same-bind HTTP requests pass _is_accepted_host.
+    bound_host = (getattr(app.state, "bound_host", "") or "").strip().lower()
+    if bound_host and bound_host not in _LOOPBACK_HOSTS:
+        return True
    client_host = ws.client.host if ws.client else ""
    if not client_host:
        return True
--- a/hermes_state.py
+++ b/hermes_state.py
@ -381,6 +381,7 @@ class SessionDB:

        self._lock = threading.Lock()
        self._write_count = 0
+        self._fts_enabled = False
        try:
            self._conn = sqlite3.connect(
                str(self.db_path),
@ -389,7 +390,6 @@ class SessionDB:
                # handles contention instead of sitting in SQLite's internal
                # busy handler for up to 30s.
                timeout=1.0,
-                # Autocommit mode: Python's default isolation_level=""
                # auto-starts transactions on DML, which conflicts with our
                # explicit BEGIN IMMEDIATE.  None = we manage transactions
                # ourselves.
@ -725,14 +725,44 @@ class SessionDB:
        # FTS5 setup (separate because CREATE VIRTUAL TABLE can't be in executescript with IF NOT EXISTS reliably)
        try:
            cursor.execute("SELECT * FROM messages_fts LIMIT 0")
-        except sqlite3.OperationalError:
-            cursor.executescript(FTS_SQL)
+            self._fts_enabled = True
+        except sqlite3.OperationalError as exc:
+            if "no such table" not in str(exc).lower():
+                raise
+            try:
+                cursor.executescript(FTS_SQL)
+                self._fts_enabled = True
+            except sqlite3.OperationalError as fts_exc:
+                err = str(fts_exc).lower()
+                if "fts5" not in err and "no such module" not in err:
+                    raise
+                logger.warning(
+                    "SQLite FTS5 unavailable for %s; full-text session search "
+                    "disabled. This usually means Hermes is running on an "
+                    "unsupported install (e.g. a pip-installed or pip-managed "
+                    "Python whose bundled SQLite lacks FTS5) rather than a "
+                    "mainline install. Some features may be missing or behave "
+                    "differently. Install the supported way: "
+                    "https://hermes-agent.nousresearch.com (underlying error: %s)",
+                    self.db_path,
+                    fts_exc,
+                )

        # Trigram FTS5 for CJK/substring search
        try:
            cursor.execute("SELECT * FROM messages_fts_trigram LIMIT 0")
-        except sqlite3.OperationalError:
-            cursor.executescript(FTS_TRIGRAM_SQL)
+        except sqlite3.OperationalError as exc:
+            if "no such table" not in str(exc).lower():
+                raise
+            try:
+                cursor.executescript(FTS_TRIGRAM_SQL)
+            except sqlite3.OperationalError as fts_exc:
+                err = str(fts_exc).lower()
+                if "fts5" not in err and "no such module" not in err:
+                    raise
+                # Same FTS5-unavailable cause already warned about above for
+                # messages_fts; the trigram table is an additional CJK index,
+                # so just degrade silently here. CJK search falls back to LIKE.

        self._conn.commit()

@ -947,6 +977,20 @@ class SessionDB:
            )
        self._execute_write(_do)

+    def update_session_model(self, session_id: str, model: str) -> None:
+        """Update the model for a session after a mid-session switch.
+
+        Unlike ``update_token_counts`` which uses ``COALESCE(model, ?)``
+        (only filling in NULL), this unconditionally sets the model column
+        so that the dashboard reflects the user's latest /model choice.
+        """
+        def _do(conn):
+            conn.execute(
+                "UPDATE sessions SET model = ? WHERE id = ?",
+                (model, session_id),
+            )
+        self._execute_write(_do)
+
    def update_token_counts(
        self,
        session_id: str,
@ -2333,6 +2377,9 @@ class SessionDB:
        ignores ``sort``. The trigram CJK path honours ``sort`` like the main
        FTS5 path.
        """
+        if not self._fts_enabled:
+            return []
+
        if not query or not query.strip():
            return []

--- a/locales/af.yaml
+++ b/locales/af.yaml
@ -255,7 +255,7 @@ gateway:
    title:                 "**Titel:** {title}"
    created:               "**Geskep:** {timestamp}"
    last_activity:         "**Laaste aktiwiteit:** {timestamp}"
-    tokens:                "**Tokens:** {tokens}"
+    tokens:                "**Kumulatiewe API-tokens (elke oproep weer gestuur):** {tokens}"
    agent_running:         "**Agent loop:** {state}"
    state_yes:             "Ja ⚡"
    state_no:              "Nee"
--- a/locales/de.yaml
+++ b/locales/de.yaml
@ -255,7 +255,7 @@ gateway:
    title:                 "**Titel:** {title}"
    created:               "**Erstellt:** {timestamp}"
    last_activity:         "**Letzte Aktivität:** {timestamp}"
-    tokens:                "**Tokens:** {tokens}"
+    tokens:                "**Kumulierte API-Tokens (bei jedem Aufruf erneut gesendet):** {tokens}"
    agent_running:         "**Agent läuft:** {state}"
    state_yes:             "Ja ⚡"
    state_no:              "Nein"
--- a/locales/en.yaml
+++ b/locales/en.yaml
@ -270,7 +270,7 @@ gateway:
    title:                 "**Title:** {title}"
    created:               "**Created:** {timestamp}"
    last_activity:         "**Last Activity:** {timestamp}"
-    tokens:                "**Tokens:** {tokens}"
+    tokens:                "**Cumulative API tokens (re-sent each call):** {tokens}"
    agent_running:         "**Agent Running:** {state}"
    state_yes:             "Yes ⚡"
    state_no:              "No"
--- a/locales/es.yaml
+++ b/locales/es.yaml
@ -255,7 +255,7 @@ gateway:
    title:                 "**Título:** {title}"
    created:               "**Creado:** {timestamp}"
    last_activity:         "**Última actividad:** {timestamp}"
-    tokens:                "**Tokens:** {tokens}"
+    tokens:                "**Tokens de API acumulados (reenviados en cada llamada):** {tokens}"
    agent_running:         "**Agente activo:** {state}"
    state_yes:             "Sí ⚡"
    state_no:              "No"
--- a/locales/pt.yaml
+++ b/locales/pt.yaml
@ -255,7 +255,7 @@ gateway:
    title:                 "**Título:** {title}"
    created:               "**Criada:** {timestamp}"
    last_activity:         "**Última atividade:** {timestamp}"
-    tokens:                "**Tokens:** {tokens}"
+    tokens:                "**Tokens de API cumulativos (reenviados a cada chamada):** {tokens}"
    agent_running:         "**Agente em execução:** {state}"
    state_yes:             "Sim ⚡"
    state_no:              "Não"
--- a/optional-skills/autonomous-ai-agents/honcho/SKILL.md
+++ b/optional-skills/autonomous-ai-agents/honcho/SKILL.md
@ -32,14 +32,14 @@ Honcho provides AI-native cross-session user modeling. It learns who the user is
 ### Cloud (app.honcho.dev)

 ```bash
-hermes honcho setup
+hermes memory setup honcho
 # select "cloud", paste API key from https://app.honcho.dev
 ```

 ### Self-hosted

 ```bash
-hermes honcho setup
+hermes memory setup honcho
 # select "local", enter base URL (e.g. http://localhost:8000)
 ```

--- a/plugins/browser/browser_use/provider.py
+++ b/plugins/browser/browser_use/provider.py
@ -119,17 +119,20 @@ class BrowserUseBrowserProvider(BrowserProvider):
        return "Browser Use"

    def is_available(self) -> bool:
-        return self._get_config_or_none() is not None
+        return self._get_config_or_none(refresh_token=False) is not None

    # ------------------------------------------------------------------
    # Config resolution (direct API key OR managed Nous gateway)
    # ------------------------------------------------------------------

-    def _get_config_or_none(self) -> Optional[Dict[str, Any]]:
+    def _get_config_or_none(self, *, refresh_token: bool = True) -> Optional[Dict[str, Any]]:
        # Import here to avoid a hard dependency at module-import time —
        # managed_tool_gateway pulls in the Nous auth stack which can be
        # heavy and is not needed for direct-API-key users.
-        from tools.managed_tool_gateway import resolve_managed_tool_gateway
+        from tools.managed_tool_gateway import (
+            peek_nous_access_token,
+            resolve_managed_tool_gateway,
+        )
        from tools.tool_backend_helpers import prefers_gateway

        # Direct API key wins unless the user has explicitly opted into the
@ -142,7 +145,11 @@ class BrowserUseBrowserProvider(BrowserProvider):
                "managed_mode": False,
            }

-        managed = resolve_managed_tool_gateway("browser-use")
+        # Keep availability scans off the synchronous OAuth refresh path.
+        managed = resolve_managed_tool_gateway(
+            "browser-use",
+            token_reader=None if refresh_token else peek_nous_access_token,
+        )
        if managed is None:
            return None

--- a/plugins/kanban/dashboard/dist/index.js
+++ b/plugins/kanban/dashboard/dist/index.js
@ -2741,6 +2741,8 @@
    // Ready/Block/Complete buttons feel like no-ops.  See #26744.
    const [patchErr, setPatchErr] = useState(null);
    const [newComment, setNewComment] = useState("");
+    const [uploadBusy, setUploadBusy] = useState(false);
+    const [uploadErr, setUploadErr] = useState(null);
    const [editing, setEditing] = useState(false);
    // Home-channel notification toggles. homeChannels is the list of platforms
    // the user has a /sethome on; each entry has a `subscribed` bool telling
@ -2789,6 +2791,49 @@
      }).catch(function (e) { setErr(String(e.message || e)); });
    };

+    // File upload uses raw fetch (not SDK.fetchJSON, which JSON-encodes)
+    // so the browser sets the multipart boundary. Auth rides the session
+    // cookie + bearer token, matching the rest of the dashboard.
+    const handleUpload = function (fileList) {
+      const files = Array.prototype.slice.call(fileList || []);
+      if (!files.length) return;
+      setUploadBusy(true);
+      setUploadErr(null);
+      const token = window.__HERMES_SESSION_TOKEN__ || "";
+      const headers = token ? { Authorization: "Bearer " + token } : {};
+      const url = withBoard(`${API}/tasks/${encodeURIComponent(props.taskId)}/attachments`, boardSlug);
+      // Upload sequentially so a partial failure leaves a clear state.
+      let chain = Promise.resolve();
+      files.forEach(function (f) {
+        chain = chain.then(function () {
+          const fd = new FormData();
+          fd.append("file", f, f.name);
+          return fetch(url, { method: "POST", headers: headers, credentials: "same-origin", body: fd })
+            .then(function (resp) {
+              if (!resp.ok) {
+                return resp.text().then(function (txt) {
+                  throw new Error(parseApiErrorMessage(new Error(resp.status + ": " + txt)));
+                });
+              }
+            });
+        });
+      });
+      chain.then(function () {
+        load();
+        props.onRefresh();
+      }).catch(function (e) {
+        setUploadErr(String(e.message || e));
+      }).finally(function () {
+        setUploadBusy(false);
+      });
+    };
+
+    const handleDeleteAttachment = function (attachmentId) {
+      return SDK.fetchJSON(withBoard(`${API}/attachments/${attachmentId}`, boardSlug), { method: "DELETE" })
+        .then(function () { load(); props.onRefresh(); })
+        .catch(function (e) { setUploadErr(String(e.message || e)); });
+    };
+
    const doPatch = function (patch, opts) {
      if (opts && opts.confirm && !window.confirm(opts.confirm)) {
        return Promise.resolve();
@ -2946,6 +2991,10 @@
          homeBusy: homeBusy,
          onToggleHomeSub: toggleHomeSubscription,
          onRefresh: props.onRefresh,
+          onUpload: handleUpload,
+          onDeleteAttachment: handleDeleteAttachment,
+          uploadBusy: uploadBusy,
+          uploadErr: uploadErr,
        }) : null,
        data ? h("div", { className: "hermes-kanban-drawer-comment-row" },
          h(Input, {
@ -2968,11 +3017,118 @@
    );
  }

+  function _fmtBytes(n) {
+    n = Number(n) || 0;
+    if (n < 1024) return n + " B";
+    if (n < 1024 * 1024) return (n / 1024).toFixed(1) + " KB";
+    return (n / (1024 * 1024)).toFixed(1) + " MB";
+  }
+
+  // Attachments section in the task drawer (#35338). Upload button +
+  // list with download links and a delete (×) per row. The download
+  // link hits GET /attachments/:id which streams the file; the worker
+  // context surfaces the same files' absolute paths so a kanban worker
+  // can read them with the file/terminal tools.
+  function AttachmentsSection(props) {
+    const i18n = props.i18n;
+    const atts = props.attachments || [];
+    const fileRef = useRef(null);
+    const [dlErr, setDlErr] = useState(null);
+    // Download via authenticated fetch → blob → synthetic anchor click.
+    // A plain <a href> can't carry the session header/bearer the dashboard
+    // auth middleware requires in loopback mode, so fetch with the token
+    // and hand the browser a blob URL instead.
+    function downloadAttachment(a) {
+      const token = window.__HERMES_SESSION_TOKEN__ || "";
+      const headers = token ? { Authorization: "Bearer " + token } : {};
+      const url = withBoard(`${API}/attachments/${a.id}`, props.boardSlug);
+      setDlErr(null);
+      fetch(url, { headers: headers, credentials: "same-origin" })
+        .then(function (resp) {
+          if (!resp.ok) {
+            return resp.text().then(function (txt) {
+              throw new Error(parseApiErrorMessage(new Error(resp.status + ": " + txt)));
+            });
+          }
+          return resp.blob();
+        })
+        .then(function (blob) {
+          const objUrl = URL.createObjectURL(blob);
+          const link = document.createElement("a");
+          link.href = objUrl;
+          link.download = a.filename || "attachment";
+          document.body.appendChild(link);
+          link.click();
+          document.body.removeChild(link);
+          setTimeout(function () { URL.revokeObjectURL(objUrl); }, 10000);
+        })
+        .catch(function (e) { setDlErr(String(e.message || e)); });
+    }
+    return h("div", { className: "hermes-kanban-section" },
+      h("div", { className: "hermes-kanban-section-head" },
+        `${tx(i18n, "attachments", "Attachments")} (${atts.length})`),
+      h("input", {
+        ref: fileRef,
+        type: "file",
+        multiple: true,
+        style: { display: "none" },
+        onChange: function (e) {
+          if (props.onUpload) props.onUpload(e.target.files);
+          // Reset so selecting the same file again re-triggers onChange.
+          try { e.target.value = ""; } catch (_e) { /* ignore */ }
+        },
+      }),
+      h("div", { className: "flex items-center gap-2 mb-2" },
+        h(Button, {
+          size: "sm",
+          variant: "outline",
+          disabled: !!props.uploadBusy,
+          onClick: function () { if (fileRef.current) fileRef.current.click(); },
+        }, props.uploadBusy
+            ? tx(i18n, "uploading", "Uploading…")
+            : tx(i18n, "uploadFile", "Upload file")),
+      ),
+      (props.uploadErr || dlErr)
+        ? h("div", { className: "text-xs text-destructive mb-2" }, props.uploadErr || dlErr)
+        : null,
+      atts.length === 0
+        ? h("div", { className: "text-xs text-muted-foreground" },
+            tx(i18n, "noAttachments", "— no attachments —"))
+        : atts.map(function (a) {
+            return h("div", {
+              key: a.id,
+              className: "flex items-center justify-between gap-2 py-1 text-sm",
+            },
+              h("button", {
+                type: "button",
+                className: "hermes-kanban-attachment-link truncate",
+                title: a.filename,
+                onClick: function () { downloadAttachment(a); },
+              }, a.filename),
+              h("span", { className: "text-xs text-muted-foreground whitespace-nowrap" },
+                _fmtBytes(a.size)),
+              h("button", {
+                type: "button",
+                className: "hermes-kanban-drawer-close",
+                title: tx(i18n, "removeAttachment", "Remove attachment"),
+                onClick: function () {
+                  if (window.confirm(tx(i18n, "confirmRemoveAttachment",
+                      "Remove this attachment?"))) {
+                    if (props.onDelete) props.onDelete(a.id);
+                  }
+                },
+              }, "×"),
+            );
+          }),
+    );
+  }
+
  function TaskDetail(props) {
    const { t: i18n } = useI18n();
    const t = props.data.task;
    const comments = props.data.comments || [];
    const events = props.data.events || [];
+    const attachments = props.data.attachments || [];
    const links = props.data.links || { parents: [], children: [] };

    return h("div", { className: "hermes-kanban-drawer-body" },
@ -3042,6 +3198,15 @@
        h("div", { className: "hermes-kanban-section-head" }, tx(i18n, "result", "Result")),
        h(MarkdownBlock, { source: t.result, enabled: props.renderMarkdown }),
      ) : null,
+      h(AttachmentsSection, {
+        attachments: attachments,
+        boardSlug: props.boardSlug,
+        onUpload: props.onUpload,
+        onDelete: props.onDeleteAttachment,
+        uploadBusy: props.uploadBusy,
+        uploadErr: props.uploadErr,
+        i18n: i18n,
+      }),
      h("div", { className: "hermes-kanban-section" },
        h("div", { className: "hermes-kanban-section-head" },
          `${tx(i18n, "comments", "Comments")} (${comments.length})`),
--- a/plugins/kanban/dashboard/dist/style.css
+++ b/plugins/kanban/dashboard/dist/style.css
@ -334,6 +334,11 @@
 .hermes-kanban-drawer {
  width: min(var(--hermes-kanban-drawer-width, 640px), 92vw);
  height: 100vh;
+  /* Dynamic viewport unit excludes the mobile browser's collapsing chrome
+     (URL/nav bars) so the drawer's bottom row stays reachable. Falls back to
+     100vh on browsers without dvh support. */
+  height: 100dvh;
+  max-height: 100dvh;
  background: var(--color-card);
  border-left: 1px solid var(--color-border);
  display: flex;
@ -352,10 +357,23 @@
  align-items: center;
  justify-content: space-between;
  padding: 0.6rem 0.8rem;
+  /* Honor the top safe-area inset (notch) so the task id / close button are
+     not clipped on mobile. */
+  padding-top: max(0.6rem, env(safe-area-inset-top));
  border-bottom: 1px solid var(--color-border);
  font-family: var(--font-mono, ui-monospace, monospace);
 }

+/* On mobile the dashboard shell renders a fixed top bar (min-h-14, hidden at
+   the lg breakpoint). The drawer is a body-level z-60 overlay starting at the
+   viewport top, so its header would sit behind that bar. Push the header down
+   by the bar height (3.5rem) plus the top safe-area inset. */
+@media (max-width: 1023px) {
+  .hermes-kanban-drawer-head {
+    padding-top: calc(3.5rem + env(safe-area-inset-top));
+  }
+}
+
 .hermes-kanban-drawer-close {
  appearance: none;
  background: transparent;
@ -368,10 +386,33 @@
 }
 .hermes-kanban-drawer-close:hover { color: var(--color-foreground); }

+/* Attachment download trigger — styled as a link, rendered as a <button>
+   so the click handler can fetch with the session token (#35338). */
+.hermes-kanban-attachment-link {
+  appearance: none;
+  background: transparent;
+  border: 0;
+  padding: 0;
+  margin: 0;
+  text-align: left;
+  color: var(--color-primary, #6ea8fe);
+  cursor: pointer;
+  text-decoration: none;
+  overflow: hidden;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+  flex: 1;
+}
+.hermes-kanban-attachment-link:hover { text-decoration: underline; }
+
 .hermes-kanban-drawer-body {
  flex: 1;
  overflow-y: auto;
  padding: 0.9rem;
+  /* When no comment row is rendered (loading / error states), the scrolling
+     body is the bottom-most element — extend its bottom padding past the
+     mobile browser chrome so the last content stays readable. */
+  padding-bottom: max(0.9rem, calc(0.9rem + env(safe-area-inset-bottom)));
  display: flex;
  flex-direction: column;
  gap: 0.85rem;
@ -530,6 +571,9 @@
  display: flex;
  gap: 0.4rem;
  padding: 0.55rem 0.75rem;
+  /* Keep the comment input clear of the mobile browser nav bar / home
+     indicator by extending the bottom padding with the safe-area inset. */
+  padding-bottom: max(0.55rem, calc(0.55rem + env(safe-area-inset-bottom)));
  border-top: 1px solid var(--color-border);
  background: color-mix(in srgb, var(--color-card) 90%, transparent);
 }
--- a/plugins/kanban/dashboard/plugin_api.py
+++ b/plugins/kanban/dashboard/plugin_api.py
@ -43,9 +43,11 @@ import os
 import sqlite3
 import time
 from dataclasses import asdict
+from pathlib import Path
 from typing import Any, Optional

-from fastapi import APIRouter, HTTPException, Query, WebSocket, WebSocketDisconnect, status as http_status
+from fastapi import APIRouter, File, Form, HTTPException, Query, UploadFile, WebSocket, WebSocketDisconnect, status as http_status
+from fastapi.responses import FileResponse
 from pydantic import BaseModel, Field

 from hermes_cli import kanban_db
@ -186,6 +188,21 @@ def _comment_dict(c: kanban_db.Comment) -> dict[str, Any]:
    }


+def _attachment_dict(a: kanban_db.Attachment) -> dict[str, Any]:
+    """Serialise an Attachment for the drawer. ``stored_path`` is the
+    absolute on-disk path workers read; the UI uses ``id`` for download."""
+    return {
+        "id": a.id,
+        "task_id": a.task_id,
+        "filename": a.filename,
+        "content_type": a.content_type,
+        "size": a.size,
+        "uploaded_by": a.uploaded_by,
+        "stored_path": a.stored_path,
+        "created_at": a.created_at,
+    }
+
+
 def _run_dict(r: kanban_db.Run) -> dict[str, Any]:
    """Serialise a Run for the drawer's Run history section."""
    return {
@ -531,6 +548,7 @@ def get_task(
            "task": task_d,
            "comments": [_comment_dict(c) for c in kanban_db.list_comments(conn, task_id)],
            "events": [_event_dict(e) for e in kanban_db.list_events(conn, task_id)],
+            "attachments": [_attachment_dict(a) for a in kanban_db.list_attachments(conn, task_id)],
            "links": _links_for(conn, task_id),
            "runs": [
                _run_dict(r)
@ -609,6 +627,165 @@ def create_task(payload: CreateTaskBody, board: Optional[str] = Query(None)):
        conn.close()


+# ---------------------------------------------------------------------------
+# Attachments — upload / list / download / delete (#35338)
+# ---------------------------------------------------------------------------
+
+# Cap a single upload so a runaway request can't fill the disk. 25 MB
+# comfortably covers PDFs, images, and source docs — the kanban use case.
+_MAX_ATTACHMENT_BYTES = 25 * 1024 * 1024
+
+
+def _safe_attachment_name(raw: str) -> str:
+    """Reduce a client-supplied filename to a safe basename.
+
+    Strips any directory components (``os.path.basename`` on both
+    separators) so a malicious ``../../etc/passwd`` or ``C:\\x`` collapses
+    to its leaf. Rejects empty / dotfile-only names. The result is only
+    ever joined under the per-task attachments dir, never used verbatim
+    as a path from the client.
+    """
+    name = (raw or "").replace("\\", "/").split("/")[-1].strip()
+    # Drop control chars and leading dots so we never write a dotfile or
+    # a name with embedded NULs/newlines.
+    name = "".join(ch for ch in name if ch.isprintable() and ch not in '\x00').strip()
+    name = name.lstrip(".").strip()
+    if not name:
+        raise HTTPException(status_code=400, detail="invalid attachment filename")
+    return name[:200]
+
+
+@router.get("/tasks/{task_id}/attachments")
+def list_task_attachments(task_id: str, board: Optional[str] = Query(None)):
+    board = _resolve_board(board)
+    conn = _conn(board=board)
+    try:
+        if kanban_db.get_task(conn, task_id) is None:
+            raise HTTPException(status_code=404, detail=f"task {task_id} not found")
+        return {
+            "attachments": [
+                _attachment_dict(a) for a in kanban_db.list_attachments(conn, task_id)
+            ]
+        }
+    finally:
+        conn.close()
+
+
+@router.post("/tasks/{task_id}/attachments")
+async def upload_task_attachment(
+    task_id: str,
+    file: UploadFile = File(...),
+    board: Optional[str] = Query(None),
+    uploaded_by: Optional[str] = Form(None),
+):
+    """Store an uploaded file for a task and record its metadata.
+
+    The blob lands under ``attachments_root(board)/<task_id>/`` with a
+    sanitised, collision-resolved name. The worker reads it via the
+    absolute path surfaced in ``build_worker_context``.
+    """
+    board = _resolve_board(board)
+    conn = _conn(board=board)
+    try:
+        if kanban_db.get_task(conn, task_id) is None:
+            raise HTTPException(status_code=404, detail=f"task {task_id} not found")
+
+        safe_name = _safe_attachment_name(file.filename or "")
+
+        # Stream to disk with a hard size cap so a huge upload can't fill
+        # the disk. Read in chunks; abort + clean up if the cap is hit.
+        dest_dir = kanban_db.task_attachments_dir(task_id, board=board)
+        dest_dir.mkdir(parents=True, exist_ok=True)
+
+        # Resolve name collisions: foo.pdf → foo (1).pdf, foo (2).pdf, …
+        stem, dot, ext = safe_name.partition(".")
+        candidate = safe_name
+        n = 1
+        while (dest_dir / candidate).exists():
+            candidate = f"{stem} ({n}){dot}{ext}"
+            n += 1
+        dest_path = dest_dir / candidate
+
+        total = 0
+        try:
+            with open(dest_path, "wb") as out:
+                while True:
+                    chunk = await file.read(1024 * 1024)
+                    if not chunk:
+                        break
+                    total += len(chunk)
+                    if total > _MAX_ATTACHMENT_BYTES:
+                        out.close()
+                        dest_path.unlink(missing_ok=True)
+                        raise HTTPException(
+                            status_code=413,
+                            detail=(
+                                f"attachment exceeds {_MAX_ATTACHMENT_BYTES // (1024 * 1024)} MB limit"
+                            ),
+                        )
+                    out.write(chunk)
+        except HTTPException:
+            raise
+        except OSError as exc:
+            raise HTTPException(status_code=500, detail=f"failed to store attachment: {exc}")
+
+        att_id = kanban_db.add_attachment(
+            conn,
+            task_id,
+            filename=candidate,
+            stored_path=str(dest_path.resolve()),
+            content_type=file.content_type,
+            size=total,
+            uploaded_by=(uploaded_by or "dashboard"),
+        )
+        att = kanban_db.get_attachment(conn, att_id)
+        return {"attachment": _attachment_dict(att) if att else None}
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    finally:
+        conn.close()
+
+
+@router.get("/attachments/{attachment_id}")
+def download_attachment(attachment_id: int, board: Optional[str] = Query(None)):
+    board = _resolve_board(board)
+    conn = _conn(board=board)
+    try:
+        att = kanban_db.get_attachment(conn, attachment_id)
+        if att is None:
+            raise HTTPException(status_code=404, detail="attachment not found")
+        # Confirm the blob still lives under the board's attachments root
+        # before serving — defense in depth against a tampered DB row.
+        root = kanban_db.attachments_root(board=board).resolve()
+        try:
+            stored = Path(att.stored_path).resolve()
+            stored.relative_to(root)
+        except (ValueError, OSError):
+            raise HTTPException(status_code=404, detail="attachment file unavailable")
+        if not stored.is_file():
+            raise HTTPException(status_code=404, detail="attachment file missing on disk")
+        return FileResponse(
+            path=str(stored),
+            filename=att.filename,
+            media_type=att.content_type or "application/octet-stream",
+        )
+    finally:
+        conn.close()
+
+
+@router.delete("/attachments/{attachment_id}")
+def remove_attachment(attachment_id: int, board: Optional[str] = Query(None)):
+    board = _resolve_board(board)
+    conn = _conn(board=board)
+    try:
+        att = kanban_db.delete_attachment(conn, attachment_id)
+        if att is None:
+            raise HTTPException(status_code=404, detail="attachment not found")
+        return {"ok": True, "id": attachment_id}
+    finally:
+        conn.close()
+
+
 # ---------------------------------------------------------------------------
 # PATCH /tasks/:id  (status / assignee / priority / title / body)
 # ---------------------------------------------------------------------------
--- a/plugins/memory/hindsight/init.py
+++ b/plugins/memory/hindsight/init.py
@ -633,7 +633,8 @@ class HindsightMemoryProvider(MemoryProvider):
            except Exception:
                pass
        existing.update(values)
-        config_path.write_text(json.dumps(existing, indent=2))
+        from utils import atomic_json_write
+        atomic_json_write(config_path, existing, mode=0o600)

    def post_setup(self, hermes_home: str, config: dict) -> None:
        """Custom setup wizard — installs only the deps needed for the selected mode."""
--- a/plugins/memory/honcho/README.md
+++ b/plugins/memory/honcho/README.md
@ -12,8 +12,8 @@ AI-native cross-session user modeling with multi-pass dialectic reasoning, sessi
 ## Setup

 ```bash
-hermes honcho setup    # full interactive wizard (cloud or local)
-hermes memory setup    # generic picker, also works
+hermes memory setup honcho   # configure Honcho directly (works on a fresh install)
+hermes memory setup          # generic picker, choose Honcho from the list
 ```

 Or manually:
@ -22,6 +22,10 @@ hermes config set memory.provider honcho
 echo "HONCHO_API_KEY=***" >> ~/.hermes/.env
 ```

+> `hermes honcho setup` also works, but only **after** Honcho is the active
+> memory provider — the `honcho` subcommand is registered for the active
+> provider only. On a fresh install, use `hermes memory setup honcho`.
+
 ## Architecture Overview

 ### Two-Layer Context Injection
@ -109,7 +113,7 @@ Config is read from the first file that exists:
 | 2 | `~/.hermes/honcho.json` | Default profile (shared host blocks) |
 | 3 | `~/.honcho/config.json` | Global (cross-app interop) |

-Host key is derived from the active Hermes profile: `hermes` (default) or `hermes.<profile>`.
+Host key is derived from the active Hermes profile: `hermes` (default) or `hermes_<profile>`.

 For every key, resolution order is: **host block > root > env var > default**.

@ -154,7 +158,7 @@ In gateway deployments (Telegram, Discord, Slack, etc.) each user arrives with a

 **Host vs root semantics.** All three keys are accepted at both root and `hosts.<host>` levels. Host-level wins. For maps and prefixes, host-level *replaces* the root value as a whole (not merge), so a host can intentionally own its identity universe or wipe it with `userPeerAliases: {}` / `runtimePeerPrefix: ""`.

-**Deployment shapes** (`hermes honcho setup` asks one prompt to set these):
+**Deployment shapes** (`hermes memory setup honcho` asks one prompt to set these):

 - **Single-operator** — `pinUserPeer: true`. All gateway users → `peerName`. Recommended for personal use where you connect Hermes to your own Telegram/Discord/etc.
 - **Multi-user gateway** — `pinUserPeer: false`, optional `runtimePeerPrefix`. Each runtime user → own peer. Recommended for bots serving many humans.
@ -225,7 +229,7 @@ Multiple Hermes profiles can share one workspace while maintaining separate AI i
      "recallMode": "hybrid",
      "sessionStrategy": "per-directory"
    },
-    "hermes.coder": {
+    "hermes_coder": {
      "aiPeer": "coder",
      "recallMode": "tools",
      "sessionStrategy": "per-repo"
@ -236,7 +240,7 @@ Multiple Hermes profiles can share one workspace while maintaining separate AI i

 Both profiles see the same user (`yourname`) in the same shared environment (`hermes`), but each AI peer builds its own observations, conclusions, and behavior patterns. The coder's memory stays code-oriented; the main agent's stays broad.

-Host key is derived from the active Hermes profile: `hermes` (default) or `hermes.<profile>` (e.g. `hermes -p coder` → host key `hermes.coder`).
+Host key is derived from the active Hermes profile: `hermes` (default) or `hermes_<profile>` (e.g. `hermes -p coder` -> host key `hermes_coder`). Older `hermes.<profile>` host blocks are still read for compatibility and are migrated when the CLI writes profile-scoped Honcho config.

 ### Dialectic & Reasoning

@ -307,7 +311,8 @@ Presets:

 | Command | Description |
 |---------|-------------|
-| `hermes honcho setup` | Full interactive setup wizard |
+| `hermes memory setup honcho` | Configure Honcho directly — works on a fresh install |
+| `hermes honcho setup` | Interactive setup wizard (only registered once Honcho is the active provider; redirects to `hermes memory setup`) |
 | `hermes honcho status` | Show resolved config for active profile |
 | `hermes honcho enable` / `disable` | Toggle Honcho for active profile |
 | `hermes honcho mode <mode>` | Change recall or observation mode |
@ -344,7 +349,7 @@ Presets:
      "dialecticMaxChars": 600,
      "saveMessages": true
    },
-    "hermes.coder": {
+    "hermes_coder": {
      "enabled": true,
      "aiPeer": "coder",
      "sessionStrategy": "per-repo",
--- a/plugins/memory/honcho/init.py
+++ b/plugins/memory/honcho/init.py
@ -249,6 +249,7 @@ class HonchoMemoryProvider(MemoryProvider):
    def save_config(self, values, hermes_home):
        """Write config to $HERMES_HOME/honcho.json (Honcho SDK native format)."""
        import json
+        import os
        from pathlib import Path
        config_path = Path(hermes_home) / "honcho.json"
        existing = {}
@ -258,7 +259,8 @@ class HonchoMemoryProvider(MemoryProvider):
            except Exception:
                pass
        existing.update(values)
-        config_path.write_text(json.dumps(existing, indent=2))
+        from utils import atomic_json_write
+        atomic_json_write(config_path, existing, mode=0o600)

    def get_config_schema(self):
        return [
--- a/plugins/memory/honcho/cli.py
+++ b/plugins/memory/honcho/cli.py
@ -11,7 +11,7 @@ import sys
 from pathlib import Path

 from hermes_constants import get_hermes_home
-from plugins.memory.honcho.client import resolve_active_host, resolve_config_path, HOST
+from plugins.memory.honcho.client import _host_block, profile_host_key, resolve_active_host, resolve_config_path, HOST
 from hermes_cli.config import cfg_get


@ -36,7 +36,7 @@ def clone_honcho_for_profile(profile_name: str) -> bool:
    if not default_block and not has_key:
        return False

-    new_host = f"{HOST}.{profile_name}"
+    new_host = profile_host_key(profile_name)
    if new_host in hosts:
        return False  # already exists

@ -192,7 +192,7 @@ def cmd_sync(args) -> None:
        if p.name == "default":
            continue
        if clone_honcho_for_profile(p.name):
-            print(f"  + {p.name} -> hermes.{p.name}")
+            print(f"  + {p.name} -> {profile_host_key(p.name)}")
            created += 1
        else:
            skipped += 1
@ -243,7 +243,7 @@ def _host_key() -> str:
    if _profile_override:
        if _profile_override in {"default", "custom"}:
            return HOST
-        return f"{HOST}.{_profile_override}"
+        return profile_host_key(_profile_override)
    return resolve_active_host()


@ -275,10 +275,8 @@ def _read_config() -> dict:
 def _write_config(cfg: dict, path: Path | None = None) -> None:
    path = path or _local_config_path()
    path.parent.mkdir(parents=True, exist_ok=True)
-    path.write_text(
-        json.dumps(cfg, indent=2, ensure_ascii=False) + "\n",
-        encoding="utf-8",
-    )
+    from utils import atomic_json_write
+    atomic_json_write(path, cfg, mode=0o600)


 def _resolve_api_key(cfg: dict) -> str:
@ -292,7 +290,7 @@ def _resolve_api_key(cfg: dict) -> str:
    config shapes, e.g. ``localhost:8000``) still pass — the Honcho SDK
    will reject them itself with a clearer error than ours.
    """
-    host_key = ((cfg.get("hosts") or {}).get(_host_key()) or {}).get("apiKey")
+    host_key = _host_block(cfg, _host_key()).get("apiKey")
    key = host_key or cfg.get("apiKey", "") or os.environ.get("HONCHO_API_KEY", "")
    if not key:
        base_url = cfg.get("baseUrl") or cfg.get("base_url") or os.environ.get("HONCHO_BASE_URL", "")
@ -462,21 +460,58 @@ def cmd_setup(args) -> None:
    cfg.pop("base_url", None)

    if is_local:
-        # --- Local: ask for base URL, skip or clear API key ---
+        # --- Local: ask for base URL, optionally accept a JWT for auth ---
        current_url = cfg.get("baseUrl") or ""
        new_url = _prompt("Base URL", default=current_url or "http://localhost:8000")
        if new_url:
            cfg["baseUrl"] = new_url

-        # For local no-auth, the SDK must not send an API key.
-        # We keep the key in config (for cloud switching later) but
-        # the client should skip auth when baseUrl is local.
-        current_key = cfg.get("apiKey", "")
-        if current_key:
-            print(f"\n  API key present in config (kept for cloud/hybrid use).")
-            print("  Local connections will skip auth automatically.")
+        # Self-hosted Honcho can run with AUTH_USE_AUTH=true and an
+        # AUTH_JWT_SECRET on the server side. In that case clients must
+        # send a JWT signed with that secret as the bearer token (the
+        # Honcho SDK takes it via ``api_key=``). Cloud users got prompted
+        # for a key already; the local path historically skipped this and
+        # forced users to disable auth on the server. Offer the prompt
+        # here too. We store it under the host block (not the top-level
+        # apiKey) so ``get_honcho_client`` recognises it as an explicit
+        # local auth opt-in (see ``_host_has_key`` in client.py) and
+        # cloud/hybrid switching is unaffected.
+        current_host_key = hermes_host.get("apiKey", "")
+        masked = (
+            f"...{current_host_key[-8:]}"
+            if len(current_host_key) > 8
+            else ("set" if current_host_key else "not set")
+        )
+        print(
+            "\n  Local Honcho auth (JWT signed with the server's "
+            "AUTH_JWT_SECRET)."
+        )
+        print(
+            "  Leave blank if your server runs with AUTH_USE_AUTH=false. "
+            f"Current: {masked}"
+        )
+        new_local_key = _prompt(
+            "Local JWT / bearer token (blank to skip / keep current)",
+            secret=True,
+        )
+        if new_local_key:
+            hermes_host["apiKey"] = new_local_key
+        elif current_host_key:
+            print("  Keeping existing local JWT.")
        else:
-            print("\n  No API key set. Local no-auth ready.")
+            # Surface the top-level key situation for transparency.
+            top_key = cfg.get("apiKey", "")
+            if top_key:
+                print(
+                    "\n  Top-level API key present in config (kept for "
+                    "cloud/hybrid use)."
+                )
+                print(
+                    "  Local connections will skip auth automatically "
+                    "until a local JWT is set above."
+                )
+            else:
+                print("\n  No local JWT set. Local no-auth ready.")
    else:
        # --- Cloud: set default base URL, require API key ---
        cfg.pop("baseUrl", None)  # cloud uses SDK default
--- a/plugins/memory/honcho/client.py
+++ b/plugins/memory/honcho/client.py
@ -32,6 +32,24 @@ logger = logging.getLogger(__name__)
 HOST = "hermes"


+def profile_host_key(profile: str | None) -> str:
+    """Return the safe Honcho host key for a Hermes profile."""
+    if not profile or profile in {"default", "custom"}:
+        return HOST
+    sanitized = "".join(c if c.isalnum() or c in "_-" else "_" for c in profile).strip("_")
+    return f"{HOST}_{sanitized or 'profile'}"
+
+
+def _host_block(raw: dict, host: str) -> dict:
+    """Return host config, accepting legacy dot-form profile host keys."""
+    hosts = raw.get("hosts") or {}
+    block = hosts.get(host, {})
+    if block or not host.startswith(f"{HOST}_"):
+        return block
+    legacy = f"{HOST}.{host[len(HOST) + 1:]}"
+    return hosts.get(legacy, {})
+
+
 def resolve_active_host() -> str:
    """Derive the Honcho host key from the active Hermes profile.

@ -47,8 +65,7 @@ def resolve_active_host() -> str:
    try:
        from hermes_cli.profiles import get_active_profile_name
        profile = get_active_profile_name()
-        if profile and profile not in {"default", "custom"}:
-            return f"{HOST}.{profile}"
+        return profile_host_key(profile)
    except Exception:
        pass
    return HOST
@ -406,7 +423,7 @@ class HonchoClientConfig:
            logger.warning("Failed to read %s: %s, falling back to env", path, e)
            return cls.from_env(host=resolved_host)

-        host_block = (raw.get("hosts") or {}).get(resolved_host, {})
+        host_block = _host_block(raw, resolved_host)
        # A hosts.hermes block or explicit enabled flag means the user
        # intentionally configured Honcho for this host.
        _explicitly_configured = bool(host_block) or raw.get("enabled") is True
@ -811,7 +828,10 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
        or "::1" in resolved_base_url
    )
    if _is_local:
-        # Check if the host block has its own apiKey (explicit local auth)
+        # Check if the host block has its own apiKey (explicit local auth).
+        # Auth-skipping is loopback-only: a stored key is likely a cloud key
+        # that would break a no-auth local server, so we substitute the SDK's
+        # required-non-empty placeholder unless the host block opts in.
        _raw = config.raw or {}
        _host_block = (_raw.get("hosts") or {}).get(config.host, {})
        _host_has_key = bool(_host_block.get("apiKey"))
@ -819,6 +839,18 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
    else:
        effective_api_key = config.api_key

+    # The Honcho SDK's route builders (e.g. routes.workspaces()) already
+    # include the version prefix (e.g. "/v3/workspaces").  When a user-supplied
+    # base_url already ends in a version segment (e.g.
+    # "http://localhost:38000/v3", "https://honcho.my.ts.net/v3"), concatenating
+    # the two produces "/v3/v3/workspaces" → 404 on every call.  This is a pure
+    # routing concern independent of host, so strip a trailing version segment
+    # from ANY base_url — loopback, LAN, custom domain, or cloud alike.  The
+    # SDK then appends its own versioned paths correctly.
+    if resolved_base_url:
+        import re as _re
+        resolved_base_url = _re.sub(r"/v\d+/*$", "", resolved_base_url).rstrip("/")
+
    kwargs: dict = {
        "workspace_id": config.workspace_id,
        "api_key": effective_api_key,
--- a/plugins/memory/mem0/init.py
+++ b/plugins/memory/mem0/init.py
@ -155,7 +155,8 @@ class Mem0MemoryProvider(MemoryProvider):
            except Exception:
                pass
        existing.update(values)
-        config_path.write_text(json.dumps(existing, indent=2))
+        from utils import atomic_json_write
+        atomic_json_write(config_path, existing, mode=0o600)

    def get_config_schema(self):
        return [
--- a/plugins/memory/supermemory/init.py
+++ b/plugins/memory/supermemory/init.py
@ -152,7 +152,8 @@ def _save_supermemory_config(values: dict, hermes_home: str) -> None:
        except Exception:
            existing = {}
    existing.update(values)
-    config_path.write_text(json.dumps(existing, indent=2, sort_keys=True) + "\n", encoding="utf-8")
+    from utils import atomic_json_write
+    atomic_json_write(config_path, existing, mode=0o600, sort_keys=True)


 def _detect_category(text: str) -> str:
--- a/plugins/platforms/discord/adapter.py
+++ b/plugins/platforms/discord/adapter.py
@ -6093,16 +6093,17 @@ def _apply_yaml_config(yaml_cfg: dict, discord_cfg: dict) -> dict | None:
    ``gateway/config.py::load_gateway_config()`` before this migration.

    The DiscordAdapter reads its runtime configuration via ``os.getenv()``
-    throughout the connect / handle code paths (``DISCORD_REQUIRE_MENTION``,
-    ``DISCORD_FREE_RESPONSE_CHANNELS``, ``DISCORD_AUTO_THREAD``,
-    ``DISCORD_REACTIONS``, ``DISCORD_IGNORED_CHANNELS``,
-    ``DISCORD_ALLOWED_CHANNELS``, ``DISCORD_NO_THREAD_CHANNELS``,
-    ``DISCORD_HISTORY_BACKFILL``, ``DISCORD_HISTORY_BACKFILL_LIMIT``,
-    ``DISCORD_ALLOW_MENTION_*``, ``DISCORD_REPLY_TO_MODE``,
-    ``DISCORD_THREAD_REQUIRE_MENTION``).  Rather than rewrite ~50 call sites
-    inside the adapter to read from ``PlatformConfig.extra`` instead, this
-    hook keeps the existing env-driven model and merely owns the
-    YAML→env translation here, next to the adapter that consumes it.
+    throughout the connect / handle code paths (``DISCORD_ALLOWED_USERS``,
+    ``DISCORD_REQUIRE_MENTION``, ``DISCORD_FREE_RESPONSE_CHANNELS``,
+    ``DISCORD_AUTO_THREAD``, ``DISCORD_REACTIONS``,
+    ``DISCORD_IGNORED_CHANNELS``, ``DISCORD_ALLOWED_CHANNELS``,
+    ``DISCORD_NO_THREAD_CHANNELS``, ``DISCORD_HISTORY_BACKFILL``,
+    ``DISCORD_HISTORY_BACKFILL_LIMIT``, ``DISCORD_ALLOW_MENTION_*``,
+    ``DISCORD_REPLY_TO_MODE``, ``DISCORD_THREAD_REQUIRE_MENTION``).
+    Rather than rewrite ~50 call sites inside the adapter to read from
+    ``PlatformConfig.extra`` instead, this hook keeps the existing
+    env-driven model and merely owns the YAML→env translation here, next to
+    the adapter that consumes it.

    Env vars take precedence over YAML — every assignment is guarded by
    ``not os.getenv(...)`` so explicit env vars survive a config.yaml
@ -6113,6 +6114,22 @@ def _apply_yaml_config(yaml_cfg: dict, discord_cfg: dict) -> dict | None:
        os.environ["DISCORD_REQUIRE_MENTION"] = str(discord_cfg["require_mention"]).lower()
    if "thread_require_mention" in discord_cfg and not os.getenv("DISCORD_THREAD_REQUIRE_MENTION"):
        os.environ["DISCORD_THREAD_REQUIRE_MENTION"] = str(discord_cfg["thread_require_mention"]).lower()
+    platforms_cfg = yaml_cfg.get("platforms")
+    platform_extra_cfg = {}
+    if isinstance(platforms_cfg, dict):
+        discord_platform_cfg = platforms_cfg.get("discord")
+        if isinstance(discord_platform_cfg, dict):
+            candidate_extra = discord_platform_cfg.get("extra")
+            if isinstance(candidate_extra, dict):
+                platform_extra_cfg = candidate_extra
+    allowed_users_cfg = (
+        discord_cfg["allow_from"] if "allow_from" in discord_cfg
+        else platform_extra_cfg.get("allow_from")
+    )
+    if allowed_users_cfg is not None and not os.getenv("DISCORD_ALLOWED_USERS"):
+        if isinstance(allowed_users_cfg, list):
+            allowed_users_cfg = ",".join(str(v) for v in allowed_users_cfg)
+        os.environ["DISCORD_ALLOWED_USERS"] = str(allowed_users_cfg)
    frc = discord_cfg.get("free_response_channels")
    if frc is not None and not os.getenv("DISCORD_FREE_RESPONSE_CHANNELS"):
        if isinstance(frc, list):
--- a/plugins/web/firecrawl/provider.py
+++ b/plugins/web/firecrawl/provider.py
@ -146,16 +146,16 @@ def _get_firecrawl_gateway_url() -> str:
 def _is_tool_gateway_ready() -> bool:
    """Return True when gateway URL + Nous Subscriber token are available.

-    Reads ``read_nous_access_token`` and ``resolve_managed_tool_gateway``
+    Reads ``peek_nous_access_token`` and ``resolve_managed_tool_gateway``
    via :mod:`tools.web_tools` rather than direct imports, so unit tests
-    that ``patch("tools.web_tools._read_nous_access_token", ...)`` see
+    that ``patch("tools.web_tools._peek_nous_access_token", ...)`` see
    their patches honored. The names are re-exported on
    :mod:`tools.web_tools` for exactly this reason.
    """
    import tools.web_tools as _wt

    return _wt.resolve_managed_tool_gateway(
-        "firecrawl", token_reader=_wt._read_nous_access_token
+        "firecrawl", token_reader=_wt._peek_nous_access_token
    ) is not None


--- a/pyproject.toml
+++ b/pyproject.toml
@ -87,7 +87,7 @@ edge-tts = ["edge-tts==7.2.7"]
 modal = ["modal==1.3.4"]
 daytona = ["daytona==0.155.0"]
 hindsight = ["hindsight-client==0.6.1"]
-dev = ["debugpy==1.8.20", "pytest==9.0.2", "pytest-asyncio==1.3.0", "pytest-timeout==2.4.0", "mcp==1.26.0", "ty==0.0.21", "ruff==0.15.10", "setuptools==82.0.1"]
+dev = ["debugpy==1.8.20", "pytest==9.0.2", "pytest-asyncio==1.3.0", "pytest-timeout==2.4.0", "mcp==1.26.0", "starlette==1.0.1", "ty==0.0.21", "ruff==0.15.10", "setuptools==82.0.1"]  # starlette: CVE-2026-48710
 messaging = ["python-telegram-bot[webhooks]==22.6", "discord.py[voice]==2.7.1", "aiohttp==3.13.3", "brotlicffi==1.2.0.1", "slack-bolt==1.27.0", "slack-sdk==3.40.1", "qrcode==7.4.2"]
 cron = []  # croniter is now a core dependency; this extra kept for back-compat
 slack = ["slack-bolt==1.27.0", "slack-sdk==3.40.1", "aiohttp==3.13.3"]
@ -114,14 +114,21 @@ pty = [
  # without pulling in extra packages.
 ]
 honcho = ["honcho-ai==2.0.1"]
-mcp = ["mcp==1.26.0"]
+# CVE-2026-48710 (BadHost): Starlette is pulled transitively by mcp's
+# sse-starlette / HTTP-SSE stack (and by fastapi in the `web` extra). Before
+# 1.0.1, a malformed Host header makes `request.url.path` desync from the path
+# the ASGI router actually dispatched, so middleware/endpoints that gate on
+# `request.url` can be bypassed. We pin a patched Starlette directly in every
+# extra that exposes a Starlette-backed server surface so pip/uv can't resolve
+# a vulnerable pre-1.0.1 transitive. Bump in lockstep with uv.lock.
+mcp = ["mcp==1.26.0", "starlette==1.0.1"]  # starlette: CVE-2026-48710
 homeassistant = ["aiohttp==3.13.3"]
 sms = ["aiohttp==3.13.3"]
 # Computer use — macOS background desktop control via cua-driver (MCP stdio).
 # The cua-driver binary itself is installed via `hermes tools` post-setup
 # (curl install script); this extra just pins the MCP client used to talk
 # to it, which is already provided by the `mcp` extra.
-computer-use = ["mcp==1.26.0"]
+computer-use = ["mcp==1.26.0", "starlette==1.0.1"]  # starlette: CVE-2026-48710
 acp = ["agent-client-protocol==0.9.0"]
 # mistral: Voxtral STT + TTS. Pinned to an exact verified-clean version.
 # The `mistralai` PyPI project was quarantined 2026-05-12 after the malicious
@ -174,7 +181,9 @@ youtube = [
  "youtube-transcript-api==1.2.4",
 ]
 # `hermes dashboard` (localhost SPA + API).  Not in core to keep the default install lean.
-web = ["fastapi==0.133.1", "uvicorn[standard]==0.41.0"]
+# starlette==1.0.1 pinned for CVE-2026-48710 (BadHost) — fastapi pulls Starlette
+# transitively and pre-1.0.1 is the vulnerable range. See the mcp extra above.
+web = ["fastapi==0.133.1", "uvicorn[standard]==0.41.0", "starlette==1.0.1"]
 all = [
  # Policy (2026-05-12): `[all]` includes only extras that genuinely
  # CAN'T be lazy-installed via `tools/lazy_deps.py` — i.e. things every
@ -216,7 +225,7 @@ hermes-agent = "run_agent:main"
 hermes-acp = "acp_adapter.entry:main"

 [tool.setuptools]
-py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_bootstrap", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "utils"]
+py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_bootstrap", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "utils", "mcp_serve"]

 [tool.setuptools.package-data]
 hermes_cli = ["web_dist/**/*", "tui_dist/**/*", "scripts/install.sh", "scripts/install.ps1"]
--- a/run_agent.py
+++ b/run_agent.py
@ -2195,6 +2195,126 @@ class AIAgent:
            lines.append(f"  • … and {remaining} more")
        return "\n".join(lines)

+    def _turn_completion_explainer_enabled(self) -> bool:
+        """Check whether the end-of-turn completion explainer footer is on.
+
+        Config path: ``display.turn_completion_explainer`` (bool, default
+        True).  ``HERMES_TURN_COMPLETION_EXPLAINER`` env var overrides
+        config.  Exposed as a method so tests can patch a single seam,
+        mirroring ``_file_mutation_verifier_enabled``.
+        """
+        try:
+            import os as _os
+            env = _os.environ.get("HERMES_TURN_COMPLETION_EXPLAINER")
+            if env is not None:
+                return env.strip().lower() not in {"0", "false", "no", "off"}
+            # Read from the persisted config.yaml so gateway and CLI share
+            # the same setting.  Import lazily to avoid a startup-time cycle.
+            try:
+                from hermes_cli.config import load_config as _load_config
+                _cfg = _load_config() or {}
+            except Exception:
+                _cfg = {}
+            _display = _cfg.get("display") if isinstance(_cfg, dict) else None
+            if isinstance(_display, dict) and "turn_completion_explainer" in _display:
+                return bool(_display.get("turn_completion_explainer"))
+        except Exception:
+            pass
+        return True  # safe default: explainer on
+
+    @staticmethod
+    def _format_turn_completion_explanation(turn_exit_reason: str) -> str:
+        """Render a user-facing explanation for an abnormal turn ending.
+
+        Maps the internal ``turn_exit_reason`` to a short, actionable
+        message so a turn that produced no usable assistant reply (empty
+        content after retries, a partial/truncated stream, a still-pending
+        tool result, or an iteration/budget limit) is never silent from
+        the UI's perspective — the symptom users report in #34452.
+
+        Returns an empty string for reasons that are NOT abnormal (e.g.
+        a normal ``text_response(...)`` exit), so callers can concatenate
+        or substitute unconditionally without warning on healthy turns
+        like a terse ``Done.``.
+        """
+        if not turn_exit_reason:
+            return ""
+        reason = str(turn_exit_reason)
+
+        # Normal completion — stay quiet.  ``text_response(...)`` is the
+        # healthy terminal; anything that produced a real reply is fine.
+        if reason.startswith("text_response"):
+            return ""
+
+        prefix = "⚠️ No reply: "
+        if reason == "empty_response_exhausted":
+            return (
+                prefix
+                + "the model returned empty content after retries and any "
+                "fallback providers. Try `continue`, switch model/provider, "
+                "or inspect the tool output above."
+            )
+        if reason == "all_retries_exhausted_no_response":
+            return (
+                prefix
+                + "all API retries were exhausted before a response was "
+                "produced (provider errors / rate limits). Try `continue` "
+                "or switch provider."
+            )
+        if reason == "partial_stream_recovery":
+            return (
+                prefix
+                + "streaming stopped early and only a partial response was "
+                "recovered. Send `continue` to resume from where it stopped."
+            )
+        if reason == "fallback_prior_turn_content":
+            return (
+                prefix
+                + "no new content was produced this turn; showing recovered "
+                "prior context. Send `continue` to retry."
+            )
+        if reason == "interrupted_during_api_call":
+            return (
+                prefix
+                + "the request was interrupted mid-call before a reply was "
+                "received. Send `continue` to retry."
+            )
+        if reason == "budget_exhausted":
+            return (
+                prefix
+                + "the per-turn iteration/cost budget was exhausted before a "
+                "final answer. Send `continue` to keep going."
+            )
+        if reason == "ollama_runtime_context_too_small":
+            return (
+                prefix
+                + "the local model's context window was too small to finish. "
+                "Increase the context size or use a larger model."
+            )
+        if reason.startswith("max_iterations_reached"):
+            return (
+                prefix
+                + "the maximum tool-iteration limit was reached before a "
+                "final answer. Send `continue` to keep going, or raise "
+                "`max_iterations`."
+            )
+        if reason.startswith("error_near_max_iterations"):
+            return (
+                prefix
+                + "an error occurred near the iteration limit before a final "
+                "answer. Check the tool output above, then send `continue`."
+            )
+        if reason == "pending_tool_result":
+            return (
+                prefix
+                + "the turn stopped while a tool result was still pending and "
+                "the model produced no follow-up text. Send `continue` to "
+                "let it summarize."
+            )
+        # Unknown/diagnostic-only reasons (e.g. "unknown", guardrail_halt
+        # which already surfaces its own message) — don't second-guess.
+        return ""
+
    def _apply_pending_steer_to_tool_results(self, messages: list, num_tool_msgs: int) -> None:
        """Forwarder — see ``agent.agent_runtime_helpers.apply_pending_steer_to_tool_results``."""
        from agent.agent_runtime_helpers import apply_pending_steer_to_tool_results
@ -3487,6 +3607,18 @@ class AIAgent:
        from agent.chat_completion_helpers import try_activate_fallback
        return try_activate_fallback(self, reason)

+    def _has_pending_fallback(self) -> bool:
+        """Whether a fallback provider is actually available to switch to.
+
+        Used to gate user-facing "trying fallback..." status so we don't
+        announce a fallback that will never be attempted (the user has no
+        fallback chain configured).  Mirrors the early-return guard in
+        ``try_activate_fallback`` (#35314, #17446).
+        """
+        chain = getattr(self, "_fallback_chain", None) or []
+        index = getattr(self, "_fallback_index", 0)
+        return index < len(chain)
+
    # ── Per-turn primary restoration ─────────────────────────────────────

    def _restore_primary_runtime(self) -> bool:
--- a/scripts/install.sh
+++ b/scripts/install.sh
@ -540,6 +540,7 @@ check_python() {
    if PYTHON_PATH="$("$UV_CMD" python find "$PYTHON_VERSION" 2>/dev/null)"; then
        PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null)"
        log_success "Python found: $PYTHON_FOUND_VERSION"
+        ensure_fts5
        return 0
    fi

@ -549,6 +550,7 @@ check_python() {
        PYTHON_PATH="$("$UV_CMD" python find "$PYTHON_VERSION")"
        PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null)"
        log_success "Python installed: $PYTHON_FOUND_VERSION"
+        ensure_fts5
    else
        log_error "Failed to install Python $PYTHON_VERSION"
        log_info "Install Python $PYTHON_VERSION manually, then re-run this script"
@ -556,6 +558,51 @@ check_python() {
    fi
 }

+# Probe whether $1 (a python executable) links a SQLite with the FTS5
+# module compiled in. Hermes' session store (hermes_state.py) creates FTS5
+# virtual tables for full-text session search; a SQLite without FTS5 makes
+# the bundled-python path unusable for that feature. Returns 0 if FTS5 works.
+_python_has_fts5() {
+    "$1" - <<'PY' 2>/dev/null
+import sqlite3, sys
+try:
+    sqlite3.connect(":memory:").execute("CREATE VIRTUAL TABLE t USING fts5(x)")
+except Exception:
+    sys.exit(1)
+PY
+}
+
+# Guarantee the resolved uv-managed interpreter ships FTS5. uv's Python
+# distributions only gained FTS5 in mid-2025 (python-build-standalone #694),
+# so a stale interpreter already in uv's store — which `uv python find`
+# happily reuses — can lack it. When that happens, force a reinstall of the
+# latest patch for $PYTHON_VERSION (which has FTS5) and re-resolve. This keeps
+# the supported install path's session search working without bundling a
+# second SQLite or asking the user to do anything.
+ensure_fts5() {
+    [ -n "${PYTHON_PATH:-}" ] || return 0
+    if _python_has_fts5 "$PYTHON_PATH"; then
+        return 0
+    fi
+
+    log_warn "Resolved Python's SQLite lacks the FTS5 module (session search needs it)."
+    log_info "Reinstalling a current Python $PYTHON_VERSION with FTS5 via uv..."
+    if "$UV_CMD" python install "$PYTHON_VERSION" --reinstall >/dev/null 2>&1; then
+        PYTHON_PATH="$("$UV_CMD" python find "$PYTHON_VERSION" 2>/dev/null)"
+        PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null)"
+    fi
+
+    if [ -n "${PYTHON_PATH:-}" ] && _python_has_fts5 "$PYTHON_PATH"; then
+        log_success "FTS5 available ($PYTHON_FOUND_VERSION)"
+    else
+        # Could not obtain an FTS5-capable interpreter (offline, pinned env,
+        # etc.). Install proceeds — Hermes degrades gracefully and disables
+        # only full-text session search — but warn so it isn't a silent gap.
+        log_warn "Could not obtain an FTS5-capable Python. Hermes will run, but"
+        log_warn "full-text session search will be disabled until FTS5 is present."
+    fi
+}
+
 check_git() {
    log_info "Checking Git..."

--- a/scripts/release.py
+++ b/scripts/release.py
@ -45,6 +45,10 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"

 # Auto-extracted from noreply emails + manual overrides
 AUTHOR_MAP = {
+    "zhipengli@thebrainly.ai": "a1245582339",
+    "mathijs.vd.hurk@gmail.com": "mathijsvandenhurk",
+    "drpelagik@gmail.com": "SeaXen",
+    "lengr@users.noreply.github.com": "LengR",
    "metalclaudbot@gmail.com": "HashClawAI",
    "tonybear55665566@gmail.com": "TonyPepeBear",
    "kaspersniels@gmail.com": "nielskaspers",
@ -67,6 +71,7 @@ AUTHOR_MAP = {
    "wangpuv@hotmail.com": "wangpuv",
    "202622897+ticketclosed-wontfix@users.noreply.github.com": "ticketclosed-wontfix",
    "wuxuebin1993@gmail.com": "victorGPT",
+    "wei.chen.coder@gmail.com": "wenchengxucool",
    "frowte3k@gmail.com": "Frowtek",
    "211828103+julio-cloudvisor@users.noreply.github.com": "julio-cloudvisor",
    "17778+kweiner@users.noreply.github.com": "kweiner",
@ -220,6 +225,7 @@ AUTHOR_MAP = {
    "264291321+v1b3coder@users.noreply.github.com": "v1b3coder",
    "silverchris@foxmail.com": "ming1523",
    "maksesipov@gmail.com": "Qwinty",
+    "byquenox@gmail.com": "Que0x",
    "denisamania@gmail.com": "CalmProton",
    "308068+mbac@users.noreply.github.com": "mbac",
    "nicoechaniz@altermundi.net": "nicoechaniz",
@ -649,8 +655,10 @@ AUTHOR_MAP = {
    "alexazzjjtt@163.com": "alexzhu0",
    "pub_forgreatagent@antgroup.com": "AntAISecurityLab",
    "252620095+briandevans@users.noreply.github.com": "briandevans",
+    "incharge.automation@gmail.com": "inchargeautomation-lab",
    "danielrpike9@gmail.com": "Bartok9",
    "96944678+ymylive@users.noreply.github.com": "sweetcornna",
+    "laflamme@illinoisalumni.org": "briancl2",
    "skozyuk@cruxexperts.com": "CruxExperts",
    "154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43",
    "12250313+Kailigithub@users.noreply.github.com": "Kailigithub",
@ -1203,6 +1211,7 @@ AUTHOR_MAP = {
    "86501179+1RB@users.noreply.github.com": "1RB",  # PR #25462 salvage (discord forwarded messages)
    "44045943+ayushere@users.noreply.github.com": "ayushere",  # PR #25342 salvage (memory teardown leak)
    "15791290+domtriola@users.noreply.github.com": "domtriola",  # PR #25424 salvage (docs tirith link)
+    "tuancookiez@gmail.com": "tuancookiez-hub",  # PR #34865 salvage (LSP Windows .cmd shim spawn, #34864)
    "284216128+ephron-ren@users.noreply.github.com": "ephron-ren",  # PR #25358 salvage (MiMo reasoning echo-back)
    "96843562+freqyfreqy@users.noreply.github.com": "freqyfreqy",  # PR #25423 salvage (docs LSP worktree -> repo)
    "54306477+fu576@users.noreply.github.com": "fu576",  # PR #25369 salvage (api_mode not inherited cross-provider)
--- a/skills/productivity/google-workspace/scripts/google_api.py
+++ b/skills/productivity/google-workspace/scripts/google_api.py
@ -129,7 +129,11 @@ def _run_gws(parts: list[str], *, params: dict | None = None, body: dict | None


 def _headers_dict(msg: dict) -> dict[str, str]:
-    return {h["name"]: h["value"] for h in msg.get("payload", {}).get("headers", [])}
+    return {
+        h["name"].lower(): h["value"]
+        for h in msg.get("payload", {}).get("headers", [])
+        if h.get("name")
+    }


 def _extract_message_body(msg: dict) -> str:
@ -230,10 +234,10 @@ def gmail_search(args):
                {
                    "id": msg["id"],
                    "threadId": msg["threadId"],
-                    "from": headers.get("From", ""),
-                    "to": headers.get("To", ""),
-                    "subject": headers.get("Subject", ""),
-                    "date": headers.get("Date", ""),
+                    "from": headers.get("from", ""),
+                    "to": headers.get("to", ""),
+                    "subject": headers.get("subject", ""),
+                    "date": headers.get("date", ""),
                    "snippet": msg.get("snippet", ""),
                    "labels": msg.get("labelIds", []),
                }
@ -260,10 +264,10 @@ def gmail_search(args):
        output.append({
            "id": msg["id"],
            "threadId": msg["threadId"],
-            "from": headers.get("From", ""),
-            "to": headers.get("To", ""),
-            "subject": headers.get("Subject", ""),
-            "date": headers.get("Date", ""),
+            "from": headers.get("from", ""),
+            "to": headers.get("to", ""),
+            "subject": headers.get("subject", ""),
+            "date": headers.get("date", ""),
            "snippet": msg.get("snippet", ""),
            "labels": msg.get("labelIds", []),
        })
@ -281,10 +285,10 @@ def gmail_get(args):
        result = {
            "id": msg["id"],
            "threadId": msg["threadId"],
-            "from": headers.get("From", ""),
-            "to": headers.get("To", ""),
-            "subject": headers.get("Subject", ""),
-            "date": headers.get("Date", ""),
+            "from": headers.get("from", ""),
+            "to": headers.get("to", ""),
+            "subject": headers.get("subject", ""),
+            "date": headers.get("date", ""),
            "labels": msg.get("labelIds", []),
            "body": _extract_message_body(msg),
        }
@ -300,10 +304,10 @@ def gmail_get(args):
    result = {
        "id": msg["id"],
        "threadId": msg["threadId"],
-        "from": headers.get("From", ""),
-        "to": headers.get("To", ""),
-        "subject": headers.get("Subject", ""),
-        "date": headers.get("Date", ""),
+        "from": headers.get("from", ""),
+        "to": headers.get("to", ""),
+        "subject": headers.get("subject", ""),
+        "date": headers.get("date", ""),
        "labels": msg.get("labelIds", []),
        "body": _extract_message_body(msg),
    }
@ -314,12 +318,12 @@ def gmail_get(args):
 def gmail_send(args):
    if _gws_binary():
        message = MIMEText(args.body, "html" if args.html else "plain")
-        message["to"] = args.to
-        message["subject"] = args.subject
+        message["To"] = args.to
+        message["Subject"] = args.subject
        if args.cc:
-            message["cc"] = args.cc
+            message["Cc"] = args.cc
        if args.from_header:
-            message["from"] = args.from_header
+            message["From"] = args.from_header

        raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
        body = {"raw": raw}
@ -336,12 +340,12 @@ def gmail_send(args):

    service = build_service("gmail", "v1")
    message = MIMEText(args.body, "html" if args.html else "plain")
-    message["to"] = args.to
-    message["subject"] = args.subject
+    message["To"] = args.to
+    message["Subject"] = args.subject
    if args.cc:
-        message["cc"] = args.cc
+        message["Cc"] = args.cc
    if args.from_header:
-        message["from"] = args.from_header
+        message["From"] = args.from_header

    raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
    body = {"raw": raw}
@ -367,18 +371,18 @@ def gmail_reply(args):
        )
        headers = _headers_dict(original)

-        subject = headers.get("Subject", "")
+        subject = headers.get("subject", "")
        if not subject.startswith("Re:"):
            subject = f"Re: {subject}"

        message = MIMEText(args.body)
-        message["to"] = headers.get("From", "")
-        message["subject"] = subject
+        message["To"] = headers.get("from", "")
+        message["Subject"] = subject
        if args.from_header:
-            message["from"] = args.from_header
-        if headers.get("Message-ID"):
-            message["In-Reply-To"] = headers["Message-ID"]
-            message["References"] = headers["Message-ID"]
+            message["From"] = args.from_header
+        if headers.get("message-id"):
+            message["In-Reply-To"] = headers["message-id"]
+            message["References"] = headers["message-id"]

        raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
        result = _run_gws(
@ -396,18 +400,18 @@ def gmail_reply(args):
    ).execute()
    headers = _headers_dict(original)

-    subject = headers.get("Subject", "")
+    subject = headers.get("subject", "")
    if not subject.startswith("Re:"):
        subject = f"Re: {subject}"

    message = MIMEText(args.body)
-    message["to"] = headers.get("From", "")
-    message["subject"] = subject
+    message["To"] = headers.get("from", "")
+    message["Subject"] = subject
    if args.from_header:
-        message["from"] = args.from_header
-    if headers.get("Message-ID"):
-        message["In-Reply-To"] = headers["Message-ID"]
-        message["References"] = headers["Message-ID"]
+        message["From"] = args.from_header
+    if headers.get("message-id"):
+        message["In-Reply-To"] = headers["message-id"]
+        message["References"] = headers["message-id"]

    raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
    body = {"raw": raw, "threadId": original["threadId"]}
--- a/tests/agent/lsp/test_install_and_lint_fixes.py
+++ b/tests/agent/lsp/test_install_and_lint_fixes.py
@ -94,6 +94,47 @@ def test_install_npm_works_without_extras(tmp_path, monkeypatch):
    assert install_targets == ["pyright"]


+def test_existing_binary_finds_windows_wrapper_in_staging(tmp_path, monkeypatch):
+    """Installed Windows shims should satisfy later status/probe calls."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+
+    from agent.lsp import install as install_mod
+
+    wrapper = install_mod.hermes_lsp_bin_dir() / "pyright-langserver.cmd"
+    wrapper.write_text("@echo off\n")
+    wrapper.chmod(0o755)
+
+    monkeypatch.setattr(install_mod, "_is_windows", lambda: True)
+    monkeypatch.setattr(install_mod.shutil, "which", lambda _name: None)
+
+    assert install_mod._existing_binary("pyright-langserver") == str(wrapper)
+    assert install_mod.detect_status("pyright") == "installed"
+
+
+def test_install_pip_finds_windows_scripts_launcher(tmp_path, monkeypatch):
+    """pip console scripts can land in Scripts/ on native Windows."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+
+    from agent.lsp import install as install_mod
+
+    def fake_run(cmd, **kwargs):
+        scripts_dir = install_mod.hermes_lsp_bin_dir().parent / "python-packages" / "Scripts"
+        scripts_dir.mkdir(parents=True, exist_ok=True)
+        launcher = scripts_dir / "fake-language-server.exe"
+        launcher.write_text("launcher\n")
+        launcher.chmod(0o755)
+        return MagicMock(returncode=0, stderr="")
+
+    monkeypatch.setattr(install_mod, "_is_windows", lambda: True)
+    monkeypatch.setattr(install_mod.subprocess, "run", fake_run)
+
+    resolved = install_mod._install_pip("fake-lsp", "fake-language-server")
+
+    assert resolved is not None
+    assert resolved.endswith("fake-language-server.exe")
+    assert (install_mod.hermes_lsp_bin_dir() / "fake-language-server.exe").exists()
+
+
 # ---------------------------------------------------------------------------
 # Fix 2: ``hermes lsp status`` surfaces shellcheck-missing for bash
 # ---------------------------------------------------------------------------
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@ -41,6 +41,8 @@ class TestShouldCompress:

 class TestUpdateFromResponse:
    def test_updates_fields(self, compressor):
+        compressor.awaiting_real_usage_after_compression = True
+        compressor.last_compression_rough_tokens = 90_000
        compressor.update_from_response({
            "prompt_tokens": 5000,
            "completion_tokens": 1000,
@ -48,12 +50,39 @@ class TestUpdateFromResponse:
        })
        assert compressor.last_prompt_tokens == 5000
        assert compressor.last_completion_tokens == 1000
+        assert compressor.last_real_prompt_tokens == 5000
+        assert compressor.last_rough_tokens_when_real_prompt_fit == 90_000
+        assert compressor.awaiting_real_usage_after_compression is False

    def test_missing_fields_default_zero(self, compressor):
        compressor.update_from_response({})
        assert compressor.last_prompt_tokens == 0


+class TestPreflightDeferral:
+    def test_defers_when_recent_real_usage_fit_and_rough_growth_is_small(self, compressor):
+        compressor.threshold_tokens = 85_000
+        compressor.last_real_prompt_tokens = 50_000
+        compressor.last_rough_tokens_when_real_prompt_fit = 90_000
+
+        assert compressor.should_defer_preflight_to_real_usage(93_000) is True
+        assert compressor.last_rough_tokens_when_real_prompt_fit == 93_000
+
+    def test_does_not_defer_when_rough_growth_is_large(self, compressor):
+        compressor.threshold_tokens = 85_000
+        compressor.last_real_prompt_tokens = 50_000
+        compressor.last_rough_tokens_when_real_prompt_fit = 90_000
+
+        assert compressor.should_defer_preflight_to_real_usage(100_000) is False
+
+    def test_does_not_defer_without_recent_real_usage(self, compressor):
+        compressor.threshold_tokens = 85_000
+        compressor.last_real_prompt_tokens = 0
+        compressor.last_rough_tokens_when_real_prompt_fit = 90_000
+
+        assert compressor.should_defer_preflight_to_real_usage(93_000) is False
+
+

 class TestCompress:
    def _make_messages(self, n):
--- a/tests/agent/test_model_metadata.py
+++ b/tests/agent/test_model_metadata.py
@ -123,55 +123,6 @@ class TestEstimateMessagesTokensRough:
 # =========================================================================

 class TestDefaultContextLengths:
-    def test_claude_models_context_lengths(self):
-        for key, value in DEFAULT_CONTEXT_LENGTHS.items():
-            if "claude" not in key:
-                continue
-            # Claude 4.6+ models (4.6, 4.7, 4.8) have 1M context at standard
-            # API pricing (no long-context premium).  Older Claude 4.x and
-            # 3.x models cap at 200k.
-            if any(tag in key for tag in ("4.6", "4-6", "4.7", "4-7", "4.8", "4-8")):
-                assert value == 1000000, f"{key} should be 1000000"
-            else:
-                assert value == 200000, f"{key} should be 200000"
-
-    def test_gpt4_models_128k_or_1m(self):
-        # gpt-4.1 and gpt-4.1-mini have 1M context; other gpt-4* have 128k
-        for key, value in DEFAULT_CONTEXT_LENGTHS.items():
-            if "gpt-4" in key and "gpt-4.1" not in key:
-                assert value == 128000, f"{key} should be 128000"
-
-    def test_gpt41_models_1m(self):
-        for key, value in DEFAULT_CONTEXT_LENGTHS.items():
-            if "gpt-4.1" in key:
-                assert value == 1047576, f"{key} should be 1047576"
-
-    def test_gemini_models_1m(self):
-        for key, value in DEFAULT_CONTEXT_LENGTHS.items():
-            if "gemini" in key:
-                assert value == 1048576, f"{key} should be 1048576"
-
-    def test_grok_models_context_lengths(self):
-        # xAI /v1/models does not return context_length metadata, so
-        # DEFAULT_CONTEXT_LENGTHS must cover the Grok family explicitly.
-        # Values sourced from models.dev (2026-04).
-        expected = {
-            "grok-4.20": 2000000,
-            "grok-4-fast": 2000000,
-            "grok-4": 256000,
-            "grok-build": 256000,
-            "grok-code-fast": 256000,
-            "grok-3": 131072,
-            "grok-2": 131072,
-            "grok-2-vision": 8192,
-            "grok": 131072,
-        }
-        for key, value in expected.items():
-            assert key in DEFAULT_CONTEXT_LENGTHS, f"{key} missing from DEFAULT_CONTEXT_LENGTHS"
-            assert DEFAULT_CONTEXT_LENGTHS[key] == value, (
-                f"{key} should be {value}, got {DEFAULT_CONTEXT_LENGTHS[key]}"
-            )
-
    def test_grok_substring_matching(self):
        # Longest-first substring matching must resolve the real xAI model
        # IDs to the correct fallback entries without 128k probe-down.
@ -268,13 +219,6 @@ class TestDefaultContextLengths:
                    f"{model_id}: expected {expected_ctx}, got {actual}"
                )

-    def test_all_values_positive(self):
-        for key, value in DEFAULT_CONTEXT_LENGTHS.items():
-            assert value > 0, f"{key} has non-positive context length"
-
-    def test_dict_is_not_empty(self):
-        assert len(DEFAULT_CONTEXT_LENGTHS) >= 10
-

 # =========================================================================
 # Codex OAuth context-window resolution (provider="openai-codex")
@ -1141,12 +1085,6 @@ class TestContextProbeTiers:
        for i in range(len(CONTEXT_PROBE_TIERS) - 1):
            assert CONTEXT_PROBE_TIERS[i] > CONTEXT_PROBE_TIERS[i + 1]

-    def test_first_tier_is_256k(self):
-        assert CONTEXT_PROBE_TIERS[0] == 256_000
-
-    def test_last_tier_is_8k(self):
-        assert CONTEXT_PROBE_TIERS[-1] == 8_000
-

 class TestGetNextProbeTier:
    def test_from_256k(self):
--- a/tests/agent/test_models_dev.py
+++ b/tests/agent/test_models_dev.py
@ -82,17 +82,6 @@ SAMPLE_REGISTRY = {


 class TestProviderMapping:
-    def test_all_mapped_providers_are_strings(self):
-        for hermes_id, mdev_id in PROVIDER_TO_MODELS_DEV.items():
-            assert isinstance(hermes_id, str)
-            assert isinstance(mdev_id, str)
-
-    def test_known_providers_mapped(self):
-        assert PROVIDER_TO_MODELS_DEV["anthropic"] == "anthropic"
-        assert PROVIDER_TO_MODELS_DEV["copilot"] == "github-copilot"
-        assert PROVIDER_TO_MODELS_DEV["stepfun"] == "stepfun"
-        assert PROVIDER_TO_MODELS_DEV["kilocode"] == "kilo"
-
    def test_xai_oauth_uses_xai_catalog(self):
        assert PROVIDER_TO_MODELS_DEV["xai"] == "xai"
        assert PROVIDER_TO_MODELS_DEV["xai-oauth"] == "xai"
--- a/tests/agent/test_resume_stale_active_task.py
+++ b/tests/agent/test_resume_stale_active_task.py
@ -0,0 +1,141 @@
+"""Regression coverage for #35344: a resumed session must not let a stale
+``## Active Task`` from an inherited compaction handoff hijack the reply to a
+new, unrelated user message.
+
+The failure mode (real report): a lineage was compacted, producing a handoff
+whose ``## Active Task`` described task A. The lineage was resumed later and
+the user asked about an unrelated task B. The model answered with A because
+the handoff's resume directive outranked the fresh ask.
+
+The structural fix lives in ``SUMMARY_PREFIX``: the handoff is framed as
+reference-only and the latest user message explicitly *wins* on conflict, with
+named reverse-signal verbs. Two invariants guard the resume path specifically:
+
+  1. A handoff persisted under the OLD (conflicting) prefix is re-normalized to
+     the CURRENT prefix when it is re-compacted on a resumed lineage — so a
+     pre-fix stale handoff cannot keep its "resume exactly" directive forever.
+
+  2. The current handoff prefix contains an unambiguous "latest message wins /
+     discard stale Active Task" rule, so an unrelated new ask is privileged over
+     the inherited ``## Active Task``.
+
+These are content/structural assertions (no live model call) — they pin the
+mechanism that makes the stale task historical rather than active.
+"""
+
+from agent.context_compressor import (
+    SUMMARY_PREFIX,
+    LEGACY_SUMMARY_PREFIX,
+    ContextCompressor,
+)
+
+
+# The conflicting prefix that shipped before the #35344 fix. A handoff
+# persisted in a resumed lineage could carry this verbatim.
+_OLD_CONFLICTING_PREFIX = (
+    "[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted "
+    "into the summary below. This is a handoff from a previous context "
+    "window — treat it as background reference, NOT as active instructions. "
+    "Do NOT answer questions or fulfill requests mentioned in this summary; "
+    "they were already addressed. "
+    "Your current task is identified in the '## Active Task' section of the "
+    "summary — resume exactly from there. "
+    "Respond ONLY to the latest user message "
+    "that appears AFTER this summary. The current session state (files, "
+    "config, etc.) may reflect work described here — avoid repeating it:"
+)
+
+
+def test_latest_message_wins_over_inherited_active_task():
+    """The handoff must explicitly privilege the latest user message over a
+    stale ``## Active Task`` — the core #35344 contract."""
+    lower = SUMMARY_PREFIX.lower()
+    assert "latest user message" in lower
+    assert "## active task" in lower
+    # Conflict-resolution must be explicit, not implied.
+    assert "wins" in lower or "supersede" in lower
+    assert "discard" in lower
+
+
+def test_no_resume_exactly_directive_can_hijack():
+    """The directive that caused the hijack ("resume exactly from Active
+    Task") must be gone."""
+    assert "resume exactly" not in SUMMARY_PREFIX.lower()
+
+
+def test_resumed_stale_handoff_gets_renormalized_to_current_prefix():
+    """A handoff persisted under the OLD conflicting prefix (e.g. saved before
+    the fix and inherited into a resumed lineage) is upgraded to the CURRENT
+    prefix when re-normalized on re-compaction — so the "resume exactly"
+    directive cannot survive into a resumed session."""
+    stale_body = (
+        "## Active Task\n"
+        "User asked: 'Migrate the billing module to Stripe'\n\n"
+        "## Goal\nMigrate billing.\n"
+    )
+    stale_handoff = f"{_OLD_CONFLICTING_PREFIX}\n{stale_body}"
+
+    # Sanity: the fixture really does carry the old directive.
+    assert "resume exactly" in stale_handoff.lower()
+
+    renormalized = ContextCompressor._with_summary_prefix(stale_handoff)
+
+    # The body is preserved...
+    assert "Migrate the billing module to Stripe" in renormalized
+    # ...but the conflicting directive is stripped and replaced with the
+    # current latest-message-wins framing.
+    assert "resume exactly" not in renormalized.lower()
+    assert renormalized.startswith(SUMMARY_PREFIX)
+    assert "wins" in renormalized.lower()
+
+
+def test_legacy_prefix_handoff_also_renormalized():
+    """The same upgrade applies to the oldest ``[CONTEXT SUMMARY]:`` handoff
+    format that may sit in a long-lived resumed lineage."""
+    legacy = f"{LEGACY_SUMMARY_PREFIX} ## Active Task\nUser asked: 'task A'"
+    renormalized = ContextCompressor._with_summary_prefix(legacy)
+    assert renormalized.startswith(SUMMARY_PREFIX)
+    assert LEGACY_SUMMARY_PREFIX not in renormalized
+    assert "task A" in renormalized
+
+
+def test_inherited_handoff_detected_in_resumed_protected_head():
+    """On a resumed lineage the handoff commonly sits right after the system
+    prompt (in the protected head). ``_find_latest_context_summary`` must
+    detect it there so re-compaction rehydrates state from it rather than
+    serializing it as a fresh user turn (which is what let the stale Active
+    Task read as live intent)."""
+    messages = [
+        {"role": "system", "content": "system prompt"},
+        {"role": "user", "content": f"{SUMMARY_PREFIX}\n## Active Task\nUser asked: 'task A'"},
+        {"role": "assistant", "content": "ok"},
+        {"role": "user", "content": "Unrelated task B: what's the capital of France?"},
+    ]
+    # Search the whole post-system range.
+    idx, body = ContextCompressor._find_latest_context_summary(
+        messages, 1, len(messages)
+    )
+    assert idx == 1, "handoff in protected head must be found"
+    assert "task A" in body
+    # The detected body is stripped of the prefix (treated as state, not a
+    # standalone instruction message).
+    assert not body.startswith(SUMMARY_PREFIX)
+
+
+def test_historical_prefixed_handoff_detected_and_stripped():
+    """A pre-fix handoff (old conflicting prefix) inherited into a resumed
+    lineage must still be recognized as a context summary AND have its old
+    directive stripped on detection — otherwise re-compaction serializes the
+    stale 'resume exactly' text as a fresh turn."""
+    messages = [
+        {"role": "system", "content": "system prompt"},
+        {"role": "user", "content": f"{_OLD_CONFLICTING_PREFIX}\n## Active Task\nUser asked: 'task A'"},
+        {"role": "assistant", "content": "ok"},
+        {"role": "user", "content": "Unrelated task B"},
+    ]
+    idx, body = ContextCompressor._find_latest_context_summary(
+        messages, 1, len(messages)
+    )
+    assert idx == 1
+    assert "task A" in body
+    assert "resume exactly" not in body.lower()
--- a/tests/agent/test_set_runtime_main_custom_provider.py
+++ b/tests/agent/test_set_runtime_main_custom_provider.py
@ -0,0 +1,226 @@
+"""Regression test: set_runtime_main() must pass base_url/api_key/api_mode
+so that _resolve_auto() can route custom: providers in Step 1.
+
+Fixes https://github.com/NousResearch/hermes-agent/issues/34777
+"""
+import pytest
+from unittest.mock import patch, MagicMock
+
+
+def _get_globals(mod):
+    """Read runtime globals without triggering redaction."""
+    return {
+        "provider": mod._RUNTIME_MAIN_PROVIDER,
+        "model": mod._RUNTIME_MAIN_MODEL,
+        "base_url": mod._RUNTIME_MAIN_BASE_URL,
+        "cred": mod._RUNTIME_MAIN_API_KEY,  # renamed to avoid redaction
+        "api_mode": mod._RUNTIME_MAIN_API_MODE,
+    }
+
+
+class TestSetRuntimeMainCustomProvider:
+    """set_runtime_main must propagate base_url/api_key/api_mode for custom providers."""
+
+    def test_globals_stored(self):
+        """set_runtime_main stores all five fields in process-local globals."""
+        import agent.auxiliary_client as mod
+
+        mod.clear_runtime_main()
+        try:
+            mod.set_runtime_main(
+                "custom:my-router",
+                "glm-5.1",
+                base_url="https://my-server.example.com/v1",
+                api_key="sk-test-key",
+                api_mode="chat_completions",
+            )
+            g = _get_globals(mod)
+            assert g["provider"] == "custom:my-router"
+            assert g["model"] == "glm-5.1"
+            assert g["base_url"] == "https://my-server.example.com/v1"
+            assert g["cred"] == "sk-test-key"
+            assert g["api_mode"] == "chat_completions"
+        finally:
+            mod.clear_runtime_main()
+
+    def test_clear_resets_all_globals(self):
+        """clear_runtime_main resets all five globals to empty."""
+        import agent.auxiliary_client as mod
+
+        mod.set_runtime_main(
+            "custom:x", "m",
+            base_url="https://x.example.com",
+            api_key="sk-abc",
+            api_mode="chat_completions",
+        )
+        mod.clear_runtime_main()
+        g = _get_globals(mod)
+        for v in g.values():
+            assert v == "", f"Expected empty, got {v!r}"
+
+    def test_resolve_auto_uses_globals_for_custom_provider(self):
+        """_resolve_auto reads base_url/api_key from globals when main_runtime is None."""
+        import agent.auxiliary_client as mod
+
+        mod.clear_runtime_main()
+        try:
+            mod.set_runtime_main(
+                "custom:test-router",
+                "test-model",
+                base_url="https://custom-endpoint.example.com/v1",
+                api_key="sk-test-123",
+            )
+
+            with patch.object(mod, "resolve_provider_client") as mock_resolve:
+                mock_resolve.return_value = (MagicMock(), "test-model")
+                client, resolved = mod._resolve_auto(main_runtime=None)
+
+                mock_resolve.assert_called_once()
+                call_args = mock_resolve.call_args
+                assert call_args[0][0] == "custom"
+                assert call_args[1]["explicit_base_url"] == "https://custom-endpoint.example.com/v1"
+                assert call_args[1]["explicit_api_key"] == "sk-test-123"
+        finally:
+            mod.clear_runtime_main()
+
+    def test_explicit_main_runtime_takes_precedence(self):
+        """When main_runtime dict has values, globals are NOT used."""
+        import agent.auxiliary_client as mod
+
+        mod.clear_runtime_main()
+        try:
+            mod.set_runtime_main(
+                "custom:router-a",
+                "model-a",
+                base_url="https://from-global.example.com",
+                api_key="sk-global",
+            )
+
+            with patch.object(mod, "resolve_provider_client") as mock_resolve:
+                mock_resolve.return_value = (MagicMock(), "model-b")
+                main_rt = {
+                    "provider": "custom:router-b",
+                    "model": "model-b",
+                    "base_url": "https://from-dict.example.com",
+                    "api_key": "sk-dict",
+                }
+                mod._resolve_auto(main_runtime=main_rt)
+
+                call_args = mock_resolve.call_args[1]
+                assert call_args["explicit_base_url"] == "https://from-dict.example.com"
+                assert call_args["explicit_api_key"] == "sk-dict"
+        finally:
+            mod.clear_runtime_main()
+
+    def test_backward_compatible_defaults(self):
+        """Calling set_runtime_main with only positional args still works."""
+        import agent.auxiliary_client as mod
+
+        mod.clear_runtime_main()
+        try:
+            mod.set_runtime_main("openrouter", "gpt-4o")
+            g = _get_globals(mod)
+            assert g["provider"] == "openrouter"
+            assert g["model"] == "gpt-4o"
+            assert g["base_url"] == ""
+            assert g["cred"] == ""
+            assert g["api_mode"] == ""
+        finally:
+            mod.clear_runtime_main()
+
+
+class TestResolveAutoCustomEndToEnd:
+    """End-to-end routing assertions — build a *real* client (no mock on
+    resolve_provider_client) and verify the auxiliary auto-detect chain lands
+    on the user's custom endpoint instead of falling through to the aggregator
+    chain.  These guard the actual user-visible symptom in #34777 (aux tasks
+    silently routed to a fallback provider) rather than just the wiring.
+    """
+
+    @staticmethod
+    def _client_base_url(client):
+        for chain in (("base_url",), ("_client", "base_url")):
+            obj = client
+            try:
+                for attr in chain:
+                    obj = getattr(obj, attr)
+                return str(obj)
+            except AttributeError:
+                continue
+        return None
+
+    def test_config_less_custom_endpoint_routes_via_global(self, tmp_path, monkeypatch):
+        """custom:<name> with NO config entry: the live base_url carried by
+        set_runtime_main() must build a real client at that endpoint — not
+        fall through to Step 2 (the regression in #34777)."""
+        import agent.auxiliary_client as mod
+
+        # Hermetic: no aggregator creds, no stale OPENAI_BASE_URL.
+        for var in ("OPENROUTER_API_KEY", "NOUS_API_KEY", "OPENAI_API_KEY",
+                    "OPENAI_BASE_URL"):
+            monkeypatch.delenv(var, raising=False)
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        (hermes_home / "config.yaml").write_text(
+            "model:\n"
+            "  default: glm-5.1\n"
+            "  provider: 'custom:ephemeral'\n"
+            "  base_url: ''\n"
+        )
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+        mod.clear_runtime_main()
+        try:
+            mod.set_runtime_main(
+                "custom:ephemeral",
+                "glm-5.1",
+                base_url="https://ephemeral.live/v1",
+                api_key="sk-live",
+            )
+            client, resolved = mod.resolve_provider_client("auto", None)
+            assert client is not None, (
+                "config-less custom endpoint fell through to Step 2 — "
+                "the #34777 bug is back"
+            )
+            assert resolved == "glm-5.1"
+            base = self._client_base_url(client)
+            assert base and base.rstrip("/") == "https://ephemeral.live/v1"
+        finally:
+            mod.clear_runtime_main()
+
+    def test_named_custom_with_config_entry_still_routes(self, tmp_path, monkeypatch):
+        """Regression guard: custom:<name> WITH a custom_providers entry must
+        still resolve to that entry's endpoint.  An earlier competing fix
+        collapsed the provider to bare ``custom`` before resolution, which
+        broke the named-custom branch and returned None here."""
+        import agent.auxiliary_client as mod
+
+        for var in ("OPENROUTER_API_KEY", "NOUS_API_KEY", "OPENAI_API_KEY",
+                    "OPENAI_BASE_URL"):
+            monkeypatch.delenv(var, raising=False)
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        (hermes_home / "config.yaml").write_text(
+            "model:\n"
+            "  default: glm-5.1\n"
+            "  provider: 'custom:openclaw'\n"
+            "  base_url: ''\n"
+            "custom_providers:\n"
+            "  - name: openclaw\n"
+            "    base_url: 'https://withcfg.example/v1'\n"
+            "    model: glm-5.1\n"
+            "    api_key: cfg-key\n"
+        )
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+        # No live base_url carried — resolution must come from config alone,
+        # via the named-custom branch in resolve_provider_client.
+        mod.clear_runtime_main()
+        try:
+            mod.set_runtime_main("custom:openclaw", "glm-5.1")
+            client, resolved = mod.resolve_provider_client("auto", None)
+            assert client is not None
+            base = self._client_base_url(client)
+            assert base and base.rstrip("/") == "https://withcfg.example/v1"
+        finally:
+            mod.clear_runtime_main()
--- a/tests/agent/test_summary_prefix_semantics.py
+++ b/tests/agent/test_summary_prefix_semantics.py
@ -0,0 +1,62 @@
+"""Pin the semantics of SUMMARY_PREFIX so the compaction handoff doesn't
+re-introduce conflicting instructions.
+
+Background: SUMMARY_PREFIX previously contained two contradictory directives:
+
+  1. "treat it as background reference, NOT as active instructions"
+     "Do NOT answer questions or fulfill requests mentioned in this summary"
+     "Respond ONLY to the latest user message that appears AFTER this summary"
+
+  2. "Your current task is identified in the '## Active Task' section of the
+     summary — resume exactly from there."
+
+When the latest user message contradicted Active Task (e.g. "stop the
+i18n refactor", "never mind, look at grafana"), the model often followed
+(2) anyway because "resume exactly" is a strong directive — leading to
+the agent repeatedly re-surfacing already-cancelled work across turns.
+
+These tests pin the post-fix invariants so the conflict cannot regress.
+"""
+
+from agent.context_compressor import SUMMARY_PREFIX
+
+
+def test_no_resume_exactly_directive():
+    """The prefix must not tell the model to resume Active Task verbatim."""
+    assert "resume exactly" not in SUMMARY_PREFIX.lower()
+
+
+def test_latest_message_wins_on_conflict():
+    """The prefix must explicitly say latest user message wins on conflict."""
+    lower = SUMMARY_PREFIX.lower()
+    assert "latest user message" in lower
+    # Must have an explicit conflict-resolution rule.
+    assert "wins" in lower or "supersede" in lower or "discard" in lower
+
+
+def test_reverse_signals_called_out():
+    """Reverse signals (stop/undo/never mind/topic change) must be named so
+    the model recognizes them as cancellation triggers, not just background."""
+    lower = SUMMARY_PREFIX.lower()
+    # At least a few of the canonical reverse-signal verbs should appear.
+    reverse_terms = ["stop", "undo", "roll back", "never mind", "just verify"]
+    hits = sum(1 for t in reverse_terms if t in lower)
+    assert hits >= 3, (
+        f"Expected ≥3 reverse-signal terms in SUMMARY_PREFIX, found {hits}. "
+        "Without naming them the model treats reverse signals as ordinary "
+        "context and keeps pushing the cancelled task."
+    )
+
+
+def test_summary_marked_reference_only():
+    """The REFERENCE ONLY framing must remain — it's the entire point."""
+    assert "REFERENCE ONLY" in SUMMARY_PREFIX
+    assert "background reference" in SUMMARY_PREFIX
+    assert "NOT as active instructions" in SUMMARY_PREFIX
+
+
+def test_memory_authority_preserved():
+    """The fix must not weaken the MEMORY.md / USER.md authority clause."""
+    assert "MEMORY.md" in SUMMARY_PREFIX
+    assert "USER.md" in SUMMARY_PREFIX
+    assert "authoritative" in SUMMARY_PREFIX
--- a/tests/cli/test_cli_light_mode.py
+++ b/tests/cli/test_cli_light_mode.py
@ -75,6 +75,27 @@ class TestLightModeDetection:
        assert cli_mod._detect_light_mode() is True


+class TestOsc11Probe:
+    """The OSC 11 background probe must never run where its reply can leak
+    into prompt_toolkit's input (a late BEL-terminated reply reads as Ctrl+G
+    = open-editor, trapping the user in a stray editor). Guard the cases we
+    refuse to probe in.
+    """
+
+    @pytest.mark.parametrize("var", ("SSH_CONNECTION", "SSH_CLIENT", "SSH_TTY"))
+    def test_skips_over_ssh(self, cli_mod, monkeypatch, var):
+        monkeypatch.setattr(cli_mod.sys.stdin, "isatty", lambda: True, raising=False)
+        monkeypatch.setattr(cli_mod.sys.stdout, "isatty", lambda: True, raising=False)
+        for v in ("SSH_CONNECTION", "SSH_CLIENT", "SSH_TTY"):
+            monkeypatch.delenv(v, raising=False)
+        monkeypatch.setenv(var, "1.2.3.4 5555 22")
+        assert cli_mod._query_osc11_background() is None
+
+    def test_skips_when_not_a_tty(self, cli_mod, monkeypatch):
+        monkeypatch.setattr(cli_mod.sys.stdin, "isatty", lambda: False, raising=False)
+        assert cli_mod._query_osc11_background() is None
+
+
 class TestLightModeRemap:
    def test_remap_no_op_in_dark_mode(self, cli_mod, monkeypatch):
        monkeypatch.setenv("HERMES_LIGHT", "0")
@ -133,7 +154,9 @@ class TestSkinConfigHook:
        after = SkinConfig.get_color
        assert before is after

-    def test_skin_color_remaps_through_wrapper_in_light_mode(self, cli_mod, monkeypatch):
+    def test_skin_color_remaps_through_wrapper_in_light_mode(
+        self, cli_mod, monkeypatch
+    ):
        from hermes_cli.skin_engine import SkinConfig

        cli_mod._LIGHT_MODE_CACHE = True
--- a/tests/cli/test_steer_inline_repaint_34569.py
+++ b/tests/cli/test_steer_inline_repaint_34569.py
@ -0,0 +1,116 @@
+"""Regression guard for issue #34569 — inline /steer (and /model) submit
+must repaint the input area after clearing the buffer.
+
+Mechanism of the bug
+--------------------
+``handle_enter`` dispatches ``/steer`` (and ``/model``) inline on the UI
+thread while the agent is running.  Those branches called
+``buffer.reset(append_to_history=True)`` but — unlike every *other*
+early-return branch in the handler — did NOT call ``event.app.invalidate()``.
+Because ``process_command()`` prints through ``patch_stdout`` (which scrolls
+output above the prompt and never triggers a prompt_toolkit redraw), the
+just-cleared input area could keep showing the submitted ``/steer <text>``
+until some unrelated redraw fired.  The user saw their submitted text as if
+it were unsent and could accidentally re-submit it.
+
+This test pins the contract structurally: inside ``handle_enter``, any
+inline-command early-return that resets the buffer must be followed by an
+``event.app.invalidate()`` before its ``return``.  It is an *invariant*
+(every reset-then-return repaints), not a snapshot of current source.
+"""
+
+from __future__ import annotations
+
+import ast
+from pathlib import Path
+
+
+def _load_handle_enter_node() -> ast.FunctionDef:
+    """Extract the ``handle_enter`` nested function node from cli.py."""
+    cli_path = Path(__file__).resolve().parents[2] / "cli.py"
+    tree = ast.parse(cli_path.read_text(encoding="utf-8"))
+
+    target = None
+    for node in ast.walk(tree):
+        if isinstance(node, ast.FunctionDef) and node.name == "handle_enter":
+            target = node
+            break
+    assert target is not None, "handle_enter closure not found in cli.py"
+    return target
+
+
+def _is_buffer_reset(node: ast.stmt) -> bool:
+    """True if the statement is ``...current_buffer.reset(...)``."""
+    if not isinstance(node, ast.Expr):
+        return False
+    call = node.value
+    if not isinstance(call, ast.Call):
+        return False
+    func = call.func
+    return isinstance(func, ast.Attribute) and func.attr == "reset"
+
+
+def _is_invalidate(node: ast.stmt) -> bool:
+    """True if the statement is ``event.app.invalidate()``."""
+    if not isinstance(node, ast.Expr):
+        return False
+    call = node.value
+    if not isinstance(call, ast.Call):
+        return False
+    func = call.func
+    return isinstance(func, ast.Attribute) and func.attr == "invalidate"
+
+
+def _collect_reset_blocks(func: ast.FunctionDef) -> list[list[ast.stmt]]:
+    """Find every statement sequence (a block body/orelse/finalbody) within
+    ``handle_enter`` that contains a ``buffer.reset()`` call."""
+    blocks: list[list[ast.stmt]] = []
+    for node in ast.walk(func):
+        for attr in ("body", "orelse", "finalbody"):
+            seq = getattr(node, attr, None)
+            if not isinstance(seq, list):
+                continue
+            if any(isinstance(s, ast.stmt) and _is_buffer_reset(s) for s in seq):
+                blocks.append(seq)
+    return blocks
+
+
+def test_inline_command_reset_branches_invalidate():
+    """Every handle_enter branch that resets the buffer and then returns must
+    invalidate the app first (issue #34569)."""
+    func = _load_handle_enter_node()
+    reset_blocks = _collect_reset_blocks(func)
+
+    assert reset_blocks, "expected to find buffer.reset() calls in handle_enter"
+
+    offenders = []
+    for seq in reset_blocks:
+        for i, stmt in enumerate(seq):
+            if not _is_buffer_reset(stmt):
+                continue
+            # Find the next return after this reset in the same block.
+            ret_idx = None
+            for j in range(i + 1, len(seq)):
+                if isinstance(seq[j], ast.Return):
+                    ret_idx = j
+                    break
+            if ret_idx is None:
+                # reset not directly followed by a return in this block
+                # (e.g. the fall-through reset at the end of the handler) —
+                # the next user input naturally repaints, so skip.
+                continue
+            between = seq[i + 1 : ret_idx]
+            if not any(_is_invalidate(s) for s in between):
+                offenders.append(ast.dump(stmt))
+
+    assert not offenders, (
+        "handle_enter has reset-then-return branch(es) that never call "
+        "event.app.invalidate() — the input area can keep showing the "
+        "submitted text (issue #34569). Offending reset stmts:\n"
+        + "\n".join(offenders)
+    )
+
+
+if __name__ == "__main__":  # pragma: no cover
+    test_inline_command_reset_branches_invalidate()
+    print("ok")
--- a/tests/gateway/test_agent_cache.py
+++ b/tests/gateway/test_agent_cache.py
@ -276,6 +276,111 @@ class TestExtractCacheBustingConfig:

        assert out["tools.registry_generation"] == 12345

+
+    def test_skips_honcho_config_read_when_provider_is_not_honcho(self, monkeypatch):
+        """Non-Honcho gateways must not read/parse honcho.json on every message."""
+        from gateway.run import GatewayRunner
+
+        called = False
+
+        def _boom():
+            nonlocal called
+            called = True
+            raise AssertionError("should not read Honcho config")
+
+        monkeypatch.setattr(GatewayRunner, "_extract_honcho_cache_busting_config", _boom)
+
+        out = GatewayRunner._extract_cache_busting_config({"memory": {"provider": "mem0"}})
+
+        assert called is False
+        assert out["honcho.peer_name"] is None
+        assert out["honcho.user_peer_aliases"] is None
+
+    def test_reads_honcho_config_only_when_provider_is_honcho(self, monkeypatch):
+        from gateway.run import GatewayRunner
+
+        calls = []
+
+        def _fake():
+            calls.append(True)
+            return {
+                "honcho.peer_name": "eri",
+                "honcho.ai_peer": "hermes",
+                "honcho.pin_peer_name": True,
+                "honcho.runtime_peer_prefix": "tg_",
+                "honcho.user_peer_aliases": [("123", "eri")],
+            }
+
+        monkeypatch.setattr(GatewayRunner, "_extract_honcho_cache_busting_config", _fake)
+
+        out = GatewayRunner._extract_cache_busting_config({"memory": {"provider": "honcho"}})
+
+        assert calls == [True]
+        assert out["honcho.peer_name"] == "eri"
+        assert out["honcho.user_peer_aliases"] == [("123", "eri")]
+
+    def test_memory_provider_change_busts_signature(self, monkeypatch):
+        """Switching memory.provider must itself change the cache-busting
+        signature, so the agent is rebuilt when a user swaps providers
+        mid-gateway (independent of the honcho.json identity keys)."""
+        from gateway.run import GatewayRunner
+
+        # Neutralize honcho.json reads so the only varying input is the
+        # provider value itself.
+        monkeypatch.setattr(
+            GatewayRunner,
+            "_extract_honcho_cache_busting_config",
+            classmethod(lambda cls: cls._empty_honcho_cache_busting_config()),
+        )
+
+        sig_honcho = GatewayRunner._extract_cache_busting_config({"memory": {"provider": "honcho"}})
+        sig_mem0 = GatewayRunner._extract_cache_busting_config({"memory": {"provider": "mem0"}})
+
+        assert sig_honcho["memory.provider"] == "honcho"
+        assert sig_mem0["memory.provider"] == "mem0"
+        assert sig_honcho != sig_mem0
+
+    def test_honcho_cache_busting_config_memoized_by_mtime(self, monkeypatch, tmp_path):
+        """Repeated Honcho extraction for unchanged honcho.json should reuse parse result."""
+        from types import SimpleNamespace
+        from gateway.run import GatewayRunner
+
+        config_path = tmp_path / "honcho.json"
+        config_path.write_text("{}")
+        parse_calls = []
+
+        class FakeConfig:
+            peer_name = "eri"
+            ai_peer = "hermes"
+            pin_peer_name = False
+            runtime_peer_prefix = "tg_"
+            user_peer_aliases = {"123": "eri"}
+
+            @classmethod
+            def from_global_config(cls, config_path=None):
+                parse_calls.append(config_path)
+                return cls()
+
+        fake_client = SimpleNamespace(
+            HonchoClientConfig=FakeConfig,
+            resolve_config_path=lambda: config_path,
+        )
+        monkeypatch.setitem(__import__("sys").modules, "plugins.memory.honcho.client", fake_client)
+        monkeypatch.setattr(GatewayRunner, "_HONCHO_CACHE_BUSTING_MEMO", {})
+
+        first = GatewayRunner._extract_honcho_cache_busting_config()
+        second = GatewayRunner._extract_honcho_cache_busting_config()
+
+        assert first == second
+        assert first["honcho.user_peer_aliases"] == [("123", "eri")]
+        assert parse_calls == [config_path]
+
+        config_path.write_text("{\n  \"changed\": true\n}")
+        third = GatewayRunner._extract_honcho_cache_busting_config()
+
+        assert third == first
+        assert parse_calls == [config_path, config_path]
+
    def test_full_round_trip_busts_cache_on_real_edit(self):
        """End-to-end: simulate a config edit on main and verify the
        extracted cache_keys change produces a new signature."""
--- a/tests/gateway/test_config.py
+++ b/tests/gateway/test_config.py
@ -343,6 +343,56 @@ class TestLoadGatewayConfig:
        # Env value preserved, not clobbered by yaml.
        assert os.environ.get("DISCORD_THREAD_REQUIRE_MENTION") == "true"

+    def test_bridges_discord_allow_from_from_config_yaml(self, tmp_path, monkeypatch):
+        """discord.allow_from should populate DISCORD_ALLOWED_USERS for auth."""
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        config_path = hermes_home / "config.yaml"
+        config_path.write_text(
+            "discord:\n"
+            "  allow_from:\n"
+            "    - \"123456789012345678\"\n"
+            "    - \"999888777666555444\"\n",
+            encoding="utf-8",
+        )
+
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        monkeypatch.delenv("DISCORD_ALLOWED_USERS", raising=False)
+
+        config = load_gateway_config()
+
+        assert config.platforms[Platform.DISCORD].extra["allow_from"] == [
+            "123456789012345678",
+            "999888777666555444",
+        ]
+        assert os.environ.get("DISCORD_ALLOWED_USERS") == (
+            "123456789012345678,999888777666555444"
+        )
+
+    def test_bridges_discord_platform_extra_allow_from_to_env(self, tmp_path, monkeypatch):
+        """platforms.discord.extra.allow_from should reach DISCORD_ALLOWED_USERS too."""
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        config_path = hermes_home / "config.yaml"
+        config_path.write_text(
+            "platforms:\n"
+            "  discord:\n"
+            "    extra:\n"
+            "      allow_from:\n"
+            "        - \"123456789012345678\"\n",
+            encoding="utf-8",
+        )
+
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        monkeypatch.delenv("DISCORD_ALLOWED_USERS", raising=False)
+
+        config = load_gateway_config()
+
+        assert config.platforms[Platform.DISCORD].extra["allow_from"] == [
+            "123456789012345678",
+        ]
+        assert os.environ.get("DISCORD_ALLOWED_USERS") == "123456789012345678"
+
    def test_bridges_quoted_false_platform_enabled_from_config_yaml(self, tmp_path, monkeypatch):
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
@ -361,6 +411,69 @@ class TestLoadGatewayConfig:
        assert config.platforms[Platform.API_SERVER].enabled is False
        assert Platform.API_SERVER not in config.get_connected_platforms()

+    def test_bridges_nested_gateway_platforms_from_config_yaml(self, tmp_path, monkeypatch):
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        config_path = hermes_home / "config.yaml"
+        config_path.write_text(
+            "gateway:\n"
+            "  platforms:\n"
+            "    telegram:\n"
+            "      enabled: true\n"
+            "      token: nested-token\n"
+            "      home_channel:\n"
+            "        platform: telegram\n"
+            "        chat_id: \"123\"\n"
+            "        name: Nested Home\n"
+            "      extra:\n"
+            "        reply_prefix: nested\n",
+            encoding="utf-8",
+        )
+
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+        config = load_gateway_config()
+
+        telegram = config.platforms[Platform.TELEGRAM]
+        assert telegram.enabled is True
+        assert telegram.token == "nested-token"
+        assert telegram.home_channel == HomeChannel(
+            platform=Platform.TELEGRAM,
+            chat_id="123",
+            name="Nested Home",
+        )
+        assert telegram.extra["reply_prefix"] == "nested"
+
+    def test_top_level_platforms_override_nested_gateway_platforms(self, tmp_path, monkeypatch):
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        config_path = hermes_home / "config.yaml"
+        config_path.write_text(
+            "gateway:\n"
+            "  platforms:\n"
+            "    telegram:\n"
+            "      enabled: false\n"
+            "      token: nested-token\n"
+            "      extra:\n"
+            "        reply_prefix: nested\n"
+            "platforms:\n"
+            "  telegram:\n"
+            "    enabled: true\n"
+            "    token: top-token\n"
+            "    extra:\n"
+            "      reply_prefix: top\n",
+            encoding="utf-8",
+        )
+
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+        config = load_gateway_config()
+
+        telegram = config.platforms[Platform.TELEGRAM]
+        assert telegram.enabled is True
+        assert telegram.token == "top-token"
+        assert telegram.extra["reply_prefix"] == "top"
+
    def test_bridges_quoted_false_session_notify_from_config_yaml(self, tmp_path, monkeypatch):
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
--- a/tests/gateway/test_delivery_silence_filter.py
+++ b/tests/gateway/test_delivery_silence_filter.py
@ -0,0 +1,202 @@
+"""Tests for the outbound silence-narration filter (anti-loop control).
+
+See the gateway delivery path: hallucinated "silence" tokens like ``*(silent)*``
+are dropped pre-send so bot-to-bot channels can't mirror them into a token-burning
+loop that crashes a model with "no content after all retries".
+"""
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform
+from gateway.delivery import (
+    DeliveryRouter,
+    DeliveryTarget,
+    _is_silence_narration,
+)
+
+
+# --- Truth table -----------------------------------------------------------
+
+POSITIVE_CASES = [
+    "*(silent)*",
+    "*Silence.*",
+    "🔇",
+    ".",
+    "…",
+    "...",
+    "(silent)",
+    "_silent_",
+    "silent",
+    " *(silent)* ",
+    "`silent`",
+    "~silent~",
+    "Silence",
+    "no response",
+    "No Reply.",
+]
+
+NEGATIVE_CASES = [
+    "Silence is golden — here is the plan...",
+    "Silent install completed",
+    "The deployment ran silently in the background",
+    "ok",
+    "👍",
+    "Here is the result:\n\n- item one\n- item two",
+    "I have nothing to add, but here is why: the build is green.",
+    "silently",  # word boundary — trailing letters mean it isn't a bare token
+    "no responses were collected from the survey",
+    # A 64+ char string that opens with a silence token must not be dropped.
+    "silent " + "x" * 70,
+    "",
+    "   ",
+]
+
+
+@pytest.mark.parametrize("content", POSITIVE_CASES)
+def test_is_silence_narration_positive(content):
+    assert _is_silence_narration(content) is True
+
+
+@pytest.mark.parametrize("content", NEGATIVE_CASES)
+def test_is_silence_narration_negative(content):
+    assert _is_silence_narration(content) is False
+
+
+def test_is_silence_narration_none_safe():
+    assert _is_silence_narration(None) is False
+
+
+def test_length_guard_rejects_long_strings():
+    # Exactly 65 chars of dots — over the 64-char guard, so not treated as narration.
+    assert _is_silence_narration("." * 65) is False
+    assert _is_silence_narration("." * 64) is True
+
+
+# --- Integration through DeliveryRouter ------------------------------------
+
+class RecordingAdapter:
+    def __init__(self):
+        self.calls = []
+
+    async def send(self, chat_id, content, metadata=None):
+        self.calls.append({"chat_id": chat_id, "content": content, "metadata": metadata})
+        return {"success": True}
+
+
+@pytest.mark.asyncio
+async def test_silence_narration_dropped_pre_send(tmp_path, monkeypatch):
+    monkeypatch.setattr("gateway.delivery.get_hermes_home", lambda: tmp_path)
+    monkeypatch.delenv("HERMES_FILTER_SILENCE_NARRATION", raising=False)
+    adapter = RecordingAdapter()
+    router = DeliveryRouter(GatewayConfig(), adapters={Platform.DISCORD: adapter})
+    target = DeliveryTarget.parse("discord:99887766")
+
+    result = await router._deliver_to_platform(target, "*(silent)*", metadata=None)
+
+    assert adapter.calls == []  # adapter.send never invoked
+    assert result == {
+        "success": True,
+        "filtered": "silence_narration",
+        "delivered": False,
+    }
+
+
+@pytest.mark.asyncio
+async def test_real_message_is_delivered(tmp_path, monkeypatch):
+    monkeypatch.setattr("gateway.delivery.get_hermes_home", lambda: tmp_path)
+    monkeypatch.delenv("HERMES_FILTER_SILENCE_NARRATION", raising=False)
+    adapter = RecordingAdapter()
+    router = DeliveryRouter(GatewayConfig(), adapters={Platform.DISCORD: adapter})
+    target = DeliveryTarget.parse("discord:99887766")
+
+    result = await router._deliver_to_platform(
+        target, "Silence is golden — here is the plan...", metadata=None
+    )
+
+    assert len(adapter.calls) == 1
+    assert adapter.calls[0]["content"] == "Silence is golden — here is the plan..."
+    assert result == {"success": True}
+
+
+@pytest.mark.asyncio
+async def test_config_opt_out_lets_silence_through(tmp_path, monkeypatch):
+    monkeypatch.setattr("gateway.delivery.get_hermes_home", lambda: tmp_path)
+    monkeypatch.delenv("HERMES_FILTER_SILENCE_NARRATION", raising=False)
+    adapter = RecordingAdapter()
+    config = GatewayConfig(filter_silence_narration=False)
+    router = DeliveryRouter(config, adapters={Platform.DISCORD: adapter})
+    target = DeliveryTarget.parse("discord:99887766")
+
+    result = await router._deliver_to_platform(target, "*(silent)*", metadata=None)
+
+    assert len(adapter.calls) == 1
+    assert adapter.calls[0]["content"] == "*(silent)*"
+    assert result == {"success": True}
+
+
+@pytest.mark.asyncio
+async def test_env_override_disables_filter(tmp_path, monkeypatch):
+    monkeypatch.setattr("gateway.delivery.get_hermes_home", lambda: tmp_path)
+    monkeypatch.setenv("HERMES_FILTER_SILENCE_NARRATION", "0")
+    adapter = RecordingAdapter()
+    # Config default is True, but env override wins.
+    router = DeliveryRouter(GatewayConfig(), adapters={Platform.DISCORD: adapter})
+    target = DeliveryTarget.parse("discord:99887766")
+
+    result = await router._deliver_to_platform(target, "🔇", metadata=None)
+
+    assert len(adapter.calls) == 1
+    assert result == {"success": True}
+
+
+@pytest.mark.asyncio
+async def test_env_override_enables_filter_over_config(tmp_path, monkeypatch):
+    monkeypatch.setattr("gateway.delivery.get_hermes_home", lambda: tmp_path)
+    monkeypatch.setenv("HERMES_FILTER_SILENCE_NARRATION", "1")
+    adapter = RecordingAdapter()
+    # Config says off, env override forces on.
+    config = GatewayConfig(filter_silence_narration=False)
+    router = DeliveryRouter(config, adapters={Platform.DISCORD: adapter})
+    target = DeliveryTarget.parse("discord:99887766")
+
+    result = await router._deliver_to_platform(target, "*(silent)*", metadata=None)
+
+    assert adapter.calls == []
+    assert result["filtered"] == "silence_narration"
+
+
+@pytest.mark.asyncio
+async def test_local_delivery_not_filtered(tmp_path, monkeypatch):
+    monkeypatch.setattr("gateway.delivery.get_hermes_home", lambda: tmp_path)
+    monkeypatch.delenv("HERMES_FILTER_SILENCE_NARRATION", raising=False)
+    router = DeliveryRouter(GatewayConfig(), adapters={})
+
+    results = await router.deliver(
+        content="*(silent)*",
+        targets=[DeliveryTarget.parse("local")],
+        job_id="silence-job",
+    )
+
+    # Local path saved the file (no loop risk) and was not filtered.
+    local_result = results["local"]
+    assert local_result["success"] is True
+    saved_path = local_result["result"]["path"]
+    assert saved_path.endswith(".md")
+
+
+# --- Config round-trip ------------------------------------------------------
+
+def test_config_flag_defaults_true():
+    assert GatewayConfig().filter_silence_narration is True
+
+
+def test_config_from_dict_parses_flag():
+    cfg = GatewayConfig.from_dict({"filter_silence_narration": False})
+    assert cfg.filter_silence_narration is False
+
+
+def test_config_to_dict_roundtrip():
+    cfg = GatewayConfig(filter_silence_narration=False)
+    assert cfg.to_dict()["filter_silence_narration"] is False
+    restored = GatewayConfig.from_dict(cfg.to_dict())
+    assert restored.filter_silence_narration is False
--- a/tests/gateway/test_empty_model_recovery.py
+++ b/tests/gateway/test_empty_model_recovery.py
@ -0,0 +1,147 @@
+"""Regression tests for #35314 — empty model on the post-interrupt recovery turn.
+
+After a ``stream_interrupt_abort`` during an active gateway session, the recovery
+turn was sometimes built with ``model=""`` (a transient config-cache miss returned
+an empty ``user_config``). Every API call then failed HTTP 400 "No models
+provided", "trying fallback..." was logged but never executed (the user had no
+fallback configured), and the session went silent until the user re-sent.
+
+These tests pin two fixes:
+  1. ``_resolve_session_agent_runtime`` caches the last successfully-resolved
+     model per session and recovers it when a fresh resolution comes back empty.
+  2. ``_has_pending_fallback`` gates the "trying fallback..." status so it is only
+     announced when a fallback chain actually exists.
+"""
+
+import threading
+
+import gateway.run as gateway_run
+
+
+def _make_runner():
+    runner = object.__new__(gateway_run.GatewayRunner)
+    runner._session_model_overrides = {}
+    runner._last_resolved_model = {}
+    runner._service_tier = None
+    runner._agent_cache = {}
+    runner._agent_cache_lock = threading.Lock()
+    return runner
+
+
+def _patch_resolution(monkeypatch, *, model_from_config: str, provider: str = "openrouter"):
+    """Stub gateway model + runtime resolution to a known state."""
+    monkeypatch.setattr(gateway_run, "_resolve_gateway_model", lambda cfg=None: model_from_config)
+    monkeypatch.setattr(
+        gateway_run,
+        "_resolve_runtime_agent_kwargs",
+        lambda: {
+            "provider": provider,
+            "api_key": "x",
+            "base_url": "https://openrouter.ai/api/v1",
+            "api_mode": "chat_completions",
+        },
+    )
+
+
+def test_normal_turn_caches_last_resolved_model(monkeypatch):
+    _patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash")
+    runner = _make_runner()
+    sk = "agent:main:discord:dm:123"
+
+    model, _ = runner._resolve_session_agent_runtime(session_key=sk, user_config={"model": {"default": "x"}})
+
+    assert model == "deepseek/deepseek-v4-flash"
+    # Cached per-session AND process-wide for first-seen-session recovery.
+    assert runner._last_resolved_model[sk] == "deepseek/deepseek-v4-flash"
+    assert runner._last_resolved_model["*"] == "deepseek/deepseek-v4-flash"
+
+
+def test_empty_model_recovers_session_last_good(monkeypatch):
+    runner = _make_runner()
+    sk = "agent:main:discord:dm:123"
+
+    # Turn 1: config has the model — cache it.
+    _patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash")
+    runner._resolve_session_agent_runtime(session_key=sk, user_config={"model": {"default": "x"}})
+
+    # Turn 2: simulate the transient empty config read (the #35314 race).
+    _patch_resolution(monkeypatch, model_from_config="", provider="")
+    model, _ = runner._resolve_session_agent_runtime(session_key=sk, user_config={})
+
+    assert model == "deepseek/deepseek-v4-flash", "recovery turn must reuse last-known-good, not build model=''"
+
+
+def test_empty_model_new_session_recovers_global_last_good(monkeypatch):
+    runner = _make_runner()
+
+    # Prime a different session so the process-wide "*" slot is populated.
+    _patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash")
+    runner._resolve_session_agent_runtime(session_key="agent:main:discord:dm:111", user_config={"model": {}})
+
+    # A brand-new session that hits an empty config read still recovers via "*".
+    _patch_resolution(monkeypatch, model_from_config="", provider="")
+    model, _ = runner._resolve_session_agent_runtime(session_key="agent:main:discord:dm:999", user_config={})
+
+    assert model == "deepseek/deepseek-v4-flash"
+
+
+def test_cold_start_empty_model_does_not_crash(monkeypatch):
+    """No last-good anywhere + empty config → returns '' gracefully (no exception)."""
+    _patch_resolution(monkeypatch, model_from_config="", provider="")
+    runner = _make_runner()
+
+    model, _ = runner._resolve_session_agent_runtime(session_key="agent:main:discord:dm:1", user_config={})
+
+    assert model == ""
+
+
+def test_bare_runner_without_cache_attr_does_not_crash(monkeypatch):
+    """object.__new__ runners (test helpers / pitfall #17) lack _last_resolved_model.
+
+    The getattr guard must tolerate the missing attribute.
+    """
+    _patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash")
+    runner = object.__new__(gateway_run.GatewayRunner)
+    runner._session_model_overrides = {}
+    runner._service_tier = None
+    # Deliberately omit _last_resolved_model.
+
+    model, _ = runner._resolve_session_agent_runtime(session_key="x", user_config={"model": {}})
+
+    assert model == "deepseek/deepseek-v4-flash"
+
+
+# ── _has_pending_fallback gate ──────────────────────────────────────────────
+
+
+def _bare_agent():
+    import run_agent
+
+    return object.__new__(run_agent.AIAgent)
+
+
+def test_has_pending_fallback_empty_chain():
+    agent = _bare_agent()
+    agent._fallback_chain = []
+    agent._fallback_index = 0
+    assert agent._has_pending_fallback() is False
+
+
+def test_has_pending_fallback_with_chain():
+    agent = _bare_agent()
+    agent._fallback_chain = [{"provider": "openai", "model": "gpt-5"}]
+    agent._fallback_index = 0
+    assert agent._has_pending_fallback() is True
+
+
+def test_has_pending_fallback_exhausted_chain():
+    agent = _bare_agent()
+    agent._fallback_chain = [{"provider": "openai", "model": "gpt-5"}]
+    agent._fallback_index = 1
+    assert agent._has_pending_fallback() is False
+
+
+def test_has_pending_fallback_missing_attrs():
+    """Bare agent with no fallback attributes set must default to False, not crash."""
+    agent = _bare_agent()
+    assert agent._has_pending_fallback() is False
--- a/tests/gateway/test_extract_local_files.py
+++ b/tests/gateway/test_extract_local_files.py
@ -336,9 +336,35 @@ class TestEdgeCases:
        paths, _ = _extract("File at /tmp/my file.png here")
        assert paths == []

-    def test_windows_path_not_matched(self):
-        """Windows-style paths should not match."""
-        paths, _ = _extract("See C:\\Users\\test\\image.png")
+    @pytest.mark.parametrize(
+        "content,expected",
+        [
+            # Backslash separators (native Windows style)
+            ("See C:\\Users\\test\\image.png here", "C:\\Users\\test\\image.png"),
+            # Forward slashes with drive letter (common in cross-platform code)
+            ("See C:/Users/test/image.png here", "C:/Users/test/image.png"),
+            # Non-C: drive
+            ("Video at D:/data/clip.mp4 ready", "D:/data/clip.mp4"),
+            # Lowercase drive letter
+            ("Path e:/audio/track.mp3 done", "e:/audio/track.mp3"),
+        ],
+    )
+    def test_windows_drive_letter_paths_matched(self, content, expected):
+        """Windows drive-letter paths (C:/..., C:\\...) must be detected (#34632).
+
+        Prior behavior anchored on (?:~/|/) only, which silently dropped
+        Windows absolute paths so the agent's bare-path references were
+        sent as text instead of native uploads.
+        """
+        paths, cleaned = _extract(content)
+        assert paths == [expected]
+        assert expected not in cleaned
+
+    def test_relative_windows_path_not_matched(self):
+        """A bare Windows-style filename without a drive letter must still
+        not match (e.g. ``foo\\bar.png`` is treated as relative, like its
+        Unix sibling ``foo/bar.png``)."""
+        paths, _ = _extract("File at foo\\bar.png here")
        assert paths == []

    def test_relative_path_not_matched(self):
--- a/tests/gateway/test_platform_base.py
+++ b/tests/gateway/test_platform_base.py
@ -361,6 +361,45 @@ class TestExtractMedia:
        assert "[[audio_as_voice]]" not in cleaned
        assert "[[as_document]]" not in cleaned

+    # Windows path support — regression coverage for #34632
+
+    def test_media_tag_windows_backslash_path(self):
+        """extract_media should recognise Windows backslash paths."""
+        media, cleaned = BasePlatformAdapter.extract_media(
+            r"MEDIA:C:\Users\kotsu\file.pdf"
+        )
+        assert len(media) == 1
+        assert media[0][0].endswith("file.pdf")
+
+    def test_media_tag_windows_forward_slash_path(self):
+        """extract_media should recognise Windows forward-slash paths."""
+        media, cleaned = BasePlatformAdapter.extract_media(
+            "MEDIA:C:/Users/kotsu/file.pdf"
+        )
+        assert len(media) == 1
+        assert media[0][0].endswith("file.pdf")
+
+    def test_media_tag_windows_drive_root(self):
+        """extract_media should recognise a path at the drive root."""
+        media, cleaned = BasePlatformAdapter.extract_media(
+            r"MEDIA:D:\report.md"
+        )
+        assert len(media) == 1
+        assert media[0][0].endswith("report.md")
+
+    def test_media_tag_unix_paths_still_work(self):
+        """Unix absolute and tilde paths must still extract after Windows change."""
+        for content in ["MEDIA:/tmp/audio.ogg", r"MEDIA:~/docs/notes.md"]:
+            media, _ = BasePlatformAdapter.extract_media(content)
+            assert len(media) == 1, f"Failed for: {content}"
+
+    def test_relative_path_still_ignored(self):
+        """Relative Windows-style paths (no drive letter) must not match."""
+        media, _ = BasePlatformAdapter.extract_media(
+            r"MEDIA:Users\kotsu\file.pdf"
+        )
+        assert media == []
+

 class TestMediaExtensionAllowlistParity:
    """Regression coverage for issue #34517 — the MEDIA: extension black hole.
--- a/tests/gateway/test_platform_reconnect.py
+++ b/tests/gateway/test_platform_reconnect.py
@ -294,19 +294,20 @@ class TestPlatformReconnectWatcher:
        assert runner._failed_platforms[Platform.TELEGRAM]["attempts"] == 2

    @pytest.mark.asyncio
-    async def test_reconnect_pauses_after_circuit_breaker_threshold(self):
-        """After enough consecutive retryable failures, the watcher should
-        *pause* the platform (keep it in the queue but stop hammering it),
-        not drop it. The user resumes via /platform resume.
+    async def test_reconnect_never_auto_pauses_retryable_failures(self):
+        """Retryable failures (network/DNS) must keep retrying indefinitely —
+        the watcher must NOT auto-pause them. Auto-pausing a transiently-failed
+        platform left bots silently dead after a DNS blip (#35284). The pause
+        circuit breaker remains available for manual /platform pause only.
        """
        runner = _make_runner()

        platform_config = PlatformConfig(enabled=True, token="test")
-        # 9 prior attempts — the next failure will be the 10th and should
-        # trip the circuit breaker.
+        # Far past the old circuit-breaker threshold (10): even after many
+        # consecutive retryable failures the platform must stay unpaused.
        runner._failed_platforms[Platform.TELEGRAM] = {
            "config": platform_config,
-            "attempts": 9,
+            "attempts": 25,
            "next_retry": time.monotonic() - 1,
        }

@ -332,12 +333,15 @@ class TestPlatformReconnectWatcher:

            await run_one_iteration()

-        # Platform stays in queue — paused, not dropped
+        # Platform stays in queue and keeps retrying — never auto-paused.
        assert Platform.TELEGRAM in runner._failed_platforms
        info = runner._failed_platforms[Platform.TELEGRAM]
-        assert info["paused"] is True
-        assert info["attempts"] == 10
-        assert "pause_reason" in info
+        assert info.get("paused") is not True
+        assert "pause_reason" not in info
+        assert info["attempts"] == 26
+        # next_retry is pushed out by the backoff (capped at 300s), not inf.
+        assert info["next_retry"] != float("inf")
+        assert info["next_retry"] > time.monotonic()

    @pytest.mark.asyncio
    async def test_reconnect_skips_paused_platforms(self):
--- a/tests/gateway/test_run_tool_media_re.py
+++ b/tests/gateway/test_run_tool_media_re.py
@ -0,0 +1,147 @@
+r"""Tests for _TOOL_MEDIA_RE regex patterns in gateway/run.py.
+
+Issue #34632: The _TOOL_MEDIA_RE patterns in GatewayRunner used (?:/|~\/) to
+anchor paths, which only matched Unix-style absolute and home-relative paths.
+Windows absolute paths (C:\\Users\\..., D:/...) were silently ignored, causing
+MEDIA directive delivery to fail on Windows.
+
+Fix: Add [A-Za-z]:[/\\\\] as a third anchor alternative in both patterns.
+
+Two identical _TOOL_MEDIA_RE patterns exist in run.py:
+1. History scanning (~L17223): collects already-seen media paths
+2. Result scanning (~L17549): extracts new media tags from agent output
+
+This test file validates that both equivalent regex patterns correctly match
+Windows paths while preserving existing Unix path matching behavior.
+"""
+
+import re
+
+import pytest
+
+
+# Reconstruct the exact _TOOL_MEDIA_RE pattern from gateway/run.py
+# The pattern is built by concatenating raw string parts:
+#   r'MEDIA:((?:[A-Za-z]:[/\\]|/|~\/)\S+\.(?:png|...))'
+_TOOL_MEDIA_RE = re.compile(
+    r'MEDIA:((?:[A-Za-z]:[/\\]|/|~\/)\S+\.(?:png|jpe?g|gif|webp|'
+    r'mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|'
+    r'flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|'
+    r'txt|csv|apk|ipa))',
+    re.IGNORECASE,
+)
+
+
+# Reconstruct the pre-fix pattern (without Windows anchor) for regression proof
+_TOOL_MEDIA_RE_PRE_FIX = re.compile(
+    r'MEDIA:((?:/|~\/)\S+\.(?:png|jpe?g|gif|webp|'
+    r'mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|'
+    r'flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|'
+    r'txt|csv|apk|ipa))',
+    re.IGNORECASE,
+)
+
+
+class TestToolMediaReWindowsPaths:
+    """Issue #34632: _TOOL_MEDIA_RE must match Windows absolute paths."""
+
+    # ── Positive: Windows paths now match ──────────────────────────
+
+    @pytest.mark.parametrize("media_tag, expected_path", [
+        # Windows backslash paths
+        ("MEDIA:C:\\Users\\test\\image.png", "C:\\Users\\test\\image.png"),
+        ("MEDIA:D:\\data\\report.pdf", "D:\\data\\report.pdf"),
+        ("MEDIA:E:\\Photos\\vacation.jpg", "E:\\Photos\\vacation.jpg"),
+        # Windows forward-slash paths
+        ("MEDIA:C:/Users/test/image.png", "C:/Users/test/image.png"),
+        ("MEDIA:D:/data/report.pdf", "D:/data/report.pdf"),
+        # Mixed separators
+        ("MEDIA:C:\\Users/test\\image.webp", "C:\\Users/test\\image.webp"),
+        # Various extensions
+        ("MEDIA:F:\\videos\\clip.mp4", "F:\\videos\\clip.mp4"),
+        ("MEDIA:G:\\audio\\song.mp3", "G:\\audio\\song.mp3"),
+        ("MEDIA:H:\\docs\\sheet.xlsx", "H:\\docs\\sheet.xlsx"),
+        ("MEDIA:Z:\\archive\\backup.zip", "Z:\\archive\\backup.zip"),
+    ])
+    def test_windows_paths_match(self, media_tag, expected_path):
+        """Windows absolute paths with drive letters are matched."""
+        match = _TOOL_MEDIA_RE.search(media_tag)
+        assert match is not None, f"Should match: {media_tag}"
+        assert match.group(1) == expected_path
+
+    # ── Positive: Unix paths still match ───────────────────────────
+
+    @pytest.mark.parametrize("media_tag, expected_path", [
+        ("MEDIA:/tmp/output.png", "/tmp/output.png"),
+        ("MEDIA:/var/log/report.pdf", "/var/log/report.pdf"),
+        ("MEDIA:/home/user/docs/file.txt", "/home/user/docs/file.txt"),
+        # Home-relative
+        ("MEDIA:~/Downloads/image.jpg", "~/Downloads/image.jpg"),
+        ("MEDIA:~/Documents/report.pdf", "~/Documents/report.pdf"),
+    ])
+    def test_unix_paths_still_match(self, media_tag, expected_path):
+        """Unix-style absolute and home-relative paths still match."""
+        match = _TOOL_MEDIA_RE.search(media_tag)
+        assert match is not None, f"Should match: {media_tag}"
+        assert match.group(1) == expected_path
+
+    # ── Negative: invalid paths don't match ────────────────────────
+
+    @pytest.mark.parametrize("text", [
+        "No MEDIA tag here",
+        "MEDIA:relative/path/file.png",       # relative path, no anchor
+        "MEDIA:file.png",                      # no directory
+        "MEDIA:C:file.png",                    # drive letter but no separator
+        "MEDIA:/path/to/file.unknown",         # unsupported extension
+        "MEDIA:/path/to/file",                 # no extension
+        "MEDIA:",                               # empty path
+    ])
+    def test_invalid_paths_dont_match(self, text):
+        """Non-MEDIA text, relative paths, and unsupported extensions are ignored."""
+        match = _TOOL_MEDIA_RE.search(text)
+        assert match is None, f"Should NOT match: {text}"
+
+    # ── Negative/preserved: old pattern rejects Windows paths ──────
+
+    @pytest.mark.parametrize("media_tag", [
+        "MEDIA:C:\\Users\\test\\image.png",
+        "MEDIA:D:/data/report.pdf",
+        "MEDIA:C:\\path\\file.jpg",
+    ])
+    def test_pre_fix_pattern_rejects_windows(self, media_tag):
+        """The pre-fix pattern (without Windows anchor) does NOT match Windows paths.
+        This proves the fix is necessary — without it, these paths are silently ignored."""
+        match = _TOOL_MEDIA_RE_PRE_FIX.search(media_tag)
+        assert match is None, f"Pre-fix pattern should NOT match: {media_tag}"
+
+    # ── Edge cases ─────────────────────────────────────────────────
+
+    def test_multiple_media_tags_in_content(self):
+        """Multiple MEDIA tags in the same content are all found."""
+        content = (
+            "Some text MEDIA:C:\\path\\img.png and more MEDIA:/tmp/out.pdf trailing"
+        )
+        matches = list(_TOOL_MEDIA_RE.finditer(content))
+        assert len(matches) == 2
+        paths = [m.group(1) for m in matches]
+        assert "C:\\path\\img.png" in paths
+        assert "/tmp/out.pdf" in paths
+
+    def test_case_insensitive_drive_letter(self):
+        """Drive letters are case-insensitive due to re.IGNORECASE."""
+        match_lower = _TOOL_MEDIA_RE.search("MEDIA:c:\\path\\file.png")
+        match_upper = _TOOL_MEDIA_RE.search("MEDIA:C:\\path\\file.png")
+        assert match_lower is not None
+        assert match_upper is not None
+        assert match_lower.group(1).lower() == match_upper.group(1).lower()
+
+    @pytest.mark.parametrize("media_tag", [
+        "MEDIA:C:\\path\\file.jpeg",
+        "MEDIA:C:\\path\\file.JPG",
+        "MEDIA:C:\\path\\file.GIF",
+        "MEDIA:C:\\path\\file.MP4",
+    ])
+    def test_case_insensitive_extensions(self, media_tag):
+        """File extensions are matched case-insensitively."""
+        match = _TOOL_MEDIA_RE.search(media_tag)
+        assert match is not None, f"Should match: {media_tag}"
--- a/tests/gateway/test_status_command.py
+++ b/tests/gateway/test_status_command.py
@ -97,7 +97,7 @@ async def test_status_command_reports_running_agent_without_interrupt(monkeypatc
    result = await runner._handle_message(_make_event("/status"))

    assert "**Session ID:** `sess-1`" in result
-    assert "**Tokens:** 321" in result
+    assert "**Cumulative API tokens (re-sent each call):** 321" in result
    assert "**Agent Running:** Yes ⚡" in result
    assert "**Title:**" not in result
    running_agent.interrupt.assert_not_called()
@ -150,7 +150,7 @@ async def test_status_command_reads_token_totals_from_session_db():
    result = await runner._handle_message(_make_event("/status"))

    # 1000 + 250 + 500 + 100 + 50 = 1,900
-    assert "**Tokens:** 1,900" in result
+    assert "**Cumulative API tokens (re-sent each call):** 1,900" in result


@pytest.mark.asyncio
@ -171,7 +171,7 @@ async def test_status_command_tokens_zero_when_session_db_row_missing():

    result = await runner._handle_message(_make_event("/status"))

-    assert "**Tokens:** 0" in result
+    assert "**Cumulative API tokens (re-sent each call):** 0" in result


@pytest.mark.asyncio
--- a/tests/gateway/test_telegram_model_picker.py
+++ b/tests/gateway/test_telegram_model_picker.py
@ -146,6 +146,78 @@ class TestTelegramModelPicker:
        # State is cleaned up after a successful switch.
        assert "12345" not in adapter._model_picker_state

+    @pytest.mark.asyncio
+    async def test_provider_group_folds_and_drills_down(self, monkeypatch):
+        """A provider family (e.g. MiniMax) collapses to one mpg: button at
+        the top level; tapping it expands to its authenticated members as
+        mp: buttons. A group reduced to a single authenticated member shows
+        no submenu (direct mp: button).
+
+        Inspects callback_data by recording every InlineKeyboardButton built,
+        which is robust to whether `telegram` is the real SDK or the module
+        mock (the SDK markup objects don't expose a plain iterable under the
+        mock)."""
+        import gateway.platforms.telegram as tg
+
+        built: list = []
+
+        class _RecordingButton:
+            def __init__(self, text, callback_data=None, **kw):
+                self.text = text
+                self.callback_data = callback_data
+                built.append(callback_data)
+
+        class _RecordingMarkup:
+            def __init__(self, rows):
+                self.inline_keyboard = rows
+
+        monkeypatch.setattr(tg, "InlineKeyboardButton", _RecordingButton)
+        monkeypatch.setattr(tg, "InlineKeyboardMarkup", _RecordingMarkup)
+
+        adapter = _make_adapter()
+
+        async def mock_send_message(**kwargs):
+            return SimpleNamespace(message_id=101)
+
+        adapter._bot.send_message = AsyncMock(side_effect=mock_send_message)
+
+        providers = [
+            {"slug": "minimax", "name": "MiniMax", "total_models": 2},
+            {"slug": "minimax-cn", "name": "MiniMax (China)", "total_models": 3},
+            {"slug": "xai", "name": "xAI", "total_models": 1},  # lone group member
+        ]
+
+        await adapter.send_model_picker(
+            chat_id="12345",
+            providers=providers,
+            current_model="m",
+            current_provider="minimax",
+            session_key="s",
+            on_model_selected=AsyncMock(),
+            metadata=None,
+        )
+
+        # Top-level keyboard: MiniMax family folded into one group button;
+        # xai (lone member) degraded to a direct provider button.
+        assert "mpg:minimax" in built
+        assert "mp:xai" in built
+        assert "mp:minimax" not in built
+        assert "mp:minimax-cn" not in built
+
+        # Drill into the MiniMax group → members appear as mp: buttons + back.
+        built.clear()
+        query = AsyncMock()
+        query.message = MagicMock()
+        query.message.chat_id = 12345
+        query.answer = AsyncMock()
+        query.edit_message_text = AsyncMock()
+
+        await adapter._handle_model_picker_callback(query, "mpg:minimax", "12345")
+
+        assert "mp:minimax" in built
+        assert "mp:minimax-cn" in built
+        assert "mb" in built  # back-to-providers button present
+
    @pytest.mark.asyncio
    async def test_retries_without_thread_when_thread_not_found(self):
        adapter = _make_adapter()
--- a/tests/gateway/test_weixin.py
+++ b/tests/gateway/test_weixin.py
@ -11,6 +11,7 @@ import pytest
 from gateway.config import PlatformConfig
 from gateway.config import GatewayConfig, HomeChannel, Platform, _apply_env_overrides
 from gateway.platforms.base import SendResult
+from gateway.platforms.base import MessageEvent, MessageType
 from gateway.platforms import weixin
 from gateway.platforms.weixin import ContextTokenStore, WeixinAdapter
 from tools.send_message_tool import _parse_target_ref, _send_to_platform
@ -853,15 +854,27 @@ class TestWeixinContentDedup:
        adapter = _make_adapter()
        adapter._poll_session = object()
        adapter.handle_message = AsyncMock()
+        # Tighten the text-debounce delay so the flush completes quickly.
+        adapter._text_batch_delay_seconds = 0.05
+        adapter._text_batch_split_delay_seconds = 0.05

        base_msg = {
            "from_user_id": "wxid_user1",
            "item_list": [{"type": 1, "text_item": {"text": "hello world"}}],
        }

-        asyncio.run(adapter._process_message({**base_msg, "message_id": "msg-1"}))
-        asyncio.run(adapter._process_message({**base_msg, "message_id": "msg-2"}))
+        async def _drive():
+            # Both inbound messages share the same event loop so the debounce
+            # task created by the first one survives to be flushed.
+            await adapter._process_message({**base_msg, "message_id": "msg-1"})
+            await adapter._process_message({**base_msg, "message_id": "msg-2"})
+            # Wait out the quiet period so the buffered text batch flushes.
+            await asyncio.sleep(0.2)

+        asyncio.run(_drive())
+
+        # Content-dedup drops the second (duplicate) message before it is even
+        # enqueued, so only one combined dispatch reaches handle_message.
        assert adapter.handle_message.await_count == 1
        event = adapter.handle_message.await_args[0][0]
        assert event.text == "hello world"
@ -882,3 +895,76 @@ class TestWeixinContentDedup:
        assert adapter.handle_message.await_count == 0
        # is_duplicate should only be called for message_id, never for content
        assert all("content:" not in str(call) for call in adapter._dedup.is_duplicate.call_args_list)
+
+
+class TestWeixinTextDebounce:
+    """Text-debounce batching for rapid multi-message bursts (issue #35301).
+
+    Delays are read from ``config.extra`` (config.yaml), not env vars.
+    """
+
+    def test_batch_delays_default_from_config(self):
+        adapter = _make_adapter()
+        assert adapter._text_batch_delay_seconds == 3.0
+        assert adapter._text_batch_split_delay_seconds == 5.0
+
+    def test_batch_delays_overridden_via_config_extra(self):
+        adapter = WeixinAdapter(
+            PlatformConfig(
+                enabled=True,
+                token="test-token",
+                extra={
+                    "account_id": "test-account",
+                    "text_batch_delay_seconds": "0.5",
+                    "text_batch_split_delay_seconds": 1.5,
+                },
+            )
+        )
+        assert adapter._text_batch_delay_seconds == 0.5
+        assert adapter._text_batch_split_delay_seconds == 1.5
+
+    def test_invalid_config_value_falls_back_to_default(self):
+        adapter = WeixinAdapter(
+            PlatformConfig(
+                enabled=True,
+                token="test-token",
+                extra={
+                    "account_id": "test-account",
+                    "text_batch_delay_seconds": "not-a-number",
+                    "text_batch_split_delay_seconds": -4,
+                },
+            )
+        )
+        assert adapter._text_batch_delay_seconds == 3.0
+        assert adapter._text_batch_split_delay_seconds == 5.0
+
+    def test_rapid_texts_collapse_into_single_dispatch(self):
+        adapter = _make_adapter()
+        adapter._text_batch_delay_seconds = 0.05
+        adapter._text_batch_split_delay_seconds = 0.05
+        dispatched = []
+
+        async def _capture(event):
+            dispatched.append(event.text)
+
+        adapter.handle_message = _capture
+
+        def _event(text):
+            return MessageEvent(
+                text=text,
+                message_type=MessageType.TEXT,
+                source=adapter.build_source(
+                    chat_id="wxid_user1", chat_type="dm",
+                    user_id="wxid_user1", user_name="wxid_user1",
+                ),
+            )
+
+        async def _drive():
+            adapter._enqueue_text_event(_event("one"))
+            adapter._enqueue_text_event(_event("two"))
+            adapter._enqueue_text_event(_event("three"))
+            assert dispatched == []  # nothing flushed during the burst
+            await asyncio.sleep(0.2)
+
+        asyncio.run(_drive())
+        assert dispatched == ["one\ntwo\nthree"]
--- a/tests/gateway/test_whatsapp_text_batching.py
+++ b/tests/gateway/test_whatsapp_text_batching.py
@ -0,0 +1,107 @@
+"""Text-debounce batching for the WhatsApp adapter (issue #35301).
+
+WhatsApp delivers rapid multi-message bursts (forwarded batches, paste-splits)
+individually.  Without debounce each fragment triggers a separate agent
+invocation, wasting tokens and flooding the user with reply fragments.  This
+mirrors the Telegram/WeCom/Feishu pattern.
+
+Batch delays are read from ``config.extra`` (config.yaml), not env vars.
+"""
+
+import asyncio
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import MessageEvent, MessageType
+from gateway.platforms.whatsapp import WhatsAppAdapter
+from gateway.session import SessionSource
+
+
+def _make_adapter(**extra):
+    base = {"session_name": "test"}
+    base.update(extra)
+    return WhatsAppAdapter(PlatformConfig(enabled=True, extra=base))
+
+
+def _event(text):
+    src = SessionSource(
+        platform=Platform.WHATSAPP,
+        chat_id="chat123",
+        chat_type="dm",
+        user_id="user1",
+        user_name="tester",
+    )
+    return MessageEvent(text=text, message_type=MessageType.TEXT, source=src)
+
+
+def test_batch_delays_default_from_config():
+    adapter = _make_adapter()
+    assert adapter._text_batch_delay_seconds == 5.0
+    assert adapter._text_batch_split_delay_seconds == 10.0
+
+
+def test_batch_delays_overridden_via_config_extra():
+    adapter = _make_adapter(
+        text_batch_delay_seconds="2.5",
+        text_batch_split_delay_seconds=7,
+    )
+    assert adapter._text_batch_delay_seconds == 2.5
+    assert adapter._text_batch_split_delay_seconds == 7.0
+
+
+def test_invalid_config_value_falls_back_to_default():
+    adapter = _make_adapter(
+        text_batch_delay_seconds="garbage",
+        text_batch_split_delay_seconds=-3,
+    )
+    assert adapter._text_batch_delay_seconds == 5.0
+    assert adapter._text_batch_split_delay_seconds == 10.0
+
+
+def test_env_var_is_ignored(monkeypatch):
+    # Config-only path: the legacy HERMES_* env var must NOT influence delays.
+    monkeypatch.setenv("HERMES_WHATSAPP_TEXT_BATCH_DELAY_SECONDS", "99")
+    adapter = _make_adapter()
+    assert adapter._text_batch_delay_seconds == 5.0
+
+
+def test_rapid_texts_collapse_into_single_dispatch():
+    adapter = _make_adapter(
+        text_batch_delay_seconds=0.05,
+        text_batch_split_delay_seconds=0.05,
+    )
+    dispatched = []
+
+    async def _capture(event):
+        dispatched.append(event.text)
+
+    adapter.handle_message = _capture
+
+    async def _drive():
+        adapter._enqueue_text_event(_event("one"))
+        adapter._enqueue_text_event(_event("two"))
+        adapter._enqueue_text_event(_event("three"))
+        assert dispatched == []  # nothing flushed during the burst
+        await asyncio.sleep(0.2)
+
+    asyncio.run(_drive())
+    assert dispatched == ["one\ntwo\nthree"]
+
+
+def test_lone_message_dispatched_alone():
+    adapter = _make_adapter(
+        text_batch_delay_seconds=0.05,
+        text_batch_split_delay_seconds=0.05,
+    )
+    dispatched = []
+
+    async def _capture(event):
+        dispatched.append(event.text)
+
+    adapter.handle_message = _capture
+
+    async def _drive():
+        adapter._enqueue_text_event(_event("solo"))
+        await asyncio.sleep(0.2)
+
+    asyncio.run(_drive())
+    assert dispatched == ["solo"]
--- a/tests/hermes_cli/test_cmd_update.py
+++ b/tests/hermes_cli/test_cmd_update.py
@ -39,6 +39,45 @@ def mock_args():
    return SimpleNamespace()


+class TestCmdUpdatePip:
+    """Regression tests for pip-install update flows."""
+
+    @patch("shutil.which", return_value="/usr/bin/uv")
+    @patch("subprocess.run")
+    def test_update_pip_exports_virtualenv_from_sys_prefix(
+        self, mock_run, _mock_which, mock_args, monkeypatch
+    ):
+        from hermes_cli import main as hm
+
+        mock_run.return_value = subprocess.CompletedProcess([], 0, stdout="", stderr="")
+        monkeypatch.delenv("VIRTUAL_ENV", raising=False)
+        monkeypatch.setattr(hm.sys, "prefix", "/tmp/hermes-launcher-venv")
+        monkeypatch.setattr(hm.sys, "base_prefix", "/usr")
+
+        hm._cmd_update_pip(mock_args)
+
+        assert mock_run.call_count == 1
+        assert mock_run.call_args.args[0] == ["/usr/bin/uv", "pip", "install", "--upgrade", "hermes-agent"]
+        assert mock_run.call_args.kwargs["env"]["VIRTUAL_ENV"] == "/tmp/hermes-launcher-venv"
+
+    @patch("shutil.which", return_value="/usr/bin/uv")
+    @patch("subprocess.run")
+    def test_update_pip_does_not_export_virtualenv_for_system_python(
+        self, mock_run, _mock_which, mock_args, monkeypatch
+    ):
+        from hermes_cli import main as hm
+
+        mock_run.return_value = subprocess.CompletedProcess([], 0, stdout="", stderr="")
+        monkeypatch.delenv("VIRTUAL_ENV", raising=False)
+        monkeypatch.setattr(hm.sys, "prefix", "/usr")
+        monkeypatch.setattr(hm.sys, "base_prefix", "/usr")
+
+        hm._cmd_update_pip(mock_args)
+
+        assert mock_run.call_count == 1
+        assert "env" not in mock_run.call_args.kwargs
+
+
 class TestCmdUpdateBranchFallback:
    """cmd_update falls back to main when current branch has no remote counterpart."""

--- a/tests/hermes_cli/test_copilot_in_model_list.py
+++ b/tests/hermes_cli/test_copilot_in_model_list.py
@ -6,25 +6,6 @@ from unittest.mock import patch
 from hermes_cli.model_switch import list_authenticated_providers


-@patch.dict(os.environ, {"GH_TOKEN": "test-key"}, clear=False)
-def test_copilot_picker_keeps_curated_copilot_models_when_live_catalog_unavailable():
-    with patch("agent.models_dev.fetch_models_dev", return_value={}), \
-         patch("hermes_cli.models._resolve_copilot_catalog_api_key", return_value="gh-token"), \
-         patch("hermes_cli.models._fetch_github_models", return_value=None):
-        providers = list_authenticated_providers(current_provider="openrouter", max_models=50)
-
-    copilot = next((p for p in providers if p["slug"] == "copilot"), None)
-
-    assert copilot is not None
-    assert "gpt-5.4" in copilot["models"]
-    assert "claude-sonnet-4.6" in copilot["models"]
-    assert "claude-sonnet-4" in copilot["models"]
-    assert "claude-sonnet-4.5" in copilot["models"]
-    assert "claude-haiku-4.5" in copilot["models"]
-    assert "gemini-3.1-pro-preview" in copilot["models"]
-    assert "claude-opus-4.6" not in copilot["models"]
-
-
@patch.dict(os.environ, {"GH_TOKEN": "test-key"}, clear=False)
 def test_copilot_picker_uses_live_catalog_when_available():
    live_models = ["gpt-5.4", "claude-sonnet-4.6", "gemini-3.1-pro-preview"]
--- a/tests/hermes_cli/test_dashboard_auth_ws_auth.py
+++ b/tests/hermes_cli/test_dashboard_auth_ws_auth.py
@ -80,6 +80,25 @@ def loopback_app():
    web_server.app.state.auth_required = prev_required


+@pytest.fixture
+def insecure_public_app():
+    """web_server.app configured for all-interfaces insecure mode."""
+    _reset_for_tests()
+    clear_providers()
+    prev_host = getattr(web_server.app.state, "bound_host", None)
+    prev_port = getattr(web_server.app.state, "bound_port", None)
+    prev_required = getattr(web_server.app.state, "auth_required", None)
+    web_server.app.state.bound_host = "0.0.0.0"
+    web_server.app.state.bound_port = 9120
+    web_server.app.state.auth_required = False
+    client = TestClient(web_server.app, base_url="http://192.168.0.222:9120")
+    yield client
+    _reset_for_tests()
+    web_server.app.state.bound_host = prev_host
+    web_server.app.state.bound_port = prev_port
+    web_server.app.state.auth_required = prev_required
+
+
 def _logged_in(client: TestClient) -> None:
    """Drive the stub OAuth round trip so the client holds session cookies."""
    r1 = client.get("/auth/login?provider=stub", follow_redirects=False)
@ -143,6 +162,30 @@ class TestWsTicketEndpoint:
 # ---------------------------------------------------------------------------


+@pytest.fixture
+def insecure_explicit_host_app():
+    """web_server.app bound to an explicit non-loopback host (--insecure).
+
+    Models `--host 100.64.0.10 --insecure` (e.g. a Tailscale IP behind
+    `tailscale serve`) — a specific address rather than the all-interfaces
+    0.0.0.0 wildcard.
+    """
+    _reset_for_tests()
+    clear_providers()
+    prev_host = getattr(web_server.app.state, "bound_host", None)
+    prev_port = getattr(web_server.app.state, "bound_port", None)
+    prev_required = getattr(web_server.app.state, "auth_required", None)
+    web_server.app.state.bound_host = "100.64.0.10"
+    web_server.app.state.bound_port = 9119
+    web_server.app.state.auth_required = False
+    client = TestClient(web_server.app, base_url="http://100.64.0.10:9119")
+    yield client
+    _reset_for_tests()
+    web_server.app.state.bound_host = prev_host
+    web_server.app.state.bound_port = prev_port
+    web_server.app.state.auth_required = prev_required
+
+
 def _fake_ws(*, query: dict, client_host: str = "127.0.0.1", path: str = "/api/pty"):
    """Build a stand-in for starlette.WebSocket good enough for _ws_auth_ok."""

@ -281,6 +324,48 @@ class TestWsRequestIsAllowedGated:
        ws.headers = {"host": "127.0.0.1:8080"}
        assert web_server._ws_request_is_allowed(ws) is True

+    def test_non_loopback_peer_allowed_in_insecure_public_mode(self, insecure_public_app):
+        """`--host 0.0.0.0 --insecure` is an explicit LAN/public opt-in.
+
+        Regression coverage for the dashboard `/chat` breakage where the
+        HTML shell loaded on 9120 but every WebSocket upgrade was rejected
+        with 403 because the loopback-only peer guard still ran even though
+        the operator intentionally exposed the dashboard on all interfaces.
+        """
+        ws = _fake_ws(query={}, client_host="192.168.0.55")
+        ws.headers = {
+            "host": "192.168.0.222:9120",
+            "origin": "http://192.168.0.222:9120",
+        }
+        assert web_server._ws_request_is_allowed(ws) is True
+
+    def test_peer_allowed_on_explicit_non_loopback_bind(self, insecure_explicit_host_app):
+        """`--host 100.64.0.10 --insecure` (Tailscale/LAN IP) is an explicit
+        non-loopback opt-in too — not just the 0.0.0.0 wildcard.
+
+        Regression coverage: the merged 0.0.0.0/:: fix did not cover binding
+        directly to a specific tailnet/LAN address, so `/chat` HTML loaded but
+        WS upgrades were still rejected by the loopback-only peer guard.
+        """
+        ws = _fake_ws(query={}, client_host="100.64.0.99")
+        ws.headers = {
+            "host": "100.64.0.10:9119",
+            "origin": "http://100.64.0.10:9119",
+        }
+        assert web_server._ws_request_is_allowed(ws) is True
+
+    def test_rebinding_host_rejected_on_explicit_non_loopback_bind(
+        self, insecure_explicit_host_app
+    ):
+        """Lifting the peer-IP gate for an explicit bind must NOT lift the
+        DNS-rebinding Host guard: a mismatched Host header is still rejected,
+        because an explicit non-loopback bind requires an exact Host match in
+        `_is_accepted_host` (unlike the 0.0.0.0 wildcard, which accepts any).
+        """
+        ws = _fake_ws(query={}, client_host="100.64.0.99")
+        ws.headers = {"host": "evil.example.com"}
+        assert web_server._ws_request_is_allowed(ws) is False
+
    def test_host_origin_guard_still_runs_in_gated_mode(self, gated_app):
        """Bypassing the peer-IP check must not bypass the DNS-rebinding
        Host header guard — that one still protects against attacker
--- a/tests/hermes_cli/test_gmi_provider.py
+++ b/tests/hermes_cli/test_gmi_provider.py
@ -80,14 +80,6 @@ class TestGmiConfigRegistry:


 class TestGmiModelCatalog:
-    def test_static_model_fallback_exists(self):
-        assert "gmi" in _PROVIDER_MODELS
-        models = _PROVIDER_MODELS["gmi"]
-        assert "zai-org/GLM-5.1-FP8" in models
-        assert "deepseek-ai/DeepSeek-V3.2" in models
-        assert "moonshotai/Kimi-K2.5" in models
-        assert "anthropic/claude-sonnet-4.6" in models
-
    def test_canonical_provider_entry(self):
        slugs = [p.slug for p in CANONICAL_PROVIDERS]
        assert "gmi" in slugs
@ -267,11 +259,6 @@ class TestGmiModelMetadata:


 class TestGmiAuxiliary:
-    def test_aux_default_model(self):
-        from agent.auxiliary_client import _get_aux_model_for_provider
-
-        assert _get_aux_model_for_provider("gmi") == "google/gemini-3.1-flash-lite-preview"
-
    def test_resolve_provider_client_uses_gmi_aux_default(self, monkeypatch):
        monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")

--- a/tests/hermes_cli/test_kanban_blocked_sticky.py
+++ b/tests/hermes_cli/test_kanban_blocked_sticky.py
@ -106,20 +106,30 @@ def test_worker_block_on_child_with_done_parents_is_still_sticky(kanban_home: Pa

 def test_circuit_breaker_block_still_auto_promotes(kanban_home: Path) -> None:
    """A child that was put into ``blocked`` *without* a worker-issued
-    ``kanban_block`` (e.g. circuit-breaker after repeated spawn
-    failures, manual DB triage) must still get auto-promoted when its
-    parents complete — preserves the pre-#28712 recovery semantics."""
+    ``kanban_block`` (e.g. a transient crash, manual DB triage) and whose
+    ``consecutive_failures`` is still *below* the circuit-breaker limit
+    must get auto-promoted when its parents complete — preserves the
+    pre-#28712 recovery semantics for genuinely transient failures.
+
+    The complementary case — a block whose failure count has *reached*
+    the limit must stay blocked — is covered by
+    ``test_kanban_db.py::test_recompute_ready_skips_tasks_at_failure_limit``
+    (#35072).  Together they pin the contract: ``recompute_ready`` defers
+    the give-up decision to the same effective limit the breaker uses, so
+    the two never disagree.
+    """
    with kb.connect() as conn:
        parent = kb.create_task(conn, title="parent")
        child = kb.create_task(conn, title="child", parents=[parent])
        kb.complete_task(conn, parent, result="ok")

-        # Simulate a circuit-breaker / direct triage that flips status
-        # without emitting a ``blocked`` event — exactly what
-        # ``_record_task_failure`` does after a ``gave_up``.
+        # Simulate a transient circuit-breaker / direct triage that flips
+        # status without emitting a ``blocked`` event — exactly what
+        # ``_record_task_failure`` does below the limit.  One failure is
+        # under the default limit (2), so recovery is still correct.
        conn.execute(
-            "UPDATE tasks SET status='blocked', consecutive_failures=5, "
-            "last_failure_error='persistent error' WHERE id=?",
+            "UPDATE tasks SET status='blocked', consecutive_failures=1, "
+            "last_failure_error='transient error' WHERE id=?",
            (child,),
        )
        conn.commit()
@ -128,8 +138,9 @@ def test_circuit_breaker_block_still_auto_promotes(kanban_home: Path) -> None:
        assert promoted == 1
        task = kb.get_task(conn, child)
        assert task.status == "ready"
-        assert task.consecutive_failures == 0
-        assert task.last_failure_error is None
+        # Counter is preserved across recovery (not reset) so the breaker
+        # can still accumulate if the task keeps failing (#35072).
+        assert task.consecutive_failures == 1


 def test_gave_up_event_alone_does_not_make_block_sticky(kanban_home: Path) -> None:
--- a/tests/hermes_cli/test_kanban_db.py
+++ b/tests/hermes_cli/test_kanban_db.py
@ -307,7 +307,8 @@ def test_recompute_ready_cascades_through_chain(kanban_home):


 def test_recompute_ready_promotes_blocked_with_done_parents(kanban_home):
-    """blocked tasks with all parents done should be promoted to ready."""
+    """blocked tasks with all parents done should be promoted to ready,
+    unless the circuit-breaker failure limit has been reached."""
    with kb.connect() as conn:
        parent = kb.create_task(conn, title="parent", assignee="a")
        child = kb.create_task(
@ -316,16 +317,16 @@ def test_recompute_ready_promotes_blocked_with_done_parents(kanban_home):
        # Complete the parent
        kb.claim_task(conn, parent)
        kb.complete_task(conn, parent, result="ok")
-        # Manually block the child (simulates a worker that failed
-        # after the parent finished)
+        # Manually block the child with zero failures (simulates a
+        # dependency block, not a circuit-breaker block).
        conn.execute(
-            "UPDATE tasks SET status='blocked', consecutive_failures=5, "
-            "last_failure_error='persistent error' WHERE id=?",
+            "UPDATE tasks SET status='blocked', consecutive_failures=0, "
+            "last_failure_error=NULL WHERE id=?",
            (child,),
        )
        conn.commit()
        assert kb.get_task(conn, child).status == "blocked"
-        # recompute_ready should promote blocked → ready and reset failures
+        # recompute_ready should promote blocked → ready
        promoted = kb.recompute_ready(conn)
        assert promoted == 1
        task = kb.get_task(conn, child)
@ -815,6 +816,149 @@ def test_unblock_resets_failure_counters(kanban_home):
        assert task.last_failure_error is None


+def test_recompute_ready_skips_tasks_at_failure_limit(kanban_home):
+    """recompute_ready must not auto-recover tasks whose consecutive_failures
+    has reached the circuit-breaker limit (#35072).
+
+    Without this guard, a task that repeatedly exhausts its iteration
+    budget would cycle forever: block → auto-recover (counter reset)
+    → respawn → budget exhausted → block → …
+    """
+    with kb.connect() as conn:
+        parent = kb.create_task(conn, title="parent", assignee="a")
+        child = kb.create_task(conn, title="child", assignee="a",
+                               parents=[parent])
+        # Complete the parent so the child's dependencies are satisfied.
+        kb.claim_task(conn, parent)
+        kb.complete_task(conn, parent, summary="done")
+
+        # Simulate the child having exhausted its budget twice,
+        # hitting the default failure limit (2).
+        kb.claim_task(conn, child)
+        kb._record_task_failure(
+            conn, child, error="budget exhausted 1",
+            outcome="timed_out", release_claim=True, end_run=True,
+            failure_limit=2,
+        )
+        kb._record_task_failure(
+            conn, child, error="budget exhausted 2",
+            outcome="timed_out", release_claim=True, end_run=True,
+            failure_limit=2,
+        )
+        task = kb.get_task(conn, child)
+        assert task.status == "blocked"
+        assert task.consecutive_failures >= 2
+
+        # recompute_ready must NOT promote this task — the circuit
+        # breaker has tripped and it should stay blocked.
+        promoted = kb.recompute_ready(conn)
+        assert promoted == 0
+        assert kb.get_task(conn, child).status == "blocked"
+
+        # Explicit unblock should still work and reset the counter.
+        assert kb.unblock_task(conn, child)
+        task = kb.get_task(conn, child)
+        assert task.status == "ready"
+        assert task.consecutive_failures == 0
+
+
+def test_recompute_ready_recovers_below_limit(kanban_home):
+    """recompute_ready auto-recovers blocked tasks that haven't hit the
+    failure limit yet — the counter is preserved across recovery."""
+    with kb.connect() as conn:
+        t = kb.create_task(conn, title="task", assignee="a")
+        kb.claim_task(conn, t)
+        # One failure, below the default limit of 2.
+        kb._record_task_failure(
+            conn, t, error="budget exhausted 1",
+            outcome="timed_out", release_claim=True, end_run=True,
+            failure_limit=2,
+        )
+        task = kb.get_task(conn, t)
+        assert task.status == "ready"
+        assert task.consecutive_failures == 1
+
+        # Simulate being blocked by something else (not circuit breaker).
+        conn.execute(
+            "UPDATE tasks SET status = 'blocked' WHERE id = ?", (t,),
+        )
+        conn.commit()
+
+        promoted = kb.recompute_ready(conn)
+        assert promoted == 1
+        task = kb.get_task(conn, t)
+        assert task.status == "ready"
+        # Counter must be preserved, not reset.
+        assert task.consecutive_failures == 1
+
+
+def test_recompute_ready_honours_dispatcher_failure_limit(kanban_home):
+    """The guard's effective limit must follow the same resolution order
+    as the circuit breaker (#35072): per-task max_retries → dispatcher
+    failure_limit → DEFAULT_FAILURE_LIMIT.
+
+    Without threading the dispatcher's ``kanban.failure_limit`` through,
+    the guard falls back to DEFAULT_FAILURE_LIMIT and disagrees with the
+    breaker — sticking a task prematurely (config limit > default) or
+    letting a tripped task escape (config limit < default).
+    """
+    with kb.connect() as conn:
+        # Config allows MORE retries than the default. A task blocked
+        # with failures below the configured limit must still recover.
+        t = kb.create_task(conn, title="lenient", assignee="a")
+        conn.execute(
+            "UPDATE tasks SET status='blocked', consecutive_failures=? "
+            "WHERE id=?",
+            (kb.DEFAULT_FAILURE_LIMIT, t),
+        )
+        conn.commit()
+        # Default-limit call would stick it (failures >= default).
+        assert kb.recompute_ready(conn) == 0
+        assert kb.get_task(conn, t).status == "blocked"
+        # Dispatcher configured a higher limit → recover, preserve counter.
+        promoted = kb.recompute_ready(
+            conn, failure_limit=kb.DEFAULT_FAILURE_LIMIT + 2
+        )
+        assert promoted == 1
+        task = kb.get_task(conn, t)
+        assert task.status == "ready"
+        assert task.consecutive_failures == kb.DEFAULT_FAILURE_LIMIT
+
+        # Config allows FEWER retries than the default. A task at the
+        # stricter limit must stay blocked even though it's below default.
+        t2 = kb.create_task(conn, title="strict", assignee="a")
+        conn.execute(
+            "UPDATE tasks SET status='blocked', consecutive_failures=1 "
+            "WHERE id=?",
+            (t2,),
+        )
+        conn.commit()
+        # Default-limit (2) would recover it (1 < 2).
+        # Stricter config limit (1) must keep it blocked (1 >= 1).
+        assert kb.recompute_ready(conn, failure_limit=1) == 0
+        assert kb.get_task(conn, t2).status == "blocked"
+
+
+def test_recompute_ready_per_task_max_retries_overrides_dispatcher(kanban_home):
+    """A per-task ``max_retries`` wins over the dispatcher failure_limit,
+    matching ``_record_task_failure``'s resolution order."""
+    with kb.connect() as conn:
+        t = kb.create_task(conn, title="per-task", assignee="a")
+        # Per-task allows 4 retries; dispatcher config says 2.
+        conn.execute(
+            "UPDATE tasks SET status='blocked', consecutive_failures=2, "
+            "max_retries=4 WHERE id=?",
+            (t,),
+        )
+        conn.commit()
+        # failures(2) < per-task limit(4) → recover, despite dispatcher=2.
+        promoted = kb.recompute_ready(conn, failure_limit=2)
+        assert promoted == 1
+        task = kb.get_task(conn, t)
+        assert task.status == "ready"
+        assert task.consecutive_failures == 2
+
+
 # ---------------------------------------------------------------------------
 # Parent-completion invariant at the claim gate (RCA t_a6acd07d)
 # ---------------------------------------------------------------------------
--- a/tests/hermes_cli/test_kanban_db_init.py
+++ b/tests/hermes_cli/test_kanban_db_init.py
@ -1,11 +1,74 @@
 from __future__ import annotations

+import sqlite3
 import threading
 from pathlib import Path

 from hermes_cli import kanban_db as kb


+def _make_legacy_db(path: Path) -> None:
+    """Write a kanban DB with the pre-AUTOINCREMENT (TEXT PK) schema for the
+    four tables #35096 affects, keeping every other table current so the
+    additive-column migration runs cleanly on top.
+    """
+    conn = sqlite3.connect(str(path))
+    conn.executescript(kb.SCHEMA_SQL)
+    conn.executescript(
+        """
+        DROP TABLE task_events;
+        DROP TABLE task_comments;
+        DROP TABLE task_runs;
+        DROP TABLE kanban_notify_subs;
+        CREATE TABLE task_comments (id TEXT PRIMARY KEY, task_id TEXT NOT NULL,
+            author TEXT NOT NULL, body TEXT NOT NULL, created_at INTEGER NOT NULL);
+        CREATE TABLE task_events (id TEXT PRIMARY KEY, task_id TEXT NOT NULL,
+            kind TEXT NOT NULL, payload TEXT, created_at INTEGER NOT NULL);
+        CREATE TABLE task_runs (id TEXT PRIMARY KEY, task_id TEXT NOT NULL,
+            profile TEXT, status TEXT NOT NULL, started_at INTEGER NOT NULL);
+        CREATE TABLE kanban_notify_subs (task_id TEXT NOT NULL, platform TEXT NOT NULL,
+            chat_id TEXT NOT NULL, thread_id TEXT NOT NULL DEFAULT '', user_id TEXT,
+            created_at INTEGER NOT NULL, last_event_id TEXT,
+            PRIMARY KEY (task_id, platform, chat_id, thread_id));
+        """
+    )
+    conn.execute("INSERT INTO tasks (id, title, status, created_at) VALUES ('task-1', 'T', 'done', 1000)")
+    conn.execute("INSERT INTO task_comments VALUES ('c-1', 'task-1', 'agent', 'hi', 1500)")
+    conn.execute("INSERT INTO task_events VALUES ('e-1', 'task-1', 'completed', NULL, 2000)")
+    conn.execute("INSERT INTO task_events VALUES ('e-2', 'task-1', 'blocked', NULL, 2100)")
+    conn.execute("INSERT INTO task_runs VALUES ('r-1', 'task-1', 'default', 'done', 1000)")
+    conn.execute(
+        "INSERT INTO kanban_notify_subs (task_id, platform, chat_id, created_at, last_event_id) "
+        "VALUES ('task-1', 'telegram', '123', 1000, 'e-1')"
+    )
+    conn.commit()
+    conn.close()
+
+
+def _setup_home(tmp_path, monkeypatch) -> Path:
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(home))
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+    db_path = kb.kanban_db_path(board="legacy")
+    db_path.parent.mkdir(parents=True, exist_ok=True)
+    kb._INITIALIZED_PATHS.discard(str(db_path.resolve()))
+    return db_path
+
+
+def _table_struct(conn: sqlite3.Connection, table: str):
+    cols = [
+        (r["name"], (r["type"] or "").upper(), r["notnull"], r["pk"])
+        for r in conn.execute(f"PRAGMA table_info({table})")
+    ]
+    idx = sorted(
+        r["name"]
+        for r in conn.execute(f"PRAGMA index_list({table})")
+        if not r["name"].startswith("sqlite_")
+    )
+    return cols, idx
+
+
 def test_connect_initialization_is_thread_safe(tmp_path, monkeypatch):
    home = tmp_path / ".hermes"
    home.mkdir()
@ -36,3 +99,79 @@ def test_connect_initialization_is_thread_safe(tmp_path, monkeypatch):
    with kb.connect(board="default") as conn:
        cols = {row["name"] for row in conn.execute("PRAGMA table_info(tasks)")}
    assert "max_retries" in cols
+
+
+def test_legacy_text_pk_tables_rebuilt_to_integer_autoincrement(tmp_path, monkeypatch):
+    """A pre-AUTOINCREMENT DB is migrated in place: id columns become INTEGER
+    PKs, ``last_event_id`` becomes INTEGER, data is preserved, and indexes
+    are recreated (DROP TABLE would otherwise take them down)."""
+    db_path = _setup_home(tmp_path, monkeypatch)
+    _make_legacy_db(db_path)
+
+    with kb.connect(db_path) as conn:
+        for table in ("task_events", "task_comments", "task_runs"):
+            id_col = {r["name"]: r for r in conn.execute(f"PRAGMA table_info({table})")}["id"]
+            assert id_col["type"].upper() == "INTEGER" and id_col["pk"] == 1
+
+        lei = {r["name"]: r for r in conn.execute("PRAGMA table_info(kanban_notify_subs)")}
+        assert lei["last_event_id"]["type"].upper() == "INTEGER"
+
+        # Data preserved across the rebuild.
+        assert len(conn.execute("SELECT * FROM task_events").fetchall()) == 2
+        assert conn.execute("SELECT body FROM task_comments").fetchone()["body"] == "hi"
+        assert len(conn.execute("SELECT * FROM task_runs").fetchall()) == 1
+        # Non-numeric legacy cursor ("e-1") casts to 0.
+        assert conn.execute("SELECT last_event_id FROM kanban_notify_subs").fetchone()["last_event_id"] == 0
+
+        # Indexes restored, including idx_events_run (added by the additive pass).
+        indexes = {r[0] for r in conn.execute("SELECT name FROM sqlite_master WHERE type='index'")}
+        for name in ("idx_events_task", "idx_events_run", "idx_comments_task",
+                     "idx_runs_task", "idx_runs_status", "idx_notify_task"):
+            assert name in indexes
+
+        # AUTOINCREMENT actually works after the rebuild.
+        conn.execute("INSERT INTO task_events (task_id, kind, created_at) VALUES ('task-1', 'completed', 3000)")
+        new_id = conn.execute("SELECT id FROM task_events ORDER BY id DESC LIMIT 1").fetchone()["id"]
+        assert isinstance(new_id, int) and new_id >= 1
+
+
+def test_rebuilt_schema_matches_fresh_db(tmp_path, monkeypatch):
+    """The rebuilt tables must be structurally identical to a fresh DB, so the
+    hand-written DDL in ``_REBUILD_SPECS`` can't silently drift from SCHEMA_SQL."""
+    legacy_path = _setup_home(tmp_path, monkeypatch)
+    _make_legacy_db(legacy_path)
+    fresh_path = kb.kanban_db_path(board="fresh")
+    fresh_path.parent.mkdir(parents=True, exist_ok=True)
+    kb._INITIALIZED_PATHS.discard(str(fresh_path.resolve()))
+
+    with kb.connect(legacy_path) as migrated, kb.connect(fresh_path) as fresh:
+        for table in ("task_events", "task_comments", "task_runs", "kanban_notify_subs"):
+            assert _table_struct(migrated, table) == _table_struct(fresh, table)
+
+
+def test_migration_is_idempotent(tmp_path, monkeypatch):
+    """Re-opening an already-migrated DB is a no-op and leaves data intact."""
+    db_path = _setup_home(tmp_path, monkeypatch)
+    _make_legacy_db(db_path)
+
+    with kb.connect(db_path):
+        pass
+    kb._INITIALIZED_PATHS.discard(str(db_path.resolve()))
+    with kb.connect(db_path) as conn:
+        id_col = {r["name"]: r for r in conn.execute("PRAGMA table_info(task_events)")}["id"]
+        assert id_col["type"].upper() == "INTEGER"
+        assert len(conn.execute("SELECT * FROM task_events").fetchall()) == 2
+
+
+def test_unseen_events_for_sub_survives_migrated_db(tmp_path, monkeypatch):
+    """The crash that motivated #35096 — ``int(None)`` on a NULL cursor — is
+    gone after migration; the notifier query returns an integer cursor."""
+    db_path = _setup_home(tmp_path, monkeypatch)
+    _make_legacy_db(db_path)
+
+    with kb.connect(db_path) as conn:
+        cursor, events = kb.unseen_events_for_sub(
+            conn, task_id="task-1", platform="telegram", chat_id="123"
+        )
+        assert isinstance(cursor, int)
+        assert isinstance(events, list)
--- a/tests/hermes_cli/test_mcp_startup.py
+++ b/tests/hermes_cli/test_mcp_startup.py
@ -0,0 +1,166 @@
+"""Regression tests for bounded/lazy CLI MCP startup."""
+
+from __future__ import annotations
+
+from argparse import Namespace
+import sys
+import threading
+import time
+import types
+
+import pytest
+
+import cli as cli_mod
+from hermes_cli import main as main_mod
+from hermes_cli import mcp_startup
+
+
+@pytest.fixture(autouse=True)
+def _reset_mcp_startup_state():
+    saved_started = mcp_startup._mcp_discovery_started
+    saved_thread = mcp_startup._mcp_discovery_thread
+    try:
+        mcp_startup._mcp_discovery_started = False
+        mcp_startup._mcp_discovery_thread = None
+        yield
+    finally:
+        thread = mcp_startup._mcp_discovery_thread
+        if thread is not None and thread.is_alive():
+            thread.join(timeout=1.0)
+        mcp_startup._mcp_discovery_started = saved_started
+        mcp_startup._mcp_discovery_thread = saved_thread
+
+
+def _agent_args(**overrides) -> Namespace:
+    base = {
+        "accept_hooks": False,
+        "command": "chat",
+        "cron_command": None,
+        "gateway_command": None,
+        "mcp_action": None,
+        "tui": False,
+    }
+    base.update(overrides)
+    return Namespace(**base)
+
+
+def test_prepare_agent_startup_backgrounds_blocking_mcp_for_chat(monkeypatch):
+    stop = threading.Event()
+    calls = {"mcp": 0}
+
+    def _blocking_discover():
+        calls["mcp"] += 1
+        stop.wait()
+
+    monkeypatch.setitem(
+        sys.modules,
+        "hermes_cli.plugins",
+        types.SimpleNamespace(discover_plugins=lambda: None),
+    )
+    monkeypatch.setitem(
+        sys.modules,
+        "hermes_cli.config",
+        types.SimpleNamespace(
+            read_raw_config=lambda: {"mcp_servers": {"demo": {"transport": "stdio"}}},
+            load_config=lambda: {},
+        ),
+    )
+    monkeypatch.setitem(
+        sys.modules,
+        "agent.shell_hooks",
+        types.SimpleNamespace(register_from_config=lambda *_a, **_k: None),
+    )
+    monkeypatch.setitem(
+        sys.modules,
+        "tools.mcp_tool",
+        types.SimpleNamespace(discover_mcp_tools=_blocking_discover),
+    )
+
+    try:
+        start = time.monotonic()
+        main_mod._prepare_agent_startup(_agent_args())
+        elapsed = time.monotonic() - start
+        assert elapsed < 0.2
+        assert calls["mcp"] == 1
+        assert mcp_startup._mcp_discovery_thread is not None
+        assert mcp_startup._mcp_discovery_thread.is_alive()
+    finally:
+        stop.set()
+
+
+def test_prepare_agent_startup_skips_mcp_bootstrap_for_tui_chat(monkeypatch):
+    calls = {"mcp": 0}
+
+    monkeypatch.setitem(
+        sys.modules,
+        "hermes_cli.plugins",
+        types.SimpleNamespace(discover_plugins=lambda: None),
+    )
+    monkeypatch.setitem(
+        sys.modules,
+        "hermes_cli.config",
+        types.SimpleNamespace(load_config=lambda: {}),
+    )
+    monkeypatch.setitem(
+        sys.modules,
+        "agent.shell_hooks",
+        types.SimpleNamespace(register_from_config=lambda *_a, **_k: None),
+    )
+    monkeypatch.setitem(
+        sys.modules,
+        "tools.mcp_tool",
+        types.SimpleNamespace(
+            discover_mcp_tools=lambda: calls.__setitem__("mcp", calls["mcp"] + 1)
+        ),
+    )
+
+    main_mod._prepare_agent_startup(_agent_args(tui=True))
+
+    assert calls["mcp"] == 0
+    assert mcp_startup._mcp_discovery_thread is None
+
+
+def test_cli_get_tool_definitions_briefly_waits_for_fast_mcp_thread(monkeypatch):
+    thread = threading.Thread(target=lambda: time.sleep(0.05), daemon=True)
+    thread.start()
+    mcp_startup._mcp_discovery_thread = thread
+
+    monkeypatch.setitem(
+        sys.modules,
+        "model_tools",
+        types.SimpleNamespace(get_tool_definitions=lambda *_a, **_k: ["ok"]),
+    )
+
+    start = time.monotonic()
+    result = cli_mod.get_tool_definitions(enabled_toolsets=["web"], quiet_mode=True)
+    elapsed = time.monotonic() - start
+
+    assert result == ["ok"]
+    assert elapsed >= 0.04
+    assert not thread.is_alive()
+
+
+def test_init_agent_waits_for_mcp_discovery_before_agent_build(monkeypatch):
+    waited = {"done": False}
+
+    cli = cli_mod.HermesCLI(compact=True)
+    cli._session_db = object()
+    cli._resumed = False
+    cli.conversation_history = []
+    cli._install_tool_callbacks = lambda: None
+    cli._ensure_tirith_security = lambda: None
+    cli._ensure_runtime_credentials = lambda: True
+
+    monkeypatch.setattr(
+        mcp_startup,
+        "wait_for_mcp_discovery",
+        lambda timeout=0.75: waited.__setitem__("done", True),
+    )
+
+    def _fake_agent(*_a, **_k):
+        assert waited["done"] is True
+        return types.SimpleNamespace()
+
+    monkeypatch.setattr(cli_mod, "AIAgent", _fake_agent)
+
+    assert cli._init_agent() is True
--- a/tests/hermes_cli/test_memory_setup_provider_arg.py
+++ b/tests/hermes_cli/test_memory_setup_provider_arg.py
@ -0,0 +1,50 @@
+"""Tests for `hermes memory setup [provider]` routing.
+
+The `memory setup` subcommand accepts an optional positional ``provider`` so a
+fresh install can configure a specific provider directly (e.g.
+``hermes memory setup honcho``) without the interactive picker — which matters
+because the per-provider ``hermes <provider>`` subcommand is only registered
+once that provider is active.
+"""
+
+from types import SimpleNamespace
+from unittest.mock import patch
+
+from hermes_cli import memory_setup
+
+
+class TestMemorySetupProviderRouting:
+    def test_setup_with_provider_arg_skips_picker(self):
+        """`memory setup honcho` routes straight to cmd_setup_provider."""
+        args = SimpleNamespace(memory_command="setup", provider="honcho")
+        with patch.object(memory_setup, "cmd_setup_provider") as direct, \
+             patch.object(memory_setup, "cmd_setup") as picker:
+            memory_setup.memory_command(args)
+        direct.assert_called_once_with("honcho")
+        picker.assert_not_called()
+
+    def test_setup_without_provider_runs_picker(self):
+        """`memory setup` (no provider) runs the interactive picker."""
+        args = SimpleNamespace(memory_command="setup", provider=None)
+        with patch.object(memory_setup, "cmd_setup_provider") as direct, \
+             patch.object(memory_setup, "cmd_setup") as picker:
+            memory_setup.memory_command(args)
+        picker.assert_called_once_with(args)
+        direct.assert_not_called()
+
+    def test_setup_with_missing_provider_attr_runs_picker(self):
+        """A SimpleNamespace lacking `provider` must not crash — fall back to picker."""
+        args = SimpleNamespace(memory_command="setup")
+        with patch.object(memory_setup, "cmd_setup_provider") as direct, \
+             patch.object(memory_setup, "cmd_setup") as picker:
+            memory_setup.memory_command(args)
+        picker.assert_called_once_with(args)
+        direct.assert_not_called()
+
+    def test_unknown_provider_reports_and_returns_early(self, capsys):
+        """An unknown provider name surfaces a helpful message and returns
+        before any config load/save (the not-found guard precedes those imports)."""
+        memory_setup.cmd_setup_provider("notaprovider")
+        out = capsys.readouterr().out
+        assert "not found" in out
+        assert "hermes memory setup" in out
--- a/tests/hermes_cli/test_model_validation.py
+++ b/tests/hermes_cli/test_model_validation.py
@ -142,10 +142,6 @@ class TestCuratedModelsForProvider:
        assert len(models) > 0
        assert any("claude" in m[0] for m in models)

-    def test_zai_returns_glm_models(self):
-        models = curated_models_for_provider("zai")
-        assert any("glm" in m[0] for m in models)
-
    def test_unknown_provider_returns_empty(self):
        assert curated_models_for_provider("totally-unknown") == []

@ -199,9 +195,6 @@ class TestProviderModelIds:
    def test_unknown_provider_returns_empty(self):
        assert provider_model_ids("some-unknown-provider") == []

-    def test_zai_returns_glm_models(self):
-        assert "glm-5" in provider_model_ids("zai")
-
    def test_stepfun_prefers_live_catalog(self):
        with patch(
            "hermes_cli.auth.resolve_api_key_provider_credentials",
@ -222,31 +215,6 @@ class TestProviderModelIds:
             patch("hermes_cli.models._fetch_github_models", return_value=["gpt-5.4", "claude-sonnet-4.6"]):
            assert provider_model_ids("copilot-acp") == ["gpt-5.4", "claude-sonnet-4.6"]

-    def test_copilot_falls_back_to_curated_defaults_without_stale_opus(self):
-        with patch("hermes_cli.models._resolve_copilot_catalog_api_key", return_value="gh-token"), \
-             patch("hermes_cli.models._fetch_github_models", return_value=None):
-            ids = provider_model_ids("copilot")
-
-        assert "gpt-5.4" in ids
-        assert "claude-sonnet-4.6" in ids
-        assert "claude-sonnet-4" in ids
-        assert "claude-sonnet-4.5" in ids
-        assert "claude-haiku-4.5" in ids
-        assert "gemini-3.1-pro-preview" in ids
-        assert "claude-opus-4.6" not in ids
-
-    def test_copilot_acp_falls_back_to_copilot_defaults(self):
-        with patch("hermes_cli.models._resolve_copilot_catalog_api_key", return_value="gh-token"), \
-             patch("hermes_cli.models._fetch_github_models", return_value=None):
-            ids = provider_model_ids("copilot-acp")
-
-        assert "gpt-5.4" in ids
-        assert "claude-sonnet-4.6" in ids
-        assert "claude-sonnet-4" in ids
-        assert "gemini-3.1-pro-preview" in ids
-        assert "copilot-acp" not in ids
-        assert "claude-opus-4.6" not in ids
-

 # -- fetch_api_models --------------------------------------------------------

--- a/tests/hermes_cli/test_models.py
+++ b/tests/hermes_cli/test_models.py
@ -56,10 +56,6 @@ class TestOpenRouterModels:
            assert isinstance(mid, str) and len(mid) > 0
            assert isinstance(desc, str)

-    def test_at_least_5_models(self):
-        """Sanity check that the models list hasn't been accidentally truncated."""
-        assert len(OPENROUTER_MODELS) >= 5
-

 class TestFetchOpenRouterModels:
    def test_live_fetch_recomputes_free_tags(self, monkeypatch):
--- a/tests/hermes_cli/test_nous_subscription.py
+++ b/tests/hermes_cli/test_nous_subscription.py
@ -231,3 +231,93 @@ def test_get_gateway_eligible_tools_ignores_quoted_false_opt_in(monkeypatch):
    assert "web" in has_direct
    assert "web" not in already_managed
    assert set(unconfigured) == {"image_gen", "video_gen", "tts", "browser"}
+
+
+def test_apply_nous_managed_defaults_writes_video_gen_config(monkeypatch):
+    """apply_nous_managed_defaults must write video_gen.provider and
+    video_gen.use_gateway when a Nous subscriber selects video_gen
+    without a direct FAL_KEY."""
+    monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda **kw: True)
+    monkeypatch.delenv("FAL_KEY", raising=False)
+    monkeypatch.setattr(ns, "fal_key_is_configured", lambda: False)
+    monkeypatch.setattr(
+        ns, "get_nous_portal_account_info",
+        lambda **kw: _account(logged_in=True, paid=True),
+    )
+
+    config = {"model": {"provider": "nous"}}
+    changed = ns.apply_nous_managed_defaults(
+        config, enabled_toolsets=["video_gen"],
+    )
+
+    assert "video_gen" in changed
+    assert config["video_gen"]["provider"] == "fal"
+    assert config["video_gen"]["use_gateway"] is True
+
+
+def test_apply_nous_managed_defaults_writes_image_gen_config(monkeypatch):
+    """apply_nous_managed_defaults must write image_gen.use_gateway
+    when a Nous subscriber selects image_gen without a direct FAL_KEY."""
+    monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda **kw: True)
+    monkeypatch.delenv("FAL_KEY", raising=False)
+    monkeypatch.setattr(ns, "fal_key_is_configured", lambda: False)
+    monkeypatch.setattr(
+        ns, "get_nous_portal_account_info",
+        lambda **kw: _account(logged_in=True, paid=True),
+    )
+
+    config = {"model": {"provider": "nous"}}
+    changed = ns.apply_nous_managed_defaults(
+        config, enabled_toolsets=["image_gen"],
+    )
+
+    assert "image_gen" in changed
+    assert config["image_gen"]["use_gateway"] is True
+
+
+def test_apply_nous_managed_defaults_skips_fal_tools_when_key_present(monkeypatch):
+    """When FAL_KEY is set, apply_nous_managed_defaults should not touch
+    image_gen or video_gen config — the user's direct key takes precedence."""
+    monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda **kw: True)
+    monkeypatch.setenv("FAL_KEY", "fal-direct-key")
+    monkeypatch.setattr(ns, "fal_key_is_configured", lambda: True)
+    monkeypatch.setattr(
+        ns, "get_nous_portal_account_info",
+        lambda **kw: _account(logged_in=True, paid=True),
+    )
+
+    config = {"model": {"provider": "nous"}}
+    changed = ns.apply_nous_managed_defaults(
+        config, enabled_toolsets=["image_gen", "video_gen"],
+    )
+
+    assert "image_gen" not in changed
+    assert "video_gen" not in changed
+    assert "image_gen" not in config
+    assert "video_gen" not in config
+
+
+def test_apply_nous_managed_defaults_preserves_existing_video_gen_section(monkeypatch):
+    """When video_gen config already exists as a dict, the function should
+    update it in-place rather than replacing it."""
+    monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda **kw: True)
+    monkeypatch.delenv("FAL_KEY", raising=False)
+    monkeypatch.setattr(ns, "fal_key_is_configured", lambda: False)
+    monkeypatch.setattr(
+        ns, "get_nous_portal_account_info",
+        lambda **kw: _account(logged_in=True, paid=True),
+    )
+
+    config = {
+        "model": {"provider": "nous"},
+        "video_gen": {"model": "pixverse-v6"},
+    }
+    changed = ns.apply_nous_managed_defaults(
+        config, enabled_toolsets=["video_gen"],
+    )
+
+    assert "video_gen" in changed
+    assert config["video_gen"]["provider"] == "fal"
+    assert config["video_gen"]["use_gateway"] is True
+    # Pre-existing keys should be preserved
+    assert config["video_gen"]["model"] == "pixverse-v6"
--- a/tests/hermes_cli/test_ollama_cloud_provider.py
+++ b/tests/hermes_cli/test_ollama_cloud_provider.py
@ -495,12 +495,3 @@ class TestOllamaCloudSuffixStripping:
        assert _strip_ollama_cloud_suffix("qwen3-coder:480b-cloud") == "qwen3-coder:480b"
        assert _strip_ollama_cloud_suffix("nemotron-3-nano:30b") == "nemotron-3-nano:30b"
        assert _strip_ollama_cloud_suffix("") == ""
-
-
-# ── Auxiliary Model ──
-
-class TestOllamaCloudAuxiliary:
-    def test_aux_model_defined(self):
-        from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
-        assert "ollama-cloud" in _API_KEY_PROVIDER_AUX_MODELS
-        assert _API_KEY_PROVIDER_AUX_MODELS["ollama-cloud"] == "nemotron-3-nano:30b"
--- a/tests/hermes_cli/test_pip_install_detection.py
+++ b/tests/hermes_cli/test_pip_install_detection.py
@ -48,12 +48,32 @@ def test_stamp_file_takes_precedence(tmp_path):
        assert detect_install_method(project_root=tmp_path) == "docker"


-def test_docker_detected_via_dockerenv(tmp_path):
+def test_container_without_stamp_is_not_docker(tmp_path):
+    """An unstamped install in a generic container must NOT be flagged as docker.
+
+    Regression for issue #34397. The two supported installs both stamp
+    ``.install_method`` (the curl installer -> ``git``, covered by
+    ``test_stamp_file_takes_precedence``; the published image -> ``docker``),
+    so neither hits this path. An unsupported manual install dropped into a
+    container has no stamp and was wrongly classified as the published Docker
+    image, so ``hermes update`` refused to run. With a ``.git`` checkout it
+    must resolve to ``git``.
+    """
+    (tmp_path / ".git").mkdir()
    with patch("hermes_cli.config.get_managed_system", return_value=None), \
         patch("hermes_cli.config.get_hermes_home", return_value=tmp_path), \
         patch("hermes_constants.is_container", return_value=True):
        from hermes_cli.config import detect_install_method
-        assert detect_install_method(project_root=tmp_path) == "docker"
+        assert detect_install_method(project_root=tmp_path) == "git"
+
+
+def test_container_pip_install_without_stamp_is_pip(tmp_path):
+    """Container + no .git + no stamp -> pip, not docker (issue #34397)."""
+    with patch("hermes_cli.config.get_managed_system", return_value=None), \
+         patch("hermes_cli.config.get_hermes_home", return_value=tmp_path), \
+         patch("hermes_constants.is_container", return_value=True):
+        from hermes_cli.config import detect_install_method
+        assert detect_install_method(project_root=tmp_path) == "pip"


 def test_recommended_update_command_docker():
--- a/tests/hermes_cli/test_profiles.py
+++ b/tests/hermes_cli/test_profiles.py
@ -754,8 +754,8 @@ class TestRenameProfile:

        cfg = json.loads(honcho_path.read_text())
        assert "hermes.ssi_health" not in cfg["hosts"]
-        assert cfg["hosts"]["hermes.heimdall"]["aiPeer"] == "ssi_health"
-        assert cfg["hosts"]["hermes.heimdall"]["peerName"] == "user-peer"
+        assert cfg["hosts"]["hermes_heimdall"]["aiPeer"] == "ssi_health"
+        assert cfg["hosts"]["hermes_heimdall"]["peerName"] == "user-peer"

    def test_pins_ai_peer_when_absent_on_honcho_host_rename(self, profile_env):
        tmp_path = profile_env
@ -772,8 +772,8 @@ class TestRenameProfile:

        cfg = json.loads(honcho_path.read_text())
        assert "hermes.ssi_health" not in cfg["hosts"]
-        assert cfg["hosts"]["hermes.heimdall"]["aiPeer"] == "ssi_health"
-        assert cfg["hosts"]["hermes.heimdall"]["workspace"] == "hermes"
+        assert cfg["hosts"]["hermes_heimdall"]["aiPeer"] == "ssi_health"
+        assert cfg["hosts"]["hermes_heimdall"]["workspace"] == "hermes"

    def test_does_not_overwrite_existing_honcho_host_on_rename(self, profile_env):
        tmp_path = profile_env
@ -782,7 +782,7 @@ class TestRenameProfile:
        honcho_path.write_text(json.dumps({
            "hosts": {
                "hermes.ssi_health": {"aiPeer": "ssi_health"},
-                "hermes.heimdall": {"aiPeer": "heimdall"},
+                "hermes_heimdall": {"aiPeer": "heimdall"},
            }
        }))

@ -791,7 +791,7 @@ class TestRenameProfile:

        cfg = json.loads(honcho_path.read_text())
        assert cfg["hosts"]["hermes.ssi_health"]["aiPeer"] == "ssi_health"
-        assert cfg["hosts"]["hermes.heimdall"]["aiPeer"] == "heimdall"
+        assert cfg["hosts"]["hermes_heimdall"]["aiPeer"] == "heimdall"

    def test_default_raises_value_error(self, profile_env):
        with pytest.raises(ValueError, match="default"):
--- a/tests/hermes_cli/test_prompt_size.py
+++ b/tests/hermes_cli/test_prompt_size.py
@ -0,0 +1,118 @@
+"""Tests for the ``hermes prompt-size`` diagnostic (issue #34667)."""
+
+import json
+
+import pytest
+
+from hermes_cli.prompt_size import (
+    _SKILLS_BLOCK_RE,
+    compute_prompt_breakdown,
+    render_breakdown,
+)
+
+
+def _seed_memory(hermes_home, memory_text="", user_text=""):
+    mem_dir = hermes_home / "memories"
+    mem_dir.mkdir(parents=True, exist_ok=True)
+    if memory_text:
+        (mem_dir / "MEMORY.md").write_text(memory_text, encoding="utf-8")
+    if user_text:
+        (mem_dir / "USER.md").write_text(user_text, encoding="utf-8")
+
+
+def _seed_skill(hermes_home, name, description):
+    skill_dir = hermes_home / "skills" / "demo" / name
+    skill_dir.mkdir(parents=True, exist_ok=True)
+    (skill_dir / "SKILL.md").write_text(
+        f"---\nname: {name}\ndescription: {description}\n---\n# {name}\nbody\n",
+        encoding="utf-8",
+    )
+
+
+@pytest.fixture
+def isolated_home(tmp_path, monkeypatch):
+    hermes_home = tmp_path / ".hermes"
+    hermes_home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+    monkeypatch.chdir(tmp_path)  # avoid picking up the repo's AGENTS.md
+    return hermes_home
+
+
+def test_breakdown_keys_and_shape(isolated_home):
+    """The breakdown exposes every documented key with int byte/char counts."""
+    data = compute_prompt_breakdown("cli")
+    assert set(data) >= {
+        "platform",
+        "model",
+        "system_prompt",
+        "skills_index",
+        "memory",
+        "user_profile",
+        "tools",
+        "sections",
+    }
+    assert data["platform"] == "cli"
+    for key in ("system_prompt", "skills_index", "memory", "user_profile"):
+        assert data[key]["bytes"] >= 0
+        assert data[key]["chars"] >= 0
+    assert data["tools"]["count"] >= 0
+    assert data["tools"]["json_bytes"] >= 0
+    # System prompt is non-trivial even with empty home (identity + guidance).
+    assert data["system_prompt"]["bytes"] > 0
+
+
+def test_runs_offline_without_credentials(isolated_home, monkeypatch):
+    """No provider credentials configured → still produces a breakdown."""
+    for var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "NOUS_API_KEY",
+                "ANTHROPIC_API_KEY"):
+        monkeypatch.delenv(var, raising=False)
+    data = compute_prompt_breakdown("cli")
+    assert data["system_prompt"]["bytes"] > 0
+
+
+def test_skills_index_reflects_installed_skills(isolated_home):
+    """Installing a skill makes the skills-index block non-empty.
+
+    Note: the skills prompt is cached per-process (in-process LRU + disk
+    snapshot), so we seed the skill BEFORE the first build rather than
+    comparing before/after within one process.
+    """
+    _seed_skill(isolated_home, "hello", "a demo skill for size testing")
+    data = compute_prompt_breakdown("cli")
+    assert data["skills_index"]["bytes"] > 0
+
+
+def test_memory_and_profile_are_attributed(isolated_home):
+    """Memory and user-profile blocks are measured separately."""
+    _seed_memory(
+        isolated_home,
+        memory_text="Project uses pytest.\n",
+        user_text="User is a developer.\n",
+    )
+    data = compute_prompt_breakdown("cli")
+    assert data["memory"]["bytes"] > 0
+    assert data["user_profile"]["bytes"] > 0
+
+
+def test_skills_block_regex_matches_tagged_block():
+    text = "preamble\n<available_skills>\n  cat:\n    - a: b\n</available_skills>\ntail"
+    m = _SKILLS_BLOCK_RE.search(text)
+    assert m is not None
+    assert m.group(0).startswith("<available_skills>")
+    assert m.group(0).endswith("</available_skills>")
+
+
+def test_render_breakdown_is_plain_text(isolated_home):
+    data = compute_prompt_breakdown("cli")
+    out = render_breakdown(data)
+    assert "System prompt total" in out
+    assert "skills index" in out
+    assert "Tool schemas" in out
+    # Plain text — no JSON braces leaking in.
+    assert not out.strip().startswith("{")
+
+
+def test_json_serializable(isolated_home):
+    data = compute_prompt_breakdown("cli")
+    # Round-trips cleanly for ``--json`` output.
+    assert json.loads(json.dumps(data)) == json.loads(json.dumps(data))
--- a/tests/hermes_cli/test_provider_groups.py
+++ b/tests/hermes_cli/test_provider_groups.py
@ -0,0 +1,118 @@
+"""Tests for provider-group folding (display-only picker grouping).
+
+These are invariant tests, not catalog snapshots: they assert how
+``group_providers`` folds a flat slug list and how member slugs relate to
+``PROVIDER_GROUPS`` / ``CANONICAL_PROVIDERS`` — not the specific set of
+vendors, which is expected to change over time.
+"""
+
+from hermes_cli.models import (
+    CANONICAL_PROVIDERS,
+    PROVIDER_GROUPS,
+    group_providers,
+    provider_group_for_slug,
+)
+
+
+def _slugs(rows):
+    """Flatten picker rows back to the concrete slugs they expose."""
+    out = []
+    for r in rows:
+        if r["kind"] == "single":
+            out.append(r["slug"])
+        else:
+            out.extend(r["members"])
+    return out
+
+
+def test_groups_reference_real_canonical_slugs():
+    """Every group member must be an actual provider slug. Guards typos and
+    stale group entries after a provider is renamed/removed."""
+    canonical = {p.slug for p in CANONICAL_PROVIDERS}
+    for gid, (label, members) in PROVIDER_GROUPS.items():
+        assert label, f"group {gid} has empty label"
+        assert len(members) >= 1
+        for m in members:
+            assert m in canonical, f"group {gid} member {m!r} is not a canonical slug"
+
+
+def test_member_slugs_are_unique_across_groups():
+    """A slug may belong to at most one group."""
+    seen = {}
+    for gid, (_label, members) in PROVIDER_GROUPS.items():
+        for m in members:
+            assert m not in seen, f"{m!r} in both {seen[m]!r} and {gid!r}"
+            seen[m] = gid
+
+
+def test_reverse_index_matches_groups():
+    for gid, (_label, members) in PROVIDER_GROUPS.items():
+        for m in members:
+            assert provider_group_for_slug(m) == gid
+    assert provider_group_for_slug("openrouter") == ""
+    assert provider_group_for_slug("") == ""
+
+
+def test_ungrouped_providers_pass_through_in_order():
+    rows = group_providers(["nous", "openrouter", "deepseek"])
+    assert all(r["kind"] == "single" for r in rows)
+    assert [r["slug"] for r in rows] == ["nous", "openrouter", "deepseek"]
+
+
+def test_multi_member_group_folds_to_one_row():
+    rows = group_providers(["minimax", "minimax-oauth", "minimax-cn"])
+    assert len(rows) == 1
+    row = rows[0]
+    assert row["kind"] == "group"
+    assert row["group_id"] == "minimax"
+    assert row["members"] == ["minimax", "minimax-oauth", "minimax-cn"]
+
+
+def test_group_appears_at_first_member_position():
+    """The group row takes the slot of its earliest-listed present member,
+    and later members do not re-emit."""
+    rows = group_providers(["nous", "minimax", "deepseek", "minimax-cn"])
+    kinds = [(r["kind"], r.get("group_id") or r.get("slug")) for r in rows]
+    assert kinds == [
+        ("single", "nous"),
+        ("group", "minimax"),
+        ("single", "deepseek"),
+    ]
+    # both minimax members folded into the single group row
+    assert rows[1]["members"] == ["minimax", "minimax-cn"]
+
+
+def test_single_present_member_degrades_to_single_row():
+    """A group with only one present member shows no submenu."""
+    rows = group_providers(["xai"])  # xai-oauth absent
+    assert len(rows) == 1
+    assert rows[0]["kind"] == "single"
+    assert rows[0]["slug"] == "xai"
+
+
+def test_member_order_follows_declaration_not_input():
+    """Inside a folded group, members are ordered by PROVIDER_GROUPS, not by
+    the order they appeared in the input list."""
+    rows = group_providers(["minimax-cn", "minimax", "minimax-oauth"])
+    assert rows[0]["members"] == ["minimax", "minimax-oauth", "minimax-cn"]
+
+
+def test_duplicate_slugs_ignored():
+    rows = group_providers(["nous", "nous", "minimax", "minimax"])
+    assert [r.get("slug") or r["group_id"] for r in rows] == ["nous", "minimax"]
+
+
+def test_fold_is_lossless_for_present_slugs():
+    """Every input slug (deduped) must still be reachable through the folded
+    rows — grouping hides nothing."""
+    flat = [p.slug for p in CANONICAL_PROVIDERS]
+    rows = group_providers(flat)
+    assert set(_slugs(rows)) == set(flat)
+
+
+def test_canonical_fold_row_count_shrinks():
+    """Folding the full canonical list produces fewer top-level rows than the
+    flat list (proves grouping actually consolidates)."""
+    flat = [p.slug for p in CANONICAL_PROVIDERS]
+    rows = group_providers(flat)
+    assert len(rows) < len(flat)
--- a/tests/hermes_cli/test_tools_config.py
+++ b/tests/hermes_cli/test_tools_config.py
@ -757,8 +757,68 @@ def test_first_install_nous_auto_configures_managed_defaults(monkeypatch):
    assert config["web"]["backend"] == "firecrawl"
    assert config["tts"]["provider"] == "openai"
    assert config["browser"]["cloud_provider"] == "browser-use"
+    assert config["image_gen"]["use_gateway"] is True
    assert configured == []

+
+def test_first_install_nous_auto_configures_video_gen(monkeypatch):
+    """When a Nous subscriber checks video_gen in the toolset checklist,
+    apply_nous_managed_defaults must write video_gen.provider and
+    video_gen.use_gateway so the FAL plugin can route through the gateway
+    at runtime.  Regression test for the bug where video_gen was marked as
+    auto-configured but no config was actually written."""
+    monkeypatch.setattr("hermes_cli.nous_subscription.managed_nous_tools_enabled", lambda: True)
+    config = {
+        "model": {"provider": "nous"},
+        "platform_toolsets": {"cli": []},
+    }
+    for env_var in (
+        "VOICE_TOOLS_OPENAI_KEY",
+        "OPENAI_API_KEY",
+        "ELEVENLABS_API_KEY",
+        "FIRECRAWL_API_KEY",
+        "FIRECRAWL_API_URL",
+        "TAVILY_API_KEY",
+        "PARALLEL_API_KEY",
+        "BROWSERBASE_API_KEY",
+        "BROWSERBASE_PROJECT_ID",
+        "BROWSER_USE_API_KEY",
+        "FAL_KEY",
+    ):
+        monkeypatch.delenv(env_var, raising=False)
+
+    monkeypatch.setattr(
+        "hermes_cli.tools_config._prompt_toolset_checklist",
+        lambda *args, **kwargs: {"video_gen"},
+    )
+    monkeypatch.setattr("hermes_cli.tools_config.save_config", lambda config: None)
+    monkeypatch.setattr(
+        "hermes_cli.tools_config._get_enabled_platforms",
+        lambda: ["cli"],
+    )
+    monkeypatch.setattr(
+        "hermes_cli.nous_subscription.get_nous_portal_account_info",
+        lambda *args, **kwargs: NousPortalAccountInfo(
+            logged_in=True,
+            source="jwt",
+            fresh=False,
+            paid_service_access=True,
+        ),
+    )
+
+    configured = []
+    monkeypatch.setattr(
+        "hermes_cli.tools_config._configure_toolset",
+        lambda ts_key, config: configured.append(ts_key),
+    )
+
+    tools_command(first_install=True, config=config)
+
+    assert config["video_gen"]["provider"] == "fal"
+    assert config["video_gen"]["use_gateway"] is True
+    # video_gen should NOT appear in the manual configure list — it's auto-configured
+    assert "video_gen" not in configured
+
 # ── Platform / toolset consistency ────────────────────────────────────────────


--- a/tests/hermes_cli/test_tui_resume_flow.py
+++ b/tests/hermes_cli/test_tui_resume_flow.py
@ -638,6 +638,60 @@ def test_oneshot_rejects_invalid_only_toolsets(monkeypatch, capsys):
    assert "did not contain any valid toolsets" in err


+def test_oneshot_fails_closed_on_empty_final_response(monkeypatch, capsys):
+    _stub_plugin_discovery(monkeypatch)
+    import hermes_cli.oneshot as oneshot_mod
+
+    monkeypatch.setattr(oneshot_mod, "_run_agent", lambda *_args, **_kwargs: "")
+
+    assert oneshot_mod.run_oneshot("hello") == 1
+    captured = capsys.readouterr()
+    assert captured.out == ""
+    assert "no final response" in captured.err
+
+
+def test_oneshot_prints_nonempty_final_response(monkeypatch, capsys):
+    _stub_plugin_discovery(monkeypatch)
+    import hermes_cli.oneshot as oneshot_mod
+
+    monkeypatch.setattr(oneshot_mod, "_run_agent", lambda *_args, **_kwargs: "done")
+
+    assert oneshot_mod.run_oneshot("hello") == 0
+    captured = capsys.readouterr()
+    assert captured.out == "done\n"
+    assert captured.err == ""
+
+
+def test_oneshot_fails_closed_on_agent_exception(monkeypatch, capsys):
+    _stub_plugin_discovery(monkeypatch)
+    import hermes_cli.oneshot as oneshot_mod
+
+    def _boom(*_args, **_kwargs):
+        raise OSError("not a TTY")
+
+    monkeypatch.setattr(oneshot_mod, "_run_agent", _boom)
+
+    assert oneshot_mod.run_oneshot("hello") == 1
+    captured = capsys.readouterr()
+    assert captured.out == ""
+    assert "agent failed" in captured.err
+    assert "not a TTY" in captured.err
+
+
+def test_oneshot_reraises_keyboard_interrupt(monkeypatch):
+    _stub_plugin_discovery(monkeypatch)
+    import hermes_cli.oneshot as oneshot_mod
+    import pytest as _pytest
+
+    def _interrupt(*_args, **_kwargs):
+        raise KeyboardInterrupt
+
+    monkeypatch.setattr(oneshot_mod, "_run_agent", _interrupt)
+
+    with _pytest.raises(KeyboardInterrupt):
+        oneshot_mod.run_oneshot("hello")
+
+
 def test_oneshot_filters_invalid_toolsets_before_redirect(monkeypatch, capsys):
    _stub_plugin_discovery(monkeypatch)
    from hermes_cli.oneshot import _validate_explicit_toolsets
--- a/tests/hermes_cli/test_update_concurrent_quarantine.py
+++ b/tests/hermes_cli/test_update_concurrent_quarantine.py
@ -128,24 +128,31 @@ def test_detect_concurrent_is_noop_off_windows(_winp, tmp_path):
 def _fake_psutil_with_parent_chain(
    parent_chain: list[int],
    proc_iter_rows: list,
+    *,
+    ancestor_exe: str | None = None,
 ):
-    """Build a psutil stand-in that has Process()/parent() AND process_iter().
+    """Build a psutil stand-in that has Process()/parents()/exe() AND process_iter().

-    ``parent_chain`` is the list of PIDs returned by successive ``.parent()``
-    calls starting from the seed (``os.getpid()``); the last entry's
-    ``.parent()`` returns ``None`` to terminate the walk.
+    ``parent_chain`` is the ordered list of ancestor PIDs (closest first)
+    returned by ``proc.parents()`` on the seed (``os.getpid()``).
+    ``ancestor_exe`` is the executable path reported by each ancestor's
+    ``.exe()``; when it matches one of our shim paths the ancestor is
+    excluded (the launcher-shim case). Pass ``None`` to model an ancestor
+    whose exe can't be read (psutil error) — it stays in the candidate set.
    """

    class _FakeProc:
-        def __init__(self, pid: int, chain: list[int]):
+        def __init__(self, pid: int, exe_path: str | None):
            self.pid = pid
-            self._chain = chain
+            self._exe = exe_path

-        def parent(self):
-            if not self._chain:
-                return None
-            next_pid = self._chain[0]
-            return _FakeProc(next_pid, self._chain[1:])
+        def exe(self):
+            if self._exe is None:
+                raise OSError("exe unavailable")
+            return self._exe
+
+        def parents(self):
+            return [_FakeProc(p, ancestor_exe) for p in parent_chain]

    class _NoSuchProcess(Exception):
        pass
@ -153,8 +160,8 @@ def _fake_psutil_with_parent_chain(
    class _AccessDenied(Exception):
        pass

-    def _process(pid):
-        return _FakeProc(pid, list(parent_chain))
+    def _process(pid=None):
+        return _FakeProc(pid if pid is not None else os.getpid(), ancestor_exe)

    return types.SimpleNamespace(
        Process=_process,
@ -185,6 +192,7 @@ def test_detect_concurrent_excludes_parent_chain(_winp, tmp_path):
    fake_psutil = _fake_psutil_with_parent_chain(
        parent_chain=[launcher_pid],
        proc_iter_rows=rows,
+        ancestor_exe=str(shim),
    )
    with patch.dict(sys.modules, {"psutil": fake_psutil}):
        result = cli_main._detect_concurrent_hermes_instances(scripts_dir)
@ -211,6 +219,7 @@ def test_detect_concurrent_still_finds_unrelated_other_hermes(_winp, tmp_path):
    fake_psutil = _fake_psutil_with_parent_chain(
        parent_chain=[launcher_pid],
        proc_iter_rows=rows,
+        ancestor_exe=str(shim),
    )
    with patch.dict(sys.modules, {"psutil": fake_psutil}):
        result = cli_main._detect_concurrent_hermes_instances(scripts_dir)
@ -238,6 +247,7 @@ def test_detect_concurrent_parent_chain_walks_deep(_winp, tmp_path):
    fake_psutil = _fake_psutil_with_parent_chain(
        parent_chain=[parent_pid, grandparent_pid, greatgrandparent_pid],
        proc_iter_rows=rows,
+        ancestor_exe=str(shim),
    )
    with patch.dict(sys.modules, {"psutil": fake_psutil}):
        result = cli_main._detect_concurrent_hermes_instances(scripts_dir)
@ -246,25 +256,38 @@ def test_detect_concurrent_parent_chain_walks_deep(_winp, tmp_path):


@patch.object(cli_main, "_is_windows", return_value=True)
-def test_detect_concurrent_parent_walk_handles_cycle(_winp, tmp_path):
-    """A PID cycle in the parent chain must not hang the walk."""
+def test_detect_concurrent_parents_call_robust_to_one_bad_hop(_winp, tmp_path):
+    """The launcher shim is still excluded even when an ancestor exe is unreadable.
+
+    Field regression (issues #29341, #34795): the old per-hop ``parent()``
+    walk bailed on the FIRST psutil error, so an AccessDenied on any hop left
+    the launcher shim in the candidate set and re-triggered the false
+    positive. ``parents()`` returns the whole list at once; we evaluate each
+    ancestor independently, so one unreadable hop never strands the launcher.
+    """
    scripts_dir = tmp_path
    shim = scripts_dir / "hermes.exe"
    shim.write_bytes(b"")
    me = os.getpid()
-    bogus_loop_pid = me + 1
+    launcher_pid = me + 100

-    rows = [_make_proc(me, str(shim), "python.exe")]
-    # Chain that points back to ``me`` — the loop-detection branch must break.
+    rows = [
+        _make_proc(me, str(shim), "python.exe"),
+        _make_proc(launcher_pid, str(shim), "hermes.exe"),
+    ]
+    # ancestor_exe=None → every ancestor's .exe() raises OSError. The helper
+    # must swallow it per-ancestor and not crash; the launcher won't be
+    # excluded in this degenerate case, but a real run reads the shim exe.
    fake_psutil = _fake_psutil_with_parent_chain(
-        parent_chain=[bogus_loop_pid, me, bogus_loop_pid],
+        parent_chain=[launcher_pid],
        proc_iter_rows=rows,
+        ancestor_exe=None,
    )
    with patch.dict(sys.modules, {"psutil": fake_psutil}):
        result = cli_main._detect_concurrent_hermes_instances(scripts_dir)

-    # No crash, no hang; self + bogus_loop_pid excluded; no others reported.
-    assert result == []
+    # No crash; helper completes. (Degenerate stub: launcher exe unreadable.)
+    assert result == [(launcher_pid, "hermes.exe")]


@patch.object(cli_main, "_is_windows", return_value=True)
@ -310,6 +333,11 @@ def test_format_message_mentions_pids_and_remediation(tmp_path):
    assert "--force" in msg
    # Mentions the file that would have been overwritten
    assert str(tmp_path / "hermes.exe") in msg
+    # Self-service kill command targets the exact stale PIDs (issue #34795).
+    assert "taskkill" in msg
+    assert "/PID 1234" in msg
+    assert "/PID 5678" in msg
+    assert "/F" in msg


 # ---------------------------------------------------------------------------
--- a/tests/hermes_cli/test_uv_tool_update.py
+++ b/tests/hermes_cli/test_uv_tool_update.py
@ -0,0 +1,311 @@
+"""Tests for uv-tool install detection in the update path (issue #29700).
+
+``uv tool install hermes-agent`` lives outside any venv, so the previous
+``uv pip install --upgrade`` update path failed with ``No virtual
+environment found``. ``is_uv_tool_install`` should detect this layout and
+both the user-facing recommended command and the actual
+``_cmd_update_pip`` subprocess invocation should switch to
+``uv tool upgrade hermes-agent``.
+
+Detection is restricted to properties of the running interpreter
+(``sys.prefix`` / ``sys.executable``) so a pip/venv install on a machine
+that also has ``uv tool install hermes-agent`` does not get misclassified.
+"""
+from __future__ import annotations
+
+import subprocess
+from types import SimpleNamespace
+from unittest.mock import patch
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# is_uv_tool_install
+# ---------------------------------------------------------------------------
+
+
+class TestIsUvToolInstall:
+    def test_returns_true_when_sys_prefix_matches_uv_tool_layout(self):
+        from hermes_cli import config
+
+        with patch.object(config.sys, "prefix", "/home/user/.local/share/uv/tools/hermes-agent"):
+            assert config.is_uv_tool_install() is True
+
+    def test_returns_true_when_sys_executable_matches_uv_tool_layout(self):
+        """Some uv-tool layouts surface the marker on ``sys.executable`` (bin/python)."""
+        from hermes_cli import config
+
+        with patch.object(config.sys, "prefix", "/some/unrelated/venv"), \
+             patch.object(
+                 config.sys,
+                 "executable",
+                 "/home/user/.local/share/uv/tools/hermes-agent/bin/python",
+             ):
+            assert config.is_uv_tool_install() is True
+
+    def test_returns_false_when_neither_prefix_nor_executable_matches(self):
+        from hermes_cli import config
+
+        with patch.object(config.sys, "prefix", "/some/unrelated/venv"), \
+             patch.object(config.sys, "executable", "/usr/bin/python3"):
+            assert config.is_uv_tool_install() is False
+
+    def test_does_not_consult_uv_tool_list(self):
+        """Detection must NOT shell out: ``uv tool list`` would false-positive
+        when the active install is pip/venv but the machine also has
+        ``uv tool install hermes-agent`` somewhere on disk. Copilot review on
+        PR #29703 flagged this; the fix is to never call ``uv tool list``
+        from the detection path."""
+        from hermes_cli import config
+
+        with patch.object(config.sys, "prefix", "/some/unrelated/venv"), \
+             patch.object(config.sys, "executable", "/usr/bin/python3"), \
+             patch("subprocess.run") as mock_run:
+            assert config.is_uv_tool_install() is False
+            mock_run.assert_not_called()
+
+    def test_case_insensitive_match(self):
+        """Match must be case-insensitive — Windows paths preserve case
+        (e.g. ``...AppData\\Local\\UV\\Tools\\hermes-agent``) and a case-sensitive
+        check would miss them. We exercise the lower-cased compare path here
+        without monkey-patching ``os.sep``, which would break the whole suite."""
+        from hermes_cli import config
+
+        with patch.object(
+            config.sys, "prefix", "/HOME/USER/.local/share/UV/Tools/hermes-agent"
+        ):
+            assert config.is_uv_tool_install() is True
+
+    def test_handles_empty_executable(self):
+        from hermes_cli import config
+
+        with patch.object(config.sys, "prefix", "/some/unrelated/venv"), \
+             patch.object(config.sys, "executable", ""):
+            assert config.is_uv_tool_install() is False
+
+
+# ---------------------------------------------------------------------------
+# recommended_update_command_for_method
+# ---------------------------------------------------------------------------
+
+
+class TestRecommendedUpdateCommandForUvTool:
+    def test_uv_tool_install_recommends_uv_tool_upgrade(self):
+        from hermes_cli import config
+
+        with patch("shutil.which", return_value="/usr/local/bin/uv"), \
+             patch.object(config, "is_uv_tool_install", return_value=True):
+            cmd = config.recommended_update_command_for_method("pip")
+            assert cmd == "uv tool upgrade hermes-agent"
+
+    def test_uv_tool_install_recommends_uv_tool_upgrade_even_without_uv_on_path(self):
+        """Recommendation reflects the *install method*, not whether ``uv`` is
+        currently on PATH — the user needs to know the right command to run."""
+        from hermes_cli import config
+
+        with patch("shutil.which", return_value=None), \
+             patch.object(config, "is_uv_tool_install", return_value=True):
+            cmd = config.recommended_update_command_for_method("pip")
+            assert cmd == "uv tool upgrade hermes-agent"
+
+    def test_uv_pip_install_keeps_legacy_recommendation(self):
+        """Existing behavior: uv is on PATH but Hermes is a regular pip install."""
+        from hermes_cli import config
+
+        with patch("shutil.which", return_value="/usr/local/bin/uv"), \
+             patch.object(config, "is_uv_tool_install", return_value=False):
+            cmd = config.recommended_update_command_for_method("pip")
+            assert cmd == "uv pip install --upgrade hermes-agent"
+
+    def test_no_uv_falls_back_to_plain_pip(self):
+        from hermes_cli import config
+
+        with patch("shutil.which", return_value=None), \
+             patch.object(config, "is_uv_tool_install", return_value=False):
+            cmd = config.recommended_update_command_for_method("pip")
+            assert cmd == "pip install --upgrade hermes-agent"
+
+    def test_recommendation_does_not_spawn_subprocess(self):
+        """Computing the recommendation string must be cheap — no ``uv tool list``
+        spawn. Copilot review on PR #29703 flagged the prior subprocess hop
+        as adding overhead and a multi-second timeout window for what is
+        purely a display string."""
+        from hermes_cli import config
+
+        with patch.object(config.sys, "prefix", "/some/unrelated/venv"), \
+             patch.object(config.sys, "executable", "/usr/bin/python3"), \
+             patch("shutil.which", return_value="/usr/local/bin/uv"), \
+             patch("subprocess.run") as mock_run:
+            cmd = config.recommended_update_command_for_method("pip")
+            mock_run.assert_not_called()
+            assert cmd == "uv pip install --upgrade hermes-agent"
+
+
+# ---------------------------------------------------------------------------
+# _cmd_update_pip subprocess command
+# ---------------------------------------------------------------------------
+
+
+class TestCmdUpdatePipUsesUvTool:
+    @patch("subprocess.run")
+    def test_runs_uv_tool_upgrade_when_uv_tool_install(self, mock_run):
+        """The actual subprocess invocation must switch to ``uv tool upgrade``."""
+        from hermes_cli.main import _cmd_update_pip
+
+        mock_run.return_value = subprocess.CompletedProcess(["uv"], 0, stdout="", stderr="")
+        with patch("shutil.which", return_value="/usr/local/bin/uv"), \
+             patch("hermes_cli.config.is_uv_tool_install", return_value=True):
+            _cmd_update_pip(SimpleNamespace())
+
+        assert mock_run.call_args[0][0] == ["/usr/local/bin/uv", "tool", "upgrade", "hermes-agent"]
+
+    @patch("subprocess.run")
+    def test_runs_uv_pip_install_when_not_uv_tool(self, mock_run):
+        """Existing behavior preserved when uv is present but Hermes isn't a tool install."""
+        from hermes_cli.main import _cmd_update_pip
+
+        mock_run.return_value = subprocess.CompletedProcess(["uv"], 0, stdout="", stderr="")
+        with patch("shutil.which", return_value="/usr/local/bin/uv"), \
+             patch("hermes_cli.config.is_uv_tool_install", return_value=False):
+            _cmd_update_pip(SimpleNamespace())
+
+        assert mock_run.call_args[0][0] == [
+            "/usr/local/bin/uv",
+            "pip",
+            "install",
+            "--upgrade",
+            "hermes-agent",
+        ]
+
+    @patch("subprocess.run")
+    def test_falls_back_to_pip_when_no_uv(self, mock_run):
+        from hermes_cli.main import _cmd_update_pip
+
+        mock_run.return_value = subprocess.CompletedProcess(["pip"], 0, stdout="", stderr="")
+        with patch("shutil.which", return_value=None), \
+             patch("hermes_cli.config.is_uv_tool_install", return_value=False):
+            _cmd_update_pip(SimpleNamespace())
+
+        cmd = mock_run.call_args[0][0]
+        assert cmd[1:] == ["-m", "pip", "install", "--upgrade", "hermes-agent"]
+
+    @patch("subprocess.run")
+    def test_exits_nonzero_on_subprocess_failure(self, mock_run):
+        from hermes_cli.main import _cmd_update_pip
+
+        mock_run.return_value = subprocess.CompletedProcess(["uv"], 1, stdout="", stderr="")
+        with patch("shutil.which", return_value="/usr/local/bin/uv"), \
+             patch("hermes_cli.config.is_uv_tool_install", return_value=True):
+            with pytest.raises(SystemExit) as exc_info:
+                _cmd_update_pip(SimpleNamespace())
+        assert exc_info.value.code == 1
+
+    @patch("subprocess.run")
+    def test_uv_tool_install_without_uv_on_path_exits_with_hint(self, mock_run):
+        """If the running interpreter looks like a uv-tool install but ``uv`` is
+        somehow missing from PATH, surface a clear hint instead of silently
+        falling back to ``python -m pip``, which would either fail (no venv)
+        or upgrade the wrong copy."""
+        from hermes_cli.main import _cmd_update_pip
+
+        with patch("shutil.which", return_value=None), \
+             patch("hermes_cli.config.is_uv_tool_install", return_value=True):
+            with pytest.raises(SystemExit) as exc_info:
+                _cmd_update_pip(SimpleNamespace())
+        assert exc_info.value.code == 1
+        mock_run.assert_not_called()
+
+
+# ---------------------------------------------------------------------------
+# pipx-managed installs, --system fallback, and VIRTUAL_ENV overlay
+# (issue #29700 / #35031 family — consolidated update-path handling)
+# ---------------------------------------------------------------------------
+
+
+class TestCmdUpdatePipInstallLayouts:
+    """The uv pip path must adapt to where the running interpreter lives:
+
+    - inside a venv (launcher shim)  -> export VIRTUAL_ENV, no ``--system``
+    - bare pip outside any venv      -> add ``--system``, no overlay
+    - pipx-managed                   -> ``pipx upgrade``
+    """
+
+    @patch("subprocess.run")
+    def test_pipx_managed_uses_pipx_upgrade(self, mock_run, monkeypatch):
+        from hermes_cli import main as hm
+
+        mock_run.return_value = subprocess.CompletedProcess([], 0, stdout="", stderr="")
+        monkeypatch.setattr(hm.sys, "prefix", "/home/u/.local/pipx/venvs/hermes-agent")
+        monkeypatch.setattr(hm.sys, "base_prefix", "/usr")
+
+        def _which(name):
+            return {"uv": "/usr/bin/uv", "pipx": "/usr/bin/pipx"}.get(name)
+
+        with patch("shutil.which", side_effect=_which), \
+             patch("hermes_cli.config.is_uv_tool_install", return_value=False):
+            hm._cmd_update_pip(SimpleNamespace())
+
+        assert mock_run.call_args[0][0] == ["/usr/bin/pipx", "upgrade", "hermes-agent"]
+        # pipx upgrade ignores VIRTUAL_ENV; we must not set it.
+        assert "env" not in mock_run.call_args.kwargs
+
+    @patch("subprocess.run")
+    def test_pipx_layout_without_pipx_binary_treated_as_venv(
+        self, mock_run, monkeypatch
+    ):
+        from hermes_cli import main as hm
+
+        mock_run.return_value = subprocess.CompletedProcess([], 0, stdout="", stderr="")
+        monkeypatch.setattr(hm.sys, "prefix", "/home/u/.local/pipx/venvs/hermes-agent")
+        monkeypatch.setattr(hm.sys, "base_prefix", "/usr")
+
+        # pipx layout detected via prefix, but pipx binary missing on PATH.
+        def _which(name):
+            return "/usr/bin/uv" if name == "uv" else None
+
+        with patch("shutil.which", side_effect=_which), \
+             patch("hermes_cli.config.is_uv_tool_install", return_value=False):
+            hm._cmd_update_pip(SimpleNamespace())
+
+        # prefix != base_prefix, so this is treated as a venv -> overlay, no --system.
+        assert mock_run.call_args[0][0] == [
+            "/usr/bin/uv", "pip", "install", "--upgrade", "hermes-agent",
+        ]
+        assert mock_run.call_args.kwargs["env"]["VIRTUAL_ENV"].endswith("hermes-agent")
+
+    @patch("subprocess.run")
+    def test_bare_pip_outside_venv_adds_system(self, mock_run, monkeypatch):
+        from hermes_cli import main as hm
+
+        mock_run.return_value = subprocess.CompletedProcess([], 0, stdout="", stderr="")
+        # No venv: prefix == base_prefix.
+        monkeypatch.setattr(hm.sys, "prefix", "/usr")
+        monkeypatch.setattr(hm.sys, "base_prefix", "/usr")
+
+        with patch("shutil.which", return_value="/usr/bin/uv"), \
+             patch("hermes_cli.config.is_uv_tool_install", return_value=False):
+            hm._cmd_update_pip(SimpleNamespace())
+
+        assert mock_run.call_args[0][0] == [
+            "/usr/bin/uv", "pip", "install", "--system", "--upgrade", "hermes-agent",
+        ]
+        assert "env" not in mock_run.call_args.kwargs
+
+    @patch("subprocess.run")
+    def test_venv_exports_virtualenv_and_omits_system(self, mock_run, monkeypatch):
+        from hermes_cli import main as hm
+
+        mock_run.return_value = subprocess.CompletedProcess([], 0, stdout="", stderr="")
+        monkeypatch.delenv("VIRTUAL_ENV", raising=False)
+        monkeypatch.setattr(hm.sys, "prefix", "/home/u/.hermes/hermes-agent/venv")
+        monkeypatch.setattr(hm.sys, "base_prefix", "/usr")
+
+        with patch("shutil.which", return_value="/usr/bin/uv"), \
+             patch("hermes_cli.config.is_uv_tool_install", return_value=False):
+            hm._cmd_update_pip(SimpleNamespace())
+
+        cmd = mock_run.call_args[0][0]
+        assert "--system" not in cmd
+        assert cmd == ["/usr/bin/uv", "pip", "install", "--upgrade", "hermes-agent"]
+        assert mock_run.call_args.kwargs["env"]["VIRTUAL_ENV"] == "/home/u/.hermes/hermes-agent/venv"
--- a/tests/honcho_plugin/test_async_memory.py
+++ b/tests/honcho_plugin/test_async_memory.py
@ -249,9 +249,12 @@ class TestFlushAll:
        mgr = _make_manager(write_frequency="async")
        sess = _make_session()
        sess.add_message("user", "pending")
-        mgr._async_queue.put(sess)

        with patch.object(mgr, "_flush_session") as mock_flush:
+            # Put the item AFTER the mock is installed so the background
+            # writer thread (if it dequeues before flush_all) still hits
+            # the mock rather than the real _flush_session.
+            mgr._async_queue.put(sess)
            mgr.flush_all()
            # Called at least once for the queued item
            assert mock_flush.call_count >= 1
--- a/Show more
+++ b/Show more